Subject: | parse_html_string() sets the document URL inefficiently for libxml2 >= 2.6.27 |
The following code in _parse_html_string() in LibXML.xs
Show quoted text
--- CODE START ---
#if LIBXML_VERSION >= 20627
if (recover) options |= HTML_PARSE_RECOVER;
real_doc = htmlReadDoc((xmlChar*)ptr, URL, encoding, options);
#else
real_doc = htmlParseDoc((xmlChar*)ptr, encoding);
#endif
if ( real_doc != NULL ) {
if (real_doc->URL) xmlFree((xmlChar *)real_doc->URL);
if (URL) {
real_doc->URL = xmlStrdup((const xmlChar*) URL);
} else {
SV * newURI = sv_2mortal(newSVpvf("unknown-%12.12d",
(void*)real_doc));
real_doc->URL = xmlStrdup((const
xmlChar*)SvPV_nolen(newURI));
}
--- CODE END ---
would be better written as
--- CODE START ---
#if LIBXML_VERSION >= 20627
if (recover) options |= HTML_PARSE_RECOVER;
real_doc = htmlReadDoc((xmlChar*)ptr, URL, encoding, options);
#else
real_doc = htmlParseDoc((xmlChar*)ptr, encoding);
if ( real_doc != NULL ) {
if (real_doc->URL) xmlFree((xmlChar *)real_doc->URL);
if (URL != NULL) {
real_doc->URL = xmlStrdup((const xmlChar*) URL);
}
}
#endif
if ( URL == NULL && real_doc ) {
SV * newURI = sv_2mortal(newSVpvf("unknown-%12.12d",
(void*)real_doc));
real_doc->URL = xmlStrdup((const xmlChar*)SvPV_nolen(newURI));
}
--- CODE END ---
This reflects the fact that newer libxml2 versions already set the correct URI
themselves. Therefore, only the case where no URI has been provided has to be
handled for both old and new libxml2 versions.
Christian