Skip Menu |

This queue is for tickets about the XML-LibXML CPAN distribution.

Report information
The Basics
Id: 69166
Status: resolved
Priority: 0/
Queue: XML-LibXML

People
Owner: Nobody in particular
Requestors: SAMSK [...] cpan.org
Cc:
AdminCc:

Bug Information
Severity: Wishlist
Broken in: (no value)
Fixed in: (no value)



I have a simple patch adding function XML::LibXML::externalEntityLoader for setting a global entity loader handler for whole app. Unlike existing functionality (ext_ent_handler parser option), this entity loader handler works also for libXSLT processing (in import,include and document()) and others libraries using xml*ExternalEntityLoader() functions for processing external entities.
Subject: perl-XML LibXML+global_entity_loader-0.2.diff
diff -ur XML-LibXML-1.75/LibXML.pm XML-LibXML-1.75-new/LibXML.pm --- XML-LibXML-1.75/LibXML.pm 2011-06-24 18:56:47.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.pm 2011-06-28 21:47:58.000000000 +0200 @@ -443,6 +443,12 @@ # callback functions # #-------------------------------------------------------------------------# +use Data::Dumper; +sub externalEntityLoader(&) +{ + return _externalEntityLoader($_[0]); +} + sub input_callbacks { my $self = shift; my $icbclass = shift; diff -ur XML-LibXML-1.75/LibXML.xs XML-LibXML-1.75-new/LibXML.xs --- XML-LibXML-1.75/LibXML.xs 2011-06-24 18:38:46.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.xs 2011-06-28 21:47:58.000000000 +0200 @@ -146,6 +146,9 @@ /* this should keep the default */ static xmlExternalEntityLoader LibXML_old_ext_ent_loader = NULL; +/* global external entity loader */ +SV *EXTERNAL_ENTITY_LOADER_FUNC = (SV *)NULL; + SV* PROXY_NODE_REGISTRY_MUTEX = NULL; /* **************************************************************** @@ -778,8 +781,7 @@ const char * ID, xmlParserCtxtPtr ctxt) { - SV * self; - HV * real_obj; + SV ** func; int count; SV * results; @@ -787,7 +789,8 @@ const char * results_pv; xmlParserInputBufferPtr input_buf; - if (ctxt->_private == NULL) { + if (ctxt->_private == NULL + && EXTERNAL_ENTITY_LOADER_FUNC == NULL) { return xmlNewInputFromFile(ctxt, URL); } @@ -798,11 +801,22 @@ ID = ""; } - self = (SV *)ctxt->_private; - real_obj = (HV *)SvRV(self); - func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + /* fetch entity loader function */ + if(EXTERNAL_ENTITY_LOADER_FUNC != NULL) + { + func = &EXTERNAL_ENTITY_LOADER_FUNC; + } + else + { + SV * self; + HV * real_obj; + + self = (SV *)ctxt->_private; + real_obj = (HV *)SvRV(self); + func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + } - if (func != NULL && SvTRUE(*func)) { + if (func != NULL && SvTRUE(*func)) { dTHX; dSP; @@ -899,18 +913,21 @@ if (ctxt) ctxt->linenumbers = 0; } - item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); - if ( item != NULL && SvTRUE(*item)) { - LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); - xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); - } - else { - if (parserOptions & XML_PARSE_NONET) { - LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); - xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); - } - /* LibXML_old_ext_ent_loader = NULL; */ - } + if(LibXML_old_ext_ent_loader == NULL) + { + item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + if (item != NULL && SvTRUE(*item)) { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); + } + else { + if (parserOptions & XML_PARSE_NONET) { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + /* LibXML_old_ext_ent_loader = NULL; */ + } + } } return real_obj; @@ -921,7 +938,8 @@ #ifndef WITH_SERRORS xmlGetWarningsDefaultValue = 0; #endif - if (LibXML_old_ext_ent_loader != NULL ) { + if (EXTERNAL_ENTITY_LOADER_FUNC == NULL + && LibXML_old_ext_ent_loader != NULL) { xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_old_ext_ent_loader ); } } @@ -2642,6 +2660,27 @@ OUTPUT: RETVAL +SV* +_externalEntityLoader( loader ) + SV* loader + CODE: + { + RETVAL = EXTERNAL_ENTITY_LOADER_FUNC; + if(EXTERNAL_ENTITY_LOADER_FUNC == NULL) + EXTERNAL_ENTITY_LOADER_FUNC = newSVsv(loader); +/* + else + SvSetSv(EXTERNAL_ENTITY_LOADER_FUNC, loader); +*/ + + if (LibXML_old_ext_ent_loader == NULL ) { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader((xmlExternalEntityLoader)LibXML_load_external_entity); + } + } + OUTPUT: + RETVAL + MODULE = XML::LibXML PACKAGE = XML::LibXML::HashTable xmlHashTablePtr @@ -9401,4 +9440,3 @@ #endif OUTPUT: RETVAL - diff -ur XML-LibXML-1.75/lib/XML/LibXML/Parser.pod XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod --- XML-LibXML-1.75/lib/XML/LibXML/Parser.pod 2011-06-24 18:57:22.000000000 +0200 +++ XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod 2011-06-28 22:15:57.000000000 +0200 @@ -71,6 +71,11 @@ $parser->load_catalog( $catalog_file ); + # Global external entity loader (similar to ext_ent_handler option + # but this works really globally, also in XML::LibXSLT include etc..) + + XML::LibXML::externalEntityLoader(&my_loader); + =head1 PARSING A XML document is read into a data structure such as a DOM tree by a piece of
Hi SAMSK, On Tue Jun 28 16:25:05 2011, SAMSK wrote: Show quoted text
> I have a simple patch adding function XML::LibXML::externalEntityLoader > for setting a global entity loader handler for whole app. > > Unlike existing functionality (ext_ent_handler parser option), this > entity loader handler works also for libXSLT processing (in > import,include and document()) and others libraries using > xml*ExternalEntityLoader() functions for processing external entities.
Thanks for your patch, but it has several issues: 1. It does not contain a test script in t/*.t (should be written using Test::More and with Test::Count annotations). 2. It violates some of the style guidelines in the HACKING.txt file (which admittedly I just added): https://bitbucket.org/shlomif/perl-xml-libxml/src/f51ee01afca7/HACKING.txt * Tabs instead of spaces for indentation. * Single-statement conditional/loop clauses. ------------ Please correct these issues and then we can proceed. Regards, -- Shlomi Fish
Hi, i've added test 49 and changed formating to conform new guidelines. Patch attached - hope its better now ;) Sam On Wed Jun 29 06:47:40 2011, SHLOMIF wrote: Show quoted text
> > Thanks for your patch, but it has several issues: > > 1. It does not contain a test script in t/*.t (should be written using > Test::More and with Test::Count annotations). > > 2. It violates some of the style guidelines in the HACKING.txt file > (which admittedly I just added): > > https://bitbucket.org/shlomif/perl-xml-libxml/src/f51ee01afca7/HACKING.txt > > * Tabs instead of spaces for indentation. > > * Single-statement conditional/loop clauses. > > ------------ > > Please correct these issues and then we can proceed. > > Regards, > > -- Shlomi Fish
Subject: perl-XML__LibXML+global_entity_loader-0.3.diff
diff -urN XML-LibXML-1.75/LibXML.pm XML-LibXML-1.75-new/LibXML.pm --- XML-LibXML-1.75/LibXML.pm 2011-06-24 18:56:47.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.pm 2011-06-29 14:42:27.000000000 +0200 @@ -443,6 +443,12 @@ # callback functions # #-------------------------------------------------------------------------# +use Data::Dumper; +sub externalEntityLoader(&) +{ + return _externalEntityLoader($_[0]); +} + sub input_callbacks { my $self = shift; my $icbclass = shift; diff -urN XML-LibXML-1.75/LibXML.xs XML-LibXML-1.75-new/LibXML.xs --- XML-LibXML-1.75/LibXML.xs 2011-06-24 18:38:46.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.xs 2011-06-29 15:05:55.000000000 +0200 @@ -146,6 +146,9 @@ /* this should keep the default */ static xmlExternalEntityLoader LibXML_old_ext_ent_loader = NULL; +/* global external entity loader */ +SV *EXTERNAL_ENTITY_LOADER_FUNC = (SV *)NULL; + SV* PROXY_NODE_REGISTRY_MUTEX = NULL; /* **************************************************************** @@ -778,8 +781,7 @@ const char * ID, xmlParserCtxtPtr ctxt) { - SV * self; - HV * real_obj; + SV ** func; int count; SV * results; @@ -787,7 +789,8 @@ const char * results_pv; xmlParserInputBufferPtr input_buf; - if (ctxt->_private == NULL) { + if (ctxt->_private == NULL && EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { return xmlNewInputFromFile(ctxt, URL); } @@ -798,11 +801,22 @@ ID = ""; } - self = (SV *)ctxt->_private; - real_obj = (HV *)SvRV(self); - func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + /* fetch entity loader function */ + if(EXTERNAL_ENTITY_LOADER_FUNC != NULL) + { + func = &EXTERNAL_ENTITY_LOADER_FUNC; + } + else + { + SV * self; + HV * real_obj; + + self = (SV *)ctxt->_private; + real_obj = (HV *)SvRV(self); + func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + } - if (func != NULL && SvTRUE(*func)) { + if (func != NULL && SvTRUE(*func)) { dTHX; dSP; @@ -899,18 +913,23 @@ if (ctxt) ctxt->linenumbers = 0; } - item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); - if ( item != NULL && SvTRUE(*item)) { - LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); - xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); - } - else { - if (parserOptions & XML_PARSE_NONET) { + if(LibXML_old_ext_ent_loader == NULL) + { + item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + if (item != NULL && SvTRUE(*item)) { LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); - xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); } - /* LibXML_old_ext_ent_loader = NULL; */ - } + else + { + if (parserOptions & XML_PARSE_NONET) + { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + /* LibXML_old_ext_ent_loader = NULL; */ + } + } } return real_obj; @@ -921,7 +940,8 @@ #ifndef WITH_SERRORS xmlGetWarningsDefaultValue = 0; #endif - if (LibXML_old_ext_ent_loader != NULL ) { + if (EXTERNAL_ENTITY_LOADER_FUNC == NULL && LibXML_old_ext_ent_loader != NULL) + { xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_old_ext_ent_loader ); } } @@ -2642,6 +2662,26 @@ OUTPUT: RETVAL +SV* +_externalEntityLoader( loader ) + SV* loader + CODE: + { + RETVAL = EXTERNAL_ENTITY_LOADER_FUNC; + if(EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { + EXTERNAL_ENTITY_LOADER_FUNC = newSVsv(loader); + } + + if (LibXML_old_ext_ent_loader == NULL ) + { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader((xmlExternalEntityLoader)LibXML_load_external_entity); + } + } + OUTPUT: + RETVAL + MODULE = XML::LibXML PACKAGE = XML::LibXML::HashTable xmlHashTablePtr @@ -9401,4 +9441,3 @@ #endif OUTPUT: RETVAL - diff -urN XML-LibXML-1.75/lib/XML/LibXML/Parser.pod XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod --- XML-LibXML-1.75/lib/XML/LibXML/Parser.pod 2011-06-24 18:57:22.000000000 +0200 +++ XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod 2011-06-29 15:18:26.000000000 +0200 @@ -71,6 +71,11 @@ $parser->load_catalog( $catalog_file ); + # Global external entity loader (similar to ext_ent_handler option + # but this works really globally, also in XML::LibXSLT include etc..) + + XML::LibXML::externalEntityLoader(\&my_loader); + =head1 PARSING A XML document is read into a data structure such as a DOM tree by a piece of diff -urN XML-LibXML-1.75/t/49global_extent.t XML-LibXML-1.75-new/t/49global_extent.t --- XML-LibXML-1.75/t/49global_extent.t 1970-01-01 01:00:00.000000000 +0100 +++ XML-LibXML-1.75-new/t/49global_extent.t 2011-06-29 15:26:01.000000000 +0200 @@ -0,0 +1,51 @@ +# Test file created outside of h2xs framework. +# Run this like so: `perl 49global_extent.t' +# samuel(_dot_)behan(-at-)dob(_dot_)sk 2011/06/29 15:18:43 + +######################### + +# change 'tests => 1' to 'tests => last_test_to_print'; + +use Test::More tests => 1; + +use warnings; +use strict; +use XML::LibXML; +$|=1; + +if (20627 > XML::LibXML::LIBXML_VERSION) { + skip("skipping for libxml2 < 2.6.27") for 1..7; +} else { + +# global entity loader +XML::LibXML::externalEntityLoader(\&handler); + +my $parser = XML::LibXML->new({ + expand_entities => 1, +}); + +sub handler { + return "ENTITY:" . join(",",@_); +} + +my $xml = <<'EOF'; +<?xml version="1.0"?> +<!DOCTYPE foo [ +<!ENTITY a PUBLIC "//foo/bar/b" "file:/dev/null"> +<!ENTITY b SYSTEM "file:///dev/null"> +]> +<root> + <a>&a;</a> + <b>&b;</b> +</root> +EOF +my $xml_out = $xml; +$xml_out =~ s{&a;}{ENTITY:file:/dev/null,//foo/bar/b}; +$xml_out =~ s{&b;}{ENTITY:file:///dev/null,}; + +my $doc = $parser->parse_string($xml); + +# TEST +ok( $doc->toString() eq $xml_out ); + +} \ No newline at end of file
Hi Sam, On Wed Jun 29 09:36:01 2011, SAMSK wrote: Show quoted text
> Hi, > > i've added test 49 and changed formating to conform new guidelines. > Patch attached - hope its better now ;) >
Thanks for the quick response. I'll take a look at what you've written later. Now it's too late as it's 2 AM here. Regards, -- Shlomi Fish Show quoted text
> Sam > > On Wed Jun 29 06:47:40 2011, SHLOMIF wrote:
> > > > Thanks for your patch, but it has several issues: > > > > 1. It does not contain a test script in t/*.t (should be written using > > Test::More and with Test::Count annotations). > > > > 2. It violates some of the style guidelines in the HACKING.txt file > > (which admittedly I just added): > > > >
https://bitbucket.org/shlomif/perl-xml-libxml/src/f51ee01afca7/HACKING.txt Show quoted text
> > > > * Tabs instead of spaces for indentation. > > > > * Single-statement conditional/loop clauses. > > > > ------------ > > > > Please correct these issues and then we can proceed. > > > > Regards, > > > > -- Shlomi Fish
>
Subject: Global external entity handler.
On Wed Jun 29 09:36:01 2011, SAMSK wrote: Show quoted text
> Hi, > > i've added test 49 and changed formating to conform new guidelines. > Patch attached - hope its better now ;) > > Sam > > On Wed Jun 29 06:47:40 2011, SHLOMIF wrote:
> > > > Thanks for your patch, but it has several issues: > > > > 1. It does not contain a test script in t/*.t (should be written using > > Test::More and with Test::Count annotations). > > > > 2. It violates some of the style guidelines in the HACKING.txt file > > (which admittedly I just added): > > > >
https://bitbucket.org/shlomif/perl-xml-libxml/src/f51ee01afca7/HACKING.txt Show quoted text
> > > > * Tabs instead of spaces for indentation. > > > > * Single-statement conditional/loop clauses. > > > > ------------ > > > > Please correct these issues and then we can proceed. > > > > Regards, > > > > -- Shlomi Fish
>
Hi Sam, Thanks for your patch. Here are the remaining issues with it: [QUOTE] diff -urN XML-LibXML-1.75/LibXML.pm XML-LibXML-1.75-new/LibXML.pm --- XML-LibXML-1.75/LibXML.pm 2011-06-24 18:56:47.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.pm 2011-06-29 14:42:27.000000000 +0200 @@ -443,6 +443,12 @@ # callback functions # #-------------------------------------------------------------------------# +use Data::Dumper; +sub externalEntityLoader(&) +{ + return _externalEntityLoader($_[0]); +} + [/QUOTE] 1. Why do you need to load Data::Dumper? 2. You have a 5 space indent instead of a 4-space indent. [QUOTE] SV *EXTERNAL_ENTITY_LOADER_FUNC = (SV *)NULL; [/QUOTE] 1. Why should the name be in ALL-CAPS? 2. Should it be a global or should it belong to the parser instance? [QUOTE] - SV * self; - HV * real_obj; + [/QUOTE] You have a maverick empty line here. You also have some lines with trailing space. [QUOTE] diff -urN XML-LibXML-1.75/lib/XML/LibXML/Parser.pod XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod --- XML-LibXML-1.75/lib/XML/LibXML/Parser.pod 2011-06-24 18:57:22.000000000 +0200 +++ XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod 2011-06-29 15:18:26.000000000 +0200 @@ -71,6 +71,11 @@ $parser->load_catalog( $catalog_file ); + # Global external entity loader (similar to ext_ent_handler option + # but this works really globally, also in XML::LibXSLT include etc..) + + XML::LibXML::externalEntityLoader(\&my_loader); + [/QUOTE] Don't patch Parser.pod directly - instead patch docs/libxml.dbk . You also have a tab character there. [QUOTE] diff -urN XML-LibXML-1.75/t/49global_extent.t XML-LibXML-1.75-new/t/49global_extent.t --- XML-LibXML-1.75/t/49global_extent.t 1970-01-01 01:00:00.000000000 +0100 +++ XML-LibXML-1.75-new/t/49global_extent.t 2011-06-29 15:26:01.000000000 +0200 @@ -0,0 +1,51 @@ +# Test file created outside of h2xs framework. +# Run this like so: `perl 49global_extent.t' +# samuel(_dot_)behan(-at-)dob(_dot_)sk 2011/06/29 15:18:43 + [/QUOTE] No need for this comment. Well, you can keep your name there and maybe a copyright blurb. You can look at https://bitbucket.org/shlomif/perl-xml-libxml/src/5b6ce7779709/t/49_load_html.t for how to put it under the MIT/X11 licence. [QUOTE] +######################### + +# change 'tests => 1' to 'tests => last_test_to_print'; + +use Test::More tests => 1; + +use warnings; +use strict; [/QUOTE] Put the strict/warnings before the use Test::More. [QUOTE] +use XML::LibXML; +$|=1; [/QUOTE] use IO::Handle->autoflush(1) instead (if you really need it.). [QUOTE] + +if (20627 > XML::LibXML::LIBXML_VERSION) { + skip("skipping for libxml2 < 2.6.27") for 1..7; [/QUOTE] You only skip once and you should use skip_all. [QUOTE] +} else { + +# global entity loader +XML::LibXML::externalEntityLoader(\&handler); + +my $parser = XML::LibXML->new({ + expand_entities => 1, +}); + +sub handler { + return "ENTITY:" . join(",",@_); +} + +my $xml = <<'EOF'; +<?xml version="1.0"?> +<!DOCTYPE foo [ +<!ENTITY a PUBLIC "//foo/bar/b" "file:/dev/null"> +<!ENTITY b SYSTEM "file:///dev/null"> +]> +<root> + <a>&a;</a> + <b>&b;</b> +</root> +EOF +my $xml_out = $xml; +$xml_out =~ s{&a;}{ENTITY:file:/dev/null,//foo/bar/b}; +$xml_out =~ s{&b;}{ENTITY:file:///dev/null,}; + +my $doc = $parser->parse_string($xml); + +# TEST +ok( $doc->toString() eq $xml_out ); + [/QUOTE] Please fix or comment on all these issues. Here you can use Test::More::is() or maybe use eq_or_diff from TestHelpers.pm in the repository. Regards, -- Shlomi Fish
On Thu Jun 30 13:11:56 2011, SHLOMIF wrote: Show quoted text
> > Hi Sam, > > Thanks for your patch. Here are the remaining issues with it: > > [QUOTE] > diff -urN XML-LibXML-1.75/LibXML.pm XML-LibXML-1.75-new/LibXML.pm > --- XML-LibXML-1.75/LibXML.pm 2011-06-24 18:56:47.000000000 +0200 > +++ XML-LibXML-1.75-new/LibXML.pm 2011-06-29 14:42:27.000000000 +0200 > @@ -443,6 +443,12 @@ > # callback functions > # >
#------------------------------------------------------------------------- Show quoted text
> # > > +use Data::Dumper; > +sub externalEntityLoader(&) > +{ > + return _externalEntityLoader($_[0]); > +} > + > [/QUOTE] > > 1. Why do you need to load Data::Dumper? > > 2. You have a 5 space indent instead of a 4-space indent. >
fixed Show quoted text
> [QUOTE] > SV *EXTERNAL_ENTITY_LOADER_FUNC = (SV *)NULL; > [/QUOTE] > > 1. Why should the name be in ALL-CAPS? > > 2. Should it be a global or should it belong to the parser instance? >
By (first) implementation I've followed style of original code, where XS globals are all-caps (see LibXML.xs:152). Show quoted text
> [QUOTE] > - SV * self; > - HV * real_obj; > + > [/QUOTE] > > You have a maverick empty line here. > > You also have some lines with trailing space. >
removed. Show quoted text
> [QUOTE] > diff -urN XML-LibXML-1.75/lib/XML/LibXML/Parser.pod > XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod > --- XML-LibXML-1.75/lib/XML/LibXML/Parser.pod 2011-06-24 > 18:57:22.000000000 +0200 > +++ XML-LibXML-1.75-new/lib/XML/LibXML/Parser.pod 2011-06-29 > 15:18:26.000000000 +0200 > @@ -71,6 +71,11 @@ > > $parser->load_catalog( $catalog_file ); > > + # Global external entity loader (similar to ext_ent_handler option > + # but this works really globally, also in XML::LibXSLT include > etc..) > + > + XML::LibXML::externalEntityLoader(\&my_loader); > + > [/QUOTE] > > Don't patch Parser.pod directly - instead patch docs/libxml.dbk . You > also have a tab character there.
ok, not patching parser.pod. Sorry, no time to play with book xml. Show quoted text
> > [QUOTE] > diff -urN XML-LibXML-1.75/t/49global_extent.t > XML-LibXML-1.75-new/t/49global_extent.t > --- XML-LibXML-1.75/t/49global_extent.t 1970-01-01 01:00:00.000000000 > +0100 > +++ XML-LibXML-1.75-new/t/49global_extent.t 2011-06-29 > 15:26:01.000000000 +0200 > @@ -0,0 +1,51 @@ > +# Test file created outside of h2xs framework. > +# Run this like so: `perl 49global_extent.t' > +# samuel(_dot_)behan(-at-)dob(_dot_)sk 2011/06/29 15:18:43 > + > [/QUOTE] > > No need for this comment. Well, you can keep your name there and maybe > a > copyright blurb. You can look at > https://bitbucket.org/shlomif/perl-xml- > libxml/src/5b6ce7779709/t/49_load_html.t > for how to put it under the MIT/X11 licence. >
removed. Show quoted text
> > [QUOTE] > +######################### > + > +# change 'tests => 1' to 'tests => last_test_to_print'; > + > +use Test::More tests => 1; > + > +use warnings; > +use strict; > [/QUOTE] > > Put the strict/warnings before the use Test::More. >
fixed. Show quoted text
> [QUOTE] > +use XML::LibXML; > +$|=1; > [/QUOTE] > > use IO::Handle->autoflush(1) instead (if you really need it.). >
removed. Show quoted text
> [QUOTE] > + > +if (20627 > XML::LibXML::LIBXML_VERSION) { > + skip("skipping for libxml2 < 2.6.27") for 1..7; > [/QUOTE] > > You only skip once and you should use skip_all. >
fixed. Show quoted text
> [QUOTE] > +} else { > + > +# global entity loader > +XML::LibXML::externalEntityLoader(\&handler); > + > +my $parser = XML::LibXML->new({ > + expand_entities => 1, > +}); > + > +sub handler { > + return "ENTITY:" . join(",",@_); > +} > + > +my $xml = <<'EOF'; > +<?xml version="1.0"?> > +<!DOCTYPE foo [ > +<!ENTITY a PUBLIC "//foo/bar/b" "file:/dev/null"> > +<!ENTITY b SYSTEM "file:///dev/null"> > +]> > +<root> > + <a>&a;</a> > + <b>&b;</b> > +</root> > +EOF > +my $xml_out = $xml; > +$xml_out =~ s{&a;}{ENTITY:file:/dev/null,//foo/bar/b}; > +$xml_out =~ s{&b;}{ENTITY:file:///dev/null,}; > + > +my $doc = $parser->parse_string($xml); > + > +# TEST > +ok( $doc->toString() eq $xml_out ); > + > [/QUOTE] > > Please fix or comment on all these issues. > > Here you can use Test::More::is() or maybe use eq_or_diff from > TestHelpers.pm in the repository. > > Regards, > > -- Shlomi Fish >
Regards Sam
Subject: perl-XML LibXML+global_entity_loader-0.4.diff
diff -urN XML-LibXML-1.75/LibXML.pm XML-LibXML-1.75-new/LibXML.pm --- XML-LibXML-1.75/LibXML.pm 2011-06-24 18:56:47.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.pm 2011-06-30 19:56:14.000000000 +0200 @@ -443,6 +443,11 @@ # callback functions # #-------------------------------------------------------------------------# +sub externalEntityLoader(&) +{ + return _externalEntityLoader($_[0]); +} + sub input_callbacks { my $self = shift; my $icbclass = shift; diff -urN XML-LibXML-1.75/LibXML.xs XML-LibXML-1.75-new/LibXML.xs --- XML-LibXML-1.75/LibXML.xs 2011-06-24 18:38:46.000000000 +0200 +++ XML-LibXML-1.75-new/LibXML.xs 2011-06-30 20:49:54.000000000 +0200 @@ -146,6 +146,9 @@ /* this should keep the default */ static xmlExternalEntityLoader LibXML_old_ext_ent_loader = NULL; +/* global external entity loader */ +SV *EXTERNAL_ENTITY_LOADER_FUNC = (SV *)NULL; + SV* PROXY_NODE_REGISTRY_MUTEX = NULL; /* **************************************************************** @@ -778,8 +781,6 @@ const char * ID, xmlParserCtxtPtr ctxt) { - SV * self; - HV * real_obj; SV ** func; int count; SV * results; @@ -787,7 +788,8 @@ const char * results_pv; xmlParserInputBufferPtr input_buf; - if (ctxt->_private == NULL) { + if (ctxt->_private == NULL && EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { return xmlNewInputFromFile(ctxt, URL); } @@ -798,11 +800,22 @@ ID = ""; } - self = (SV *)ctxt->_private; - real_obj = (HV *)SvRV(self); - func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + /* fetch entity loader function */ + if(EXTERNAL_ENTITY_LOADER_FUNC != NULL) + { + func = &EXTERNAL_ENTITY_LOADER_FUNC; + } + else + { + SV * self; + HV * real_obj; + + self = (SV *)ctxt->_private; + real_obj = (HV *)SvRV(self); + func = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + } - if (func != NULL && SvTRUE(*func)) { + if (func != NULL && SvTRUE(*func)) { dTHX; dSP; @@ -899,18 +912,23 @@ if (ctxt) ctxt->linenumbers = 0; } - item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); - if ( item != NULL && SvTRUE(*item)) { - LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); - xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); - } - else { - if (parserOptions & XML_PARSE_NONET) { + if(EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { + item = hv_fetch(real_obj, "ext_ent_handler", 15, 0); + if (item != NULL && SvTRUE(*item)) { LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); - xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_load_external_entity ); } - /* LibXML_old_ext_ent_loader = NULL; */ - } + else + { + if (parserOptions & XML_PARSE_NONET) + { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader( xmlNoNetExternalEntityLoader ); + } + /* LibXML_old_ext_ent_loader = NULL; */ + } + } } return real_obj; @@ -921,7 +939,8 @@ #ifndef WITH_SERRORS xmlGetWarningsDefaultValue = 0; #endif - if (LibXML_old_ext_ent_loader != NULL ) { + if (EXTERNAL_ENTITY_LOADER_FUNC == NULL && LibXML_old_ext_ent_loader != NULL) + { xmlSetExternalEntityLoader( (xmlExternalEntityLoader)LibXML_old_ext_ent_loader ); } } @@ -2642,6 +2661,26 @@ OUTPUT: RETVAL +SV* +_externalEntityLoader( loader ) + SV* loader + CODE: + { + RETVAL = EXTERNAL_ENTITY_LOADER_FUNC; + if(EXTERNAL_ENTITY_LOADER_FUNC == NULL) + { + EXTERNAL_ENTITY_LOADER_FUNC = newSVsv(loader); + } + + if (LibXML_old_ext_ent_loader == NULL ) + { + LibXML_old_ext_ent_loader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader((xmlExternalEntityLoader)LibXML_load_external_entity); + } + } + OUTPUT: + RETVAL + MODULE = XML::LibXML PACKAGE = XML::LibXML::HashTable xmlHashTablePtr @@ -9401,4 +9440,3 @@ #endif OUTPUT: RETVAL - diff -urN XML-LibXML-1.75/t/49global_extent.t XML-LibXML-1.75-new/t/49global_extent.t --- XML-LibXML-1.75/t/49global_extent.t 1970-01-01 01:00:00.000000000 +0100 +++ XML-LibXML-1.75-new/t/49global_extent.t 2011-06-30 20:46:10.000000000 +0200 @@ -0,0 +1,50 @@ +# Test file created outside of h2xs framework. +# Run this like so: `perl 49global_extent.t' +# + +######################### + +# change 'tests => 1' to 'tests => last_test_to_print'; + +use warnings; +use strict; + +use Test::More tests => 1; +use XML::LibXML; + +if (20627 > XML::LibXML::LIBXML_VERSION) { + skip_all("skipping for libxml2 < 2.6.27") for 1..7; +} else { + +# global entity loader +XML::LibXML::externalEntityLoader(\&handler); + +my $parser = XML::LibXML->new({ + expand_entities => 1, +}); + +sub handler { + return "ENTITY:" . join(",",@_); +} + +my $xml = <<'EOF'; +<?xml version="1.0"?> +<!DOCTYPE foo [ +<!ENTITY a PUBLIC "//foo/bar/b" "file:/dev/null"> +<!ENTITY b SYSTEM "file:///dev/null"> +]> +<root> + <a>&a;</a> + <b>&b;</b> +</root> +EOF +my $xml_out = $xml; +$xml_out =~ s{&a;}{ENTITY:file:/dev/null,//foo/bar/b}; +$xml_out =~ s{&b;}{ENTITY:file:///dev/null,}; + +my $doc = $parser->parse_string($xml); + +# TEST +is( $doc->toString(), $xml_out ); + +} \ No newline at end of file
Hi Sam, thanks for your patch. I applied it with some tweaks (and while adding some documentation) to the repository here: https://bitbucket.org/shlomif/perl-xml-libxml It will be part of the next CPAN release. Regards, -- Shlomi Fish