Skip Menu |

This queue is for tickets about the XML-TreeBuilder CPAN distribution.

Report information
The Basics
Id: 50060
Status: resolved
Priority: 0/
Queue: XML-TreeBuilder

People
Owner: Nobody in particular
Requestors: Jeff.Fearn [...] gmail.com
Cc:
AdminCc:

Bug Information
Severity: Wishlist
Broken in: 3.09
Fixed in: 4.0



Subject: Support setting NoExpand and ErrorContext [PATCH]
It is very useful to be able to set both NoExpand and ErrorContext for XML::Parser to allow XML to be pre-processed and to generate better error messages for users. Attached is a patch that enables both of these to be set when creating a new TreeBuilder object and to clean up the extra attributes added to the node tree. Cheers, Jeff.
Subject: XML-TreeBuilder-NoExpand.patch
diff -rubB --exclude=.svn --exclude='*.swp' XML-TreeBuilder-3.09/Changes XML-TreeBuilder-patched/Changes --- XML-TreeBuilder-3.09/Changes 2004-06-11 14:28:41.000000000 +1000 +++ XML-TreeBuilder-patched/Changes 2009-03-16 14:30:51.000000000 +1000 @@ -1,5 +1,10 @@ -# Time-stamp: "2004-06-10 20:28:41 ADT" +2009-16-03 Jeff Fearn <jfearn@redhat.com> + Release 3.09.x + + Added NoExpand option to allow entities to be left untouched in xml. + Added ErrorContext option to allow better reporting of error locations. + Expanded tests to test these options. 2004-06-10 Sean M. Burke <sburke@cpan.org> diff -rubB --exclude=.svn --exclude='*.swp' XML-TreeBuilder-3.09/lib/XML/TreeBuilder.pm XML-TreeBuilder-patched/lib/XML/TreeBuilder.pm --- XML-TreeBuilder-3.09/lib/XML/TreeBuilder.pm 2004-06-11 13:59:14.000000000 +1000 +++ XML-TreeBuilder-patched/lib/XML/TreeBuilder.pm 2009-09-28 13:51:25.000000000 +1000 @@ -5,6 +7,7 @@ use strict; use XML::Element (); use XML::Parser (); +use Carp; use vars qw(@ISA $VERSION); $VERSION = '3.09'; @@ -12,8 +15,15 @@ #========================================================================== sub new { - my $class = ref($_[0]) || $_[0]; - # that's the only parameter it knows + my ( $this, $arg ) = @_; + my $class = ref($this) || $this; + + my $NoExpand = ( delete $arg->{'NoExpand'} || undef ); + my $ErrorContext = ( delete $arg->{'ErrorContext'} || undef ); + + if ( %{$arg} ) { + croak "unknown args: " . join( ", ", keys %{$arg} ); + } my $self = XML::Element->new('NIL'); bless $self, $class; # and rebless @@ -21,44 +31,53 @@ $self->{'_store_comments'} = 0; $self->{'_store_pis'} = 0; $self->{'_store_declarations'} = 0; + $self->{'NoExpand'} = $NoExpand if ($NoExpand); + $self->{'ErrorContext'} = $ErrorContext if ($ErrorContext); my @stack; + # Compare the simplicity of this to the sheer nastiness of HTML::TreeBuilder! - $self->{'_xml_parser'} = XML::Parser->new( 'Handlers' => { + $self->{'_xml_parser'} = XML::Parser->new( + 'Handlers' => { + 'Default' => sub { + if ( ( $self->{'NoExpand'} ) && ( $_[1] =~ /&.*\;/ ) ) { + $stack[-1]->push_content( $_[1] ); + } + return; + }, 'Start' => sub { shift; - if(@stack) { + $self->attr('NoExpand', undef); + $self->attr('ErrorContext', undef); + if (@stack) { push @stack, $self->{'_element_class'}->new(@_); $stack[-2]->push_content( $stack[-1] ); - } else { + } + else { $self->tag(shift); - while(@_) { $self->attr(splice(@_,0,2)) }; + while (@_) { $self->attr( splice( @_, 0, 2 ) ) } push @stack, $self; } }, 'End' => sub { pop @stack; return }, - 'Char' => sub { $stack[-1]->push_content($_[1]) }, + 'Char' => sub { $stack[-1]->push_content( $_[1] ) }, 'Comment' => sub { return unless $self->{'_store_comments'}; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~comment', 'text' => $_[1]) - ); + ( @stack ? $stack[-1] : $self ) + ->push_content( $self->{'_element_class'} + ->new( '~comment', 'text' => $_[1] ) ); return; }, 'Proc' => sub { return unless $self->{'_store_pis'}; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~pi', 'text' => "$_[1] $_[2]") - ); + ( @stack ? $stack[-1] : $self ) + ->push_content( $self->{'_element_class'} + ->new( '~pi', 'text' => "$_[1] $_[2]" ) ); return; }, @@ -67,11 +86,11 @@ 'Attlist' => sub { return unless $self->{'_store_declarations'}; shift; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~declaration', - 'text' => join ' ', 'ATTLIST', @_ + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'ATTLIST', @_ ) ); return; @@ -80,11 +99,11 @@ 'Element' => sub { return unless $self->{'_store_declarations'}; shift; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~declaration', - 'text' => join ' ', 'ELEMENT', @_ + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'ELEMENT', @_ ) ); return; @@ -93,17 +112,32 @@ 'Doctype' => sub { return unless $self->{'_store_declarations'}; shift; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~declaration', - 'text' => join ' ', 'DOCTYPE', @_ + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'DOCTYPE', @_ ) ); return; }, - }); + 'Entity' => sub { + return unless $self->{'_store_declarations'}; + shift; + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'ENTITY', @_ + ) + ); + return; + }, + }, + 'NoExpand' => $self->{'NoExpand'}, + 'ErrorContext' => $self->{'ErrorContext'} + ); return $self; } @@ -110,15 +145,15 @@ #========================================================================== sub _elem # universal accessor... { - my($self, $elem, $val) = @_; + my ( $self, $elem, $val ) = @_; my $old = $self->{$elem}; $self->{$elem} = $val if defined $val; return $old; } -sub store_comments { shift->_elem('_store_comments', @_); } -sub store_declarations { shift->_elem('_store_declarations', @_); } -sub store_pis { shift->_elem('_store_pis', @_); } +sub store_comments { shift->_elem( '_store_comments', @_ ); } +sub store_declarations { shift->_elem( '_store_declarations', @_ ); } +sub store_pis { shift->_elem( '_store_pis', @_ ); } #========================================================================== diff -rubB --exclude=.svn --exclude='*.swp' XML-TreeBuilder-3.09/t/10main.t XML-TreeBuilder-patched/t/10main.t --- XML-TreeBuilder-3.09/t/10main.t 2004-06-11 14:22:53.000000000 +1000 +++ XML-TreeBuilder-patched/t/10main.t 2009-09-28 13:59:55.000000000 +1000 @@ -2,7 +2,7 @@ # Time-stamp: "2004-06-10 20:22:53 ADT" use Test; -BEGIN { plan tests => 3 } +BEGIN { plan tests => 4 } use XML::TreeBuilder; @@ -29,8 +29,7 @@ ] ); - -ok $x->same_as($y); +ok($x->same_as($y)); unless( $ENV{'HARNESS_ACTIVE'} ) { $x->dump; @@ -45,6 +44,27 @@ $x->delete; $y->delete; +$x = XML::TreeBuilder->new({ 'NoExpand' => "1", 'ErrorContext' => "2" }); +$x->store_comments(1); +$x->store_pis(1); +$x->store_declarations(1); +$x->parse( + qq{<!-- myorp --><Gee><foo Id="me" xml:foo="lal">Hello World</foo>} . + qq{<lor/><!-- foo --></Gee><!-- glarg -->} +); + +$y = XML::Element->new_from_lol( + ['Gee', + ['~comment', {'text' => ' myorp '}], + ['foo', {'Id'=> 'me', 'xml:foo' => 'lal'}, 'Hello World'], + ['lor'], + ['~comment', {'text' => ' foo '}], + ['~comment', {'text' => ' glarg '}], + ] +); + +ok($x->same_as($y)); + ok 1; print "# Bye from ", __FILE__, "\n";
Sigh, the original patch eats entities. Attached is a version that works ... how embarrassing :( Cheers, Jeff.
diff -rubB --exclude=.svn --exclude='*.swp' XML-TreeBuilder-3.09/Changes XML-TreeBuilder-patched/Changes --- XML-TreeBuilder-3.09/Changes 2004-06-11 14:28:41.000000000 +1000 +++ XML-TreeBuilder-patched/Changes 2009-03-16 14:30:51.000000000 +1000 @@ -1,5 +1,10 @@ -# Time-stamp: "2004-06-10 20:28:41 ADT" +2009-16-03 Jeff Fearn <jfearn@redhat.com> + Release 3.09.x + + Added NoExpand option to allow entities to be left untouched in xml. + Added ErrorContext option to allow better reporting of error locations. + Expanded tests to test these options. 2004-06-10 Sean M. Burke <sburke@cpan.org> diff -rubB --exclude=.svn --exclude='*.swp' XML-TreeBuilder-3.09/lib/XML/TreeBuilder.pm XML-TreeBuilder-patched/lib/XML/TreeBuilder.pm --- XML-TreeBuilder-3.09/lib/XML/TreeBuilder.pm 2004-06-11 13:59:14.000000000 +1000 +++ XML-TreeBuilder-patched/lib/XML/TreeBuilder.pm 2009-09-29 09:21:18.000000000 +1000 @@ -5,6 +7,7 @@ use strict; use XML::Element (); use XML::Parser (); +use Carp; use vars qw(@ISA $VERSION); $VERSION = '3.09'; @@ -12,8 +15,15 @@ #========================================================================== sub new { - my $class = ref($_[0]) || $_[0]; - # that's the only parameter it knows + my ( $this, $arg ) = @_; + my $class = ref($this) || $this; + + my $NoExpand = ( delete $arg->{'NoExpand'} || undef ); + my $ErrorContext = ( delete $arg->{'ErrorContext'} || undef ); + + if ( %{$arg} ) { + croak "unknown args: " . join( ", ", keys %{$arg} ); + } my $self = XML::Element->new('NIL'); bless $self, $class; # and rebless @@ -21,57 +31,76 @@ $self->{'_store_comments'} = 0; $self->{'_store_pis'} = 0; $self->{'_store_declarations'} = 0; + $self->{'NoExpand'} = $NoExpand if ($NoExpand); + $self->{'ErrorContext'} = $ErrorContext if ($ErrorContext); my @stack; + # Compare the simplicity of this to the sheer nastiness of HTML::TreeBuilder! - $self->{'_xml_parser'} = XML::Parser->new( 'Handlers' => { + $self->{'_xml_parser'} = XML::Parser->new( + 'Handlers' => { + 'Default' => sub { + if ( ( $self->{'NoExpand'} ) && ( $_[1] =~ /&.*\;/ ) ) { + $stack[-1]->push_content( $_[1] ); + } + return; + }, 'Start' => sub { shift; - if(@stack) { + if (@stack) { push @stack, $self->{'_element_class'}->new(@_); $stack[-2]->push_content( $stack[-1] ); - } else { + } + else { $self->tag(shift); - while(@_) { $self->attr(splice(@_,0,2)) }; + while (@_) { $self->attr( splice( @_, 0, 2 ) ) } push @stack, $self; } }, 'End' => sub { pop @stack; return }, - 'Char' => sub { $stack[-1]->push_content($_[1]) }, + 'Char' => sub { $stack[-1]->push_content( $_[1] ) }, 'Comment' => sub { return unless $self->{'_store_comments'}; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~comment', 'text' => $_[1]) - ); + ( @stack ? $stack[-1] : $self ) + ->push_content( $self->{'_element_class'} + ->new( '~comment', 'text' => $_[1] ) ); return; }, 'Proc' => sub { return unless $self->{'_store_pis'}; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~pi', 'text' => "$_[1] $_[2]") - ); + ( @stack ? $stack[-1] : $self ) + ->push_content( $self->{'_element_class'} + ->new( '~pi', 'text' => "$_[1] $_[2]" ) ); return; }, + 'Final' => sub { + $self->root()->traverse( + sub { + my ( $node, $start ) = @_; + if ( ref $node ) { # it's an element + $node->attr( 'NoExpand', undef ); + $node->attr( 'ErrorContext', undef ); + } + } + ); + }, + # And now, declarations: 'Attlist' => sub { return unless $self->{'_store_declarations'}; shift; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~declaration', - 'text' => join ' ', 'ATTLIST', @_ + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'ATTLIST', @_ ) ); return; @@ -80,11 +109,11 @@ 'Element' => sub { return unless $self->{'_store_declarations'}; shift; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~declaration', - 'text' => join ' ', 'ELEMENT', @_ + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'ELEMENT', @_ ) ); return; @@ -93,17 +122,32 @@ 'Doctype' => sub { return unless $self->{'_store_declarations'}; shift; - ( - @stack ? $stack[-1] : $self - )->push_content( - $self->{'_element_class'}->new('~declaration', - 'text' => join ' ', 'DOCTYPE', @_ + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'DOCTYPE', @_ ) ); return; }, - }); + 'Entity' => sub { + return unless $self->{'_store_declarations'}; + shift; + ( @stack ? $stack[-1] : $self )->push_content( + $self->{'_element_class'}->new( + '~declaration', + 'text' => join ' ', + 'ENTITY', @_ + ) + ); + return; + }, + }, + 'NoExpand' => $self->{'NoExpand'}, + 'ErrorContext' => $self->{'ErrorContext'} + ); return $self; } @@ -110,15 +155,15 @@ #========================================================================== sub _elem # universal accessor... { - my($self, $elem, $val) = @_; + my ( $self, $elem, $val ) = @_; my $old = $self->{$elem}; $self->{$elem} = $val if defined $val; return $old; } -sub store_comments { shift->_elem('_store_comments', @_); } -sub store_declarations { shift->_elem('_store_declarations', @_); } -sub store_pis { shift->_elem('_store_pis', @_); } +sub store_comments { shift->_elem( '_store_comments', @_ ); } +sub store_declarations { shift->_elem( '_store_declarations', @_ ); } +sub store_pis { shift->_elem( '_store_pis', @_ ); } #========================================================================== diff -rubB --exclude=.svn --exclude='*.swp' XML-TreeBuilder-3.09/t/10main.t XML-TreeBuilder-patched/t/10main.t --- XML-TreeBuilder-3.09/t/10main.t 2009-09-28 14:00:50.000000000 +1000 +++ XML-TreeBuilder-patched/t/10main.t 2009-09-28 14:00:54.000000000 +1000 @@ -2,7 +2,7 @@ # Time-stamp: "2004-06-10 20:22:53 ADT" use Test; -BEGIN { plan tests => 3 } +BEGIN { plan tests => 4 } use XML::TreeBuilder; @@ -29,8 +29,7 @@ ] ); - -ok $x->same_as($y); +ok($x->same_as($y)); unless( $ENV{'HARNESS_ACTIVE'} ) { $x->dump; @@ -43,6 +44,27 @@ $x->delete; $y->delete; +$x = XML::TreeBuilder->new({ 'NoExpand' => "1", 'ErrorContext' => "2" }); +$x->store_comments(1); +$x->store_pis(1); +$x->store_declarations(1); +$x->parse( + qq{<!-- myorp --><Gee><foo Id="me" xml:foo="lal">Hello World</foo>} . + qq{<lor/><!-- foo --></Gee><!-- glarg -->} +); + +$y = XML::Element->new_from_lol( + ['Gee', + ['~comment', {'text' => ' myorp '}], + ['foo', {'Id'=> 'me', 'xml:foo' => 'lal'}, 'Hello World'], + ['lor'], + ['~comment', {'text' => ' foo '}], + ['~comment', {'text' => ' glarg '}], + ] +); + +ok($x->same_as($y)); + ok 1; print "# Bye from ", __FILE__, "\n";
This functionality was shipped in version 4.0