Subject: | XML::LibXML::SAX::parse_chunk() |
XML-LibXML-1.57, XML-LibXML-1.58_1
perl 5.8.1
OS: SuSE Linux 8.2
XML::LibXML::SAX::parse_chunk() fires the end_document() event twice, which prevents the handler from building a proper document. I found that one end_document() call comes from parse_xml_chunk() in LibXML.pm:
if ( defined $self->{SAX} ) {
eval {
$self->_parse_sax_xml_chunk( @_ );
# this is required for XML::GenericChunk.
# in normal case is_filter is not defined, an thus the parsing
# will be terminated. in case of a SAX filter the parsing is not
# finished at that state. therefore we must not reset the parsing
unless ( $self->{IS_FILTER} ) {
$result = $self->{HANDLER}->end_document();
}
};
}
and the other comes from XML::LibXML::SAX::_parse().
The bug is easily reproduced with this script:
#!/usr/bin/perl
# Reproduction script: feeds one XML chunk to XML::LibXML::SAX via
# parse_chunk() with an XML::LibXML::SAX::Builder as the handler, then
# prints the document the builder returns (if any).
use XML::LibXML::SAX;
use XML::LibXML::SAX::Builder;
# The builder is registered as the parser's SAX handler.
$builder = XML::LibXML::SAX::Builder->new();
$parser = XML::LibXML::SAX->new(Handler => $builder);
# The script brackets the chunk with its own start_document() and
# end_document() calls on the builder.
$builder->start_document();
$parser->parse_chunk( "<test><some_tag>alabala</some_tag></test>");
$builder->end_document();
# Fetch whatever the builder produced and report it.
$result = $builder->result();
if ( defined($result) ) {
print "The following document was built:\n" . $result->toString(1);
}
else {
# NOTE(review): presumably the branch taken when the bug occurs -- confirm.
print "No document was built!\n";
}
diff -aur XML-LibXML-cvs/lib/XML/LibXML/SAX.pm XML-LibXML-cvs-patched/lib/XML/LibXML/SAX.pm
--- XML-LibXML-cvs/lib/XML/LibXML/SAX.pm 2004-02-14 20:35:12.000000000 +0200
+++ XML-LibXML-cvs-patched/lib/XML/LibXML/SAX.pm 2004-06-04 16:36:25.000000000 +0300
@@ -27,7 +27,8 @@
$self->{ParserOptions}{LibParser} = XML::LibXML->new;
$self->{ParserOptions}{ParseFunc} = \&XML::LibXML::parse_fh;
$self->{ParserOptions}{ParseFuncParam} = $fh;
- return $self->_parse;
+ $self->_parse;
+ return $self->end_document({});
}
sub _parse_string {
@@ -36,7 +37,8 @@
$self->{ParserOptions}{LibParser} = XML::LibXML->new() unless defined $self->{ParserOptions}{LibParser};
$self->{ParserOptions}{ParseFunc} = \&XML::LibXML::parse_string;
$self->{ParserOptions}{ParseFuncParam} = $string;
- return $self->_parse;
+ $self->_parse;
+ return $self->end_document({});
}
sub _parse_systemid {
@@ -65,8 +67,6 @@
if ( $args->{LibParser}->{SAX}->{State} == 1 ) {
croak( "SAX Exception not implemented, yet; Data ended before document ended\n" );
}
-
- return $self->end_document({});
}
diff -aur XML-LibXML-cvs/LibXML.pm XML-LibXML-cvs-patched/LibXML.pm
--- XML-LibXML-cvs/LibXML.pm 2004-06-03 16:13:46.000000000 +0300
+++ XML-LibXML-cvs-patched/LibXML.pm 2004-06-04 16:34:47.000000000 +0300
@@ -467,14 +467,6 @@
if ( defined $self->{SAX} ) {
eval {
$self->_parse_sax_xml_chunk( @_ );
-
- # this is required for XML::GenericChunk.
- # in normal case is_filter is not defined, an thus the parsing
- # will be terminated. in case of a SAX filter the parsing is not
- # finished at that state. therefore we must not reset the parsing
- unless ( $self->{IS_FILTER} ) {
- $result = $self->{HANDLER}->end_document();
- }
};
}
else {