Subject: | First 1994 characters decoded differently from the rest |
XML::SAX 0.99 decodes the first 1994 characters of a string, but leaves
everything after that undecoded. This may be related to a buggy fix for
bug RT#37147.
Attached is a test demonstrating the bug.
For the record, I'm running Perl v5.14.2 on Debian
(x86_64-linux-gnu-thread-multi).
Subject: | show_bug.pl |
#!/usr/bin/env perl
# To the extent possible under law, the person who associated CC0 with this
# work has waived all copyright and related or neighboring rights to this work.
#
# http://creativecommons.org/publicdomain/zero/1.0/
use strict;
use warnings;
package MySaxHandler;
use base qw(XML::SAX::Base);
# Handler callback for character data: append the chunk carried in the
# event's Data entry to the text accumulated so far on the handler object.
#
# Arguments:
#   1. the handler object (a hash-based object)
#   2. a hash reference whose Data entry holds the text chunk
#
# The accumulated text lives in the handler's saved_characters slot.
sub characters {
    my ( $handler, $event ) = @_;

    # Left as the final expression so the sub's value is unchanged.
    $handler->{saved_characters} .= $event->{Data};
}
package main;
use Test::More;
use Encode;
use XML::SAX::ParserFactory;
# The UTF-8 octets of U+2665 are the same on every iteration, so encode them
# and compile the "still raw octets" pattern once, outside the loop.
# The character class matches any single one of those octets appearing as a
# character in the result.
my $heart_octets = Encode::encode( 'UTF-8', "\x{2665}" );
my $raw_octet_re = qr/[$heart_octets]/;

# Put U+2665 after 0, 2, 4, ... 1996 ASCII characters ('f ' repeated
# $counter times), parse the UTF-8 encoded document, and check that the
# character data reaching the handler is a decoded string.
for my $counter ( 0 .. 998 ) {
    my $prefix_length = $counter * 2;
    my $test          = ( 'f ' x $counter ) . chr(0x2665);

    # Strict "UTF-8" matches the encoding named in the XML declaration.
    my $octets = Encode::encode( 'UTF-8',
        '<?xml version="1.0" encoding="UTF-8"?><root><element>' . $test . '</element></root>'
    );

    # A fresh handler and parser per document, so saved_characters only
    # ever holds the text of the current iteration.
    my $handler = MySaxHandler->new();
    my $p       = XML::SAX::ParserFactory->parser( Handler => $handler );
    $p->parse_string($octets);
    my $res = $handler->{saved_characters};

    like(
        $res,
        qr/\x{2665}/,
        "Result contains Unicode character U+2665 (after " . $prefix_length . " characters)"
    );
    unlike(
        $res,
        $raw_octet_re,
        "Result is not an octet string (after " . $prefix_length . " characters)"
    );
}
done_testing();