Subject: | default handler not passed correct arguments if string contains bad or no markup |
I tried to subclass HTML::Parser, and it seems that the default handler
is only invoked when there's correct markup -- and that defeats the
purpose :-(
[me@myhost]$ perl -V
Summary of my perl5 (revision 5.0 version 6 subversion 1) configuration:
Platform:
osname=linux, osvers=2.4.8-1.5, archname=i686-linux
uname='linux sv.wafu.ne.jp 2.4.8-1.5 #7 thu nov 29 18:35:39 jst 2001 i686 unknown '
config_args=''
hint=previous, useposix=true, d_sigaction=define
usethreads=undef use5005threads=undef useithreads=undef usemultiplicity=undef
useperlio=undef d_sfio=undef uselargefiles=define usesocks=undef
use64bitint=undef use64bitall=undef uselongdouble=undef
Compiler:
cc='cc', ccflags ='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64',
optimize='-O2',
cppflags='-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64'
ccversion='', gccversion='2.96 20000731 (Red Hat Linux 7.1 2.96-81)', gccosandvers=''
intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234
d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12
ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t', lseeksize=8
alignbytes=4, usemymalloc=n, prototype=define
Linker and Libraries:
ld='cc', ldflags =' -L/usr/local/lib'
libpth=/usr/local/lib /lib /usr/lib
libs=-lnsl -lndbm -lgdbm -ldl -lm -lc -lcrypt -lutil
perllibs=-lnsl -ldl -lm -lc -lcrypt -lutil
libc=/lib/libc-2.2.2.so, so=so, useshrplib=true, libperl=libperl.so
Dynamic Linking:
dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-rdynamic -Wl,-rpath,/usr/local/lib/perl5/5.6.1/i686-linux/CORE'
cccdlflags='-fpic', lddlflags='-shared -L/usr/local/lib'
Characteristics of this binary (from libperl):
Compile-time options: USE_LARGE_FILES
Built under linux
Compiled at Jan 10 2002 12:05:49
@INC:
/usr/local/lib/perl5/5.6.1/i686-linux
/usr/local/lib/perl5/5.6.1
/usr/local/lib/perl5/site_perl/5.6.1/i686-linux
/usr/local/lib/perl5/site_perl/5.6.1
/usr/local/lib/perl5/site_perl
.
[me@myhost]$
[me@myhost]$ perl -MHTML::Parser -e 'print $HTML::Parser::VERSION, "\n"'
3.25
package MyParser;

# Minimal HTML::Parser (API version 3) subclass used to reproduce the report:
# every start-tag, end-tag and default event is echoed to STDOUT together
# with the arguments the parser handed to the handler.
#
# NOTE(review): HTML::Parser may hold back trailing text until eof() is
# called.  The clean() method invoked in the transcript is not defined in
# this package, so confirm that it calls $p->eof after $p->parse; a missing
# eof() would explain why "foobar" never reaches the default handler.

use strict;
use warnings;
use base qw/ HTML::Parser /;

# Constructor.  Takes the class name only (any further arguments are
# ignored) and returns an HTML::Parser object, built through SUPER::new,
# with the three handlers below registered.  The argspec strings decide
# which values each handler receives.
sub new
{
    my $class = shift;
    my $self  = $class->SUPER::new(
        api_version => 3,
        start_h     => [ \&start,   'self,text,tagname,@attr' ],
        end_h       => [ \&end,     'self,text,tagname' ],
        default_h   => [ \&default, 'self,text' ],
    );
    return $self;
}

# Private helper: prints "<label> => <args joined by spaces>\n" to STDOUT.
# Undefined arguments are shown as empty strings so the output is the same
# as a plain interpolation of @_ but without "uninitialized" warnings.
sub _report
{
    my ($label, @args) = @_;
    my @shown = map { defined $_ ? $_ : '' } @args;
    print "$label => @shown\n";
    return;
}

# Start-tag handler; receives (self, text, tagname, @attr).
sub start
{
    _report('start_h', @_);
    return;
}

# End-tag handler; receives (self, text, tagname).
sub end
{
    _report('end_h', @_);
    return;
}

# Handler for every event without a more specific handler; receives
# (self, text).
sub default
{
    _report('default_h', @_);
    return;
}

1;
## ==== no markup =====
[me@myhost ]$ perl -I../lib -MMyParser -e 'my $p = MyParser->new(); print $p->clean( "foobar" ), "\n"'
default_h =>
[me@myhost ]$
## ==== bad markup =====
[me@myhost ]$ perl -I../lib -MMyParser -e 'my $p = MyParser->new(); print $p->clean( "<strong>foobar" ), "\n"'
default_h => nsbbs::parser=HASH(0x8056e68)
start_h => nsbbs::parser=HASH(0x8056e68) <strong> strong
[me@myhost ]$
## ==== ok markup =====
[me@myhost ]$ perl -I../lib -MMyParser -e 'my $p = MyParser->new(); print $p->clean( "<strong>foobar</strong>" ), "\n"'
default_h => nsbbs::parser=HASH(0x8054e28)
start_h => nsbbs::parser=HASH(0x8054e28) <strong> strong
default_h => nsbbs::parser=HASH(0x8054e28) foobar
end_h => nsbbs::parser=HASH(0x8054e28) </strong> strong
[me@myhost ]$