URLs are stored internally in an unusual hash named $$NL, which causes
perl to throw the error message shown below when the module encounters
an oddly formatted, overly long URL.
sf@PENROSE:~$ perl _parse_test.pl
Identifier too long at (eval 2) line 1, <DATA> line 5.
#!/usr/bin/perl
# _parse_test.pl
use strict;
use warnings;
use HTML::LinkExtractor;
# Reproduction driver: runs the HTML found in the DATA section through
# HTML::LinkExtractor and collects a record for every <a> tag that the
# callback is handed.
my @links;
my $url = "http://example.com/";

# Callback passed to the extractor. Only the second argument (a hash ref
# describing one tag) is used here; the first argument is ignored.
# For each 'a' tag, stores [tag, href, _TEXT, rel] in @links.
my $on_tag = sub {
    my ( undef, $tag ) = @_;
    push @links, [ @{$tag}{qw(tag href _TEXT rel)} ]
        if $tag->{'tag'} eq 'a';
};

# NOTE(review): the meaning of the third constructor argument (1) is not
# visible in this script -- confirm against the module documentation.
my $extractor = HTML::LinkExtractor->new( $on_tag, $url, 1 );

# Read the DATA section line by line (list context) and glue it together,
# so the input line counter ends up the same as with an explicit loop.
my $html = join '', <DATA>;

$extractor->parse( \$html );
__DATA__
<html><head><title>Title</title></head>
<body>
<a
href="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARhttp://marcel/zgf/aktionen.html">test</a>
</body>
</html>