The attached patch makes LWP::Protocol::collect() invoke
HTML::HeadParser also for application/xhtml+xml (XHTML) and
application/vnd.wap.xhtml+xml (XHTML Mobile Profile) documents in
addition to text/html.
Subject: | headparser.patch |
Index: lib/HTTP/Headers.pm
===================================================================
RCS file: /cvsroot/libwww-perl/lwp5/lib/HTTP/Headers.pm,v
retrieving revision 1.64
diff -u -r1.64 Headers.pm
--- lib/HTTP/Headers.pm 8 Dec 2005 12:11:48 -0000 1.64
+++ lib/HTTP/Headers.pm 10 Feb 2008 21:50:36 -0000
@@ -292,6 +292,19 @@
wantarray ? @ct : $ct[0];
}
+sub _is_html {
+ my $self = shift;
+ return $self->content_type eq 'text/html' || $self->_is_xhtml;
+}
+
+sub _is_xhtml {
+ my $ct = shift->content_type;
+ for (qw(application/xhtml+xml application/vnd.wap.xhtml+xml)) {
+ return 1 if $_ eq $ct;
+ }
+ return 0;
+}
+
sub referer {
my $self = shift;
if (@_ && $_[0] =~ /#/) {
Index: lib/LWP/Protocol.pm
===================================================================
RCS file: /cvsroot/libwww-perl/lwp5/lib/LWP/Protocol.pm,v
retrieving revision 1.46
diff -u -r1.46 Protocol.pm
--- lib/LWP/Protocol.pm 19 Jul 2007 20:26:11 -0000 1.46
+++ lib/LWP/Protocol.pm 10 Feb 2008 21:50:38 -0000
@@ -101,9 +101,10 @@
my($ua, $parse_head, $max_size) = @{$self}{qw(ua parse_head max_size)};
my $parser;
- if ($parse_head && $response->content_type eq 'text/html') {
+ if ($parse_head && $response->_is_html) {
require HTML::HeadParser;
$parser = HTML::HeadParser->new($response->{'_headers'});
+ $parser->xml_mode(1) if $response->_is_xhtml;
$parser->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40;
}
my $content_size = 0;