Subject: | empty tags like <img src="foo"> and <br /> are output incorrectly with a closing tag |
perl -MpQuery -le 'print pQuery("<html><body><img src=\"/test.html\"
/></body></html>")->find("img")->toHtml;'
<img /="/" src="/test.html"></img>
Notice the extra /="/" attribute and the extra </img> closing tag.
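The attached patch skips the bogus "/" attribute and uses
%HTML::Tagset::emptyElement to find out which tags have no closing tag,
so that they are output without one (ending in " />" when the parsed
element carries the "/" marker, and in ">" otherwise).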
After the patch, the above is rendered like this:
<img src="/test.html" />
Cheers,
Cees Hek
Subject: | pQuery-DOM-_to_html.patch |
--- pQuery/DOM.pm.orig 2009-04-29 11:33:17.000000000 +1000
+++ pQuery/DOM.pm 2009-04-29 11:38:25.000000000 +1000
@@ -5,6 +5,7 @@
use base 'HTML::TreeBuilder';
use base 'HTML::Element';
+use HTML::Tagset ();
# This is a copy of HTML::TreeBuilder::new. Sadly. TreeBuilder should be
# easier to subclass. The only change is s/HTML::Element/pQuery::DOM/g.
@@ -319,15 +320,19 @@
$$html .= qq{ class="$elem->{class}"}
if $elem->{class};
for (sort keys %$elem) {
+ next if $_ eq '/';
next if /^(_|id$|class$)/i;
$$html .= qq{ $_="$elem->{$_}"};
}
-
- $$html .= '>';
- for my $child (@{$elem->{_content} || []}) {
- _to_html($child, $html);
+
+ $$html .= $elem->{'/'} ? ' />' : '>';
+
+ if (! $HTML::Tagset::emptyElement{$elem->{_tag}} ) {
+ for my $child (@{$elem->{_content} || []}) {
+ _to_html($child, $html);
+ }
+ $$html .= '</' . $elem->{_tag} . '>';
}
- $$html .= '</' . $elem->{_tag} . '>';
}
sub _find {