Subject: | whitespace after <br> between <span> tags not preserved |
When HTML is parsed and the text is read back with textContent, whitespace is lost if it comes directly after a <br/> that sits between two <span> elements. If there is no <br/>, or if the whitespace comes before the <br/>, the whitespace is preserved.
#!/usr/bin/perl
use strict;
use warnings;
use XML::LibXML;
# Reproduction case: print the textContent of the first <div> for four
# variants of the same markup. The variants differ only in where whitespace
# sits relative to the <br/> between the two <span> elements.
my @variants = (
    # Newline directly after the <br/>, nothing before it.
    "<div><span>two</span><br/>\n<span>words</span></div>\n",
    # Space before the <br/>, newline after it.
    "<div><span>two</span> <br/>\n<span>words</span></div>\n",
    # Space before the <br/>, nothing after it.
    "<div><span>two</span> <br/><span>words</span></div>",
    # Space directly after the <br/>, nothing before it.
    "<div><span>two</span><br/> <span>words</span></div>",
);

my $parser = XML::LibXML->new();

for my $markup (@variants) {
    # Parse the fragment as HTML and search from the document's root element.
    my $root = $parser->load_html(string => $markup)->getDocumentElement;

    # In list context findnodes returns the matching nodes; keep the first.
    my ($div) = $root->findnodes('//div');

    # One output line per variant so the results can be compared side by side.
    my $text = $div->textContent;
    print "$text\n";
}
Subject: | span-br-space-test.pl |
#!/usr/bin/perl
use strict;
use warnings;
use XML::LibXML;
# Reproduction case: print the textContent of the first <div> for four
# variants of the same markup. The variants differ only in where whitespace
# sits relative to the <br/> between the two <span> elements.
my @variants = (
    # Newline directly after the <br/>, nothing before it.
    "<div><span>two</span><br/>\n<span>words</span></div>\n",
    # Space before the <br/>, newline after it.
    "<div><span>two</span> <br/>\n<span>words</span></div>\n",
    # Space before the <br/>, nothing after it.
    "<div><span>two</span> <br/><span>words</span></div>",
    # Space directly after the <br/>, nothing before it.
    "<div><span>two</span><br/> <span>words</span></div>",
);

my $parser = XML::LibXML->new();

for my $markup (@variants) {
    # Parse the fragment as HTML and search from the document's root element.
    my $root = $parser->load_html(string => $markup)->getDocumentElement;

    # In list context findnodes returns the matching nodes; keep the first.
    my ($div) = $root->findnodes('//div');

    # One output line per variant so the results can be compared side by side.
    my $text = $div->textContent;
    print "$text\n";
}