Subject: | pQuery->toHtml permitted to form broken html |
When programmatically adding data via DOM methods, the system provides
easy ways to potential XSS Leaks.
Text added via pQuery::DOM->text( $foo ), turns up in the output HTML
unescaped.
my $pq = pQuery($html);
$pq->find('li')->each(sub{
$_->text("<bad");
});
print $pq->toHtml();
Should rightly output <bad
but instead outputs <bad as-is.
this escaping is expected Behaviour on javascript implementations of
->text() so this should rightly duplicate that.
->innerHtml is the recognised way to add verbatim HTML to the tree.
Attached Diff is my changes to my local copy to make this work for me
"as-expected", and diff includes entity encoding test case.
My entity encoding implementation is somewhat incomplete however, but
gets rid of the primary nasties.
Subject: | entity.diff |
diff --git a/Authors/KFREDRIC/pQuery-0.7/lib/pQuery.pm b/Authors/KFREDRIC/pQuery-0.7/lib/pQuery.pm
index 91beb5c..45bb289 100644
--- a/Authors/KFREDRIC/pQuery-0.7/lib/pQuery.pm
+++ b/Authors/KFREDRIC/pQuery-0.7/lib/pQuery.pm
@@ -169,7 +169,6 @@ sub text {
# TODO - Get/set text value
my $this = shift;
my @text;
-
$this->each(sub {
my $text = '';
_to_text($_, \$text);
@@ -177,7 +176,7 @@ sub text {
$text =~ s/^\s+|\s+$//g;
push @text, $text;
});
-
+
return wantarray ? @text : join(' ', grep /\S/, @text);
}
diff --git a/Authors/KFREDRIC/pQuery-0.7/lib/pQuery/DOM.pm b/Authors/KFREDRIC/pQuery-0.7/lib/pQuery/DOM.pm
index 7e0b6fb..76bea87 100644
--- a/Authors/KFREDRIC/pQuery-0.7/lib/pQuery/DOM.pm
+++ b/Authors/KFREDRIC/pQuery-0.7/lib/pQuery/DOM.pm
@@ -303,31 +303,84 @@ sub attributes {
################################################################################
# Helper Functions
################################################################################
+sub _quote_string {
+ # Todo
+ my $string = shift;
+ # Todo: Implement Properly, with "Encoding" specifier.
+ $string =~ s/&/&/g;
+ $string =~ s/"/"/g;
+ return $string;
+}
+sub _quote_body {
+ my $string = shift;
+ # Todo: Implement Properly with full character set encoding for "Encoding" specifier.
+ $string =~ s/&/&/g;
+ $string =~ s/"/"/g;
+ $string =~ s/</</g;
+ $string =~ s/>/>/g;
+ return $string;
+}
+sub _quote_comment {
+ my $string = shift;
+ # Todo: Make Properly
+ # This exists because -- and --> are invalid inside the context of a comment.
+ # This causes broken pages, broken code, and exploits
+ $string =~ s/-/&emdash;/g;
+ $string =~ s/</</g;
+ $string =~ s/>/>/g;
+ return $string;
+}
+
+sub _to_tag {
+ my $opts = shift;
+ my $attrs = "";
+ if( not defined $opts->{attr} )
+ {
+ $opts->{attr} = {}
+ }
+ for my $attr ( keys %{$opts->{attr}} )
+ {
+ # Todo: Entity Encode Attribte Content.
+ $attrs .= sprintf q/ %s="%s"/ , $attr , _quote_string($opts->{attr}->{$attr});
+ }
+ if( length( $opts->{content} ) eq 0 )
+ {
+ # Todo: Encoding Style for empty Tags.
+ # Using XML-Style Self-Closing instead
+ return sprintf q|<%s%s/>|, $opts->{tag}, $attrs;
+ }
+ return sprintf q|<%s%s>%s</%s>|, $opts->{tag}, $attrs, $opts->{content}, $opts->{tag} ;
+}
+
sub _to_html {
my ($elem, $html) = @_;
+
+ # Bare text elements. ( NOHTML )
if (not ref $elem) {
- $$html .= $elem;
+ $$html .= _quote_body($elem);
return;
}
+ # Comments
if ($elem->{_tag} eq '~comment') {
- $$html .= '<!--' . $elem->{text} . '-->';
+ $$html .= '<!--' . _quote_comment($elem->{text}) . '-->';
return;
}
- $$html .= '<' . $elem->{_tag};
- $$html .= qq{ id="$elem->{id}"}
- if $elem->{id};
- $$html .= qq{ class="$elem->{class}"}
- if $elem->{class};
- for (sort keys %$elem) {
- next if /^(_|id$|class$)/i;
- $$html .= qq{ $_="$elem->{$_}"};
+ # Nodes
+ my $nodeData = {
+ tag => $elem->{_tag},
+ attr => {},
+ };
+ for ( sort keys %$elem )
+ {
+ next if /^_/i;
+ $nodeData->{attr}->{$_} = $elem->{$_} if $elem->{$_};
}
-
- $$html .= '>';
+ my $contents = "";
for my $child (@{$elem->{_content} || []}) {
- _to_html($child, $html);
+ _to_html($child, \$contents);
}
- $$html .= '</' . $elem->{_tag} . '>';
+ $nodeData->{content} = $contents;
+ $$html .= _to_tag( $nodeData );
}
sub _find {
diff --git a/Authors/KFREDRIC/pQuery-0.7/t/entity.t b/Authors/KFREDRIC/pQuery-0.7/t/entity.t
new file mode 100644
index 0000000..da77019
--- /dev/null
+++ b/Authors/KFREDRIC/pQuery-0.7/t/entity.t
@@ -0,0 +1,22 @@
+use t::TestpQuery tests => 2;
+
+use pQuery;
+
+open FILE, 't/document1.html' or die $!;
+my $html = do {local $/; <FILE>};
+close FILE;
+chomp $html;
+
+my $output = '';
+
+my $pq = pQuery($html);
+
+$pq->find('li')->each(sub {
+ $_->text("<bad");
+});
+
+$output = $pq->toHtml();
+like $output, qr/<bad/, "Output Contains Escaped Entity";
+unlike $output , qr/<bad/ , "Output Does Not Contain Bare Entity" ;
+
+