Subject: | bugs for `<a rel`, `align`, `<ul type`, `<nav>` (with fix & tests) |
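With the attached patch, Defang will:
* handle non-lowercase `align` attribute values (e.g. `align="LEFT"`); the alignment regex is now case-insensitive
* correctly recognise the list `type` attribute; the old regex separated the allowed values with `,` instead of `|`, so no individual value such as `disc` could ever match
* accept the `<nav>` element
* accept the `rel` attribute on `<a>` when its value is a whitespace-separated list of known link types (e.g. `rel="noopener noreferrer"`)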
Subject: | 0001-various-fixes.patch |
From 239fa5642684b9e92590c0cb5bf44d5c22853de9 Mon Sep 17 00:00:00 2001
From: Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
Date: Wed, 3 Jun 2020 10:10:08 +0100
Subject: [PATCH] various fixes
* handle non-lowercase `align` attribute values
* fix regex for list `type` attribute
* accept `<nav>`
* accept `rel` attribute on `<a>`
---
lib/HTML/Defang.pm | 8 ++++++--
t/01_basic.t | 16 +++++++++++++++-
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/lib/HTML/Defang.pm b/lib/HTML/Defang.pm
index 8520e56..6c4a5a0 100644
--- a/lib/HTML/Defang.pm
+++ b/lib/HTML/Defang.pm
@@ -162,13 +162,14 @@ my $RECStyleNaked = qr/\G(\s*)()()()($StyleRules)()(\s*)/o;
my $RECStyleSelected = qr/\G(\s*)((?:$StyleSelectors)?)(\s*)(\{)($StyleRules)(\})(\s*)/o;
my $Fonts = qr/["']?([A-Za-z0-9\s-]+)["']?/;
-my $Alignments = qr/(absbottom|absmiddle|all|autocentre|baseline|bottom|center|justify|left|middle|none|right|texttop|top)/;
+my $Alignments = qr/(absbottom|absmiddle|all|autocentre|baseline|bottom|center|justify|left|middle|none|right|texttop|top)/i;
my $Executables = '([^@]\.com|'.
'.*\.(exe|cmd|bat|pif|scr|sys|sct|lnk|dll'.
'|vbs?|vbe|hta|shb|shs|hlp|chm|eml|wsf|wsh|js'.
'|asx|wm.|mdb|mht|msi|msp|cpl|lib|reg))';
my $SrcBanStd = qr/^([A-Za-z]*script|.*\&\{|mocha|about|opera|mailto:|hcp:|\/(dev|proc)|\\|file|smb|cid:${Executables}(@|\?|$))/i;
+my $link_one_type = qr{alternate|author|bookmark|external|help|license|next|nofollow|noopener|noreferrer|prev|search|tag}i;
my %Rules =
(
@@ -202,9 +203,10 @@ my %Rules =
"meta:name" => qr/^(author|progid|originator|generator|keywords|description|content-type|pragma|expires)$/i,
# mime-type: Not javascript
"mime-type" => qr/^(cite|text\/(plain|css|html|xml))$/i,
- "list-type" => qr/^(none,a,i,upper-alpha,lower-alpha,upper-roman,lower-roman,decimal,disc,square,circle,round)$/i,
+ "list-type" => qr/^(none|a|i|upper-alpha|lower-alpha|upper-roman|lower-roman|decimal|disc|square|circle|round)$/i,
# "rel" => qr/^((copyright|author|stylesheet)\s*)+$/i,
"rel" => qr/^((copyright|author)\s*)+$/i, # XXX external stylesheets can contain scripting, so kill them
+ "rel-a" => qr/^(?:$link_one_type)(?:\s+(?:$link_one_type))*$/i,
"rules" => qr/^(none|groups|rows|cols|all)$/i,
"scope" => qr/^(row|col|rowgroup|colgroup)$/i,
"shape" => qr/^(rect|rectangle|circ|circle|poly|polygon)$/i,
@@ -364,6 +366,7 @@ my %Tags = (
"type" => "mime-type",
"eudora" => "eudora",
"notrack" => "anything",
+ "rel" => "rel-a",
},
"address" => 1,
"area" =>
@@ -517,6 +520,7 @@ my %Tags = (
"marquee" => 0,
"menu" => \%ListAttributes,
"multicol" => 0,
+ "nav" => 2,
"nextid" => 0,
"nobr" => 0,
"noembed" => 1,
diff --git a/t/01_basic.t b/t/01_basic.t
index 7759011..4b46377 100755
--- a/t/01_basic.t
+++ b/t/01_basic.t
@@ -4,7 +4,7 @@ BEGIN { # CPAN users don't have ME::*, so use eval
eval 'use ME::FindLibs'
}
-use Test::More tests => 93;
+use Test::More tests => 97;
use HTML::Defang;
use strict;
@@ -536,3 +536,17 @@ $Res = $Defang->defang($H);
like($Res, qr{^1:<!--${DefangString}unknownTag title="something with in it"-->}, "Defang unknown tag with --'s in it");
like($Res, qr{^2:<b><!--${DefangString}noscript--><!--$CommentStartText </noscript><img src=xx: onerror=alert\(document\.domain\) $CommentEndText--><!--/${DefangString}noscript-->}m, "Defang noscript tag");
+$H = <<EOF;
+<nav>
+<ul type="disc">
+<li>
+<p align="LEFT">
+<a rel="noopener noreferrer" href="foo">thing
+EOF
+
+$Res = $Defang->defang($H);
+
+like($Res, qr{<nav>},'nav element');
+like($Res, qr{\btype="disc"},'list type attribute');
+like($Res, qr{\balign="left"}i,'alignment attribute');
+like($Res, qr{\brel="noopener noreferrer"},'rel attribute');
--
2.26.2