Updated patch with more tests and proper subst in stringify.
--
Best regards, Ruslan.
From 735693a520f6145ec47134f3633ac4ae7e6df0ff Mon Sep 17 00:00:00 2001
From: Ruslan Zakirov <ruz@bestpractical.com>
Date: Sat, 3 Nov 2012 13:54:23 +0400
Subject: [PATCH] proper parsing/stringification of quoted strings
MIME::Field::ParamVal was not properly parsing quoted
params. If a param is a quoted string, it may contain
quoted pairs - characters escaped with \, including \ and ".
The regular expression naively stopped at the first \" and
ignored the rest.
Also, the stringify method was not escaping \ and ",
so the result could be invalid and unparsable.
See the relevant RFC BNF syntax and quotations below.
From RFC2045:
parameter := attribute "=" value
value := token / quoted-string
From RFC2822 (not exact syntax, only relevant):
quoted-string = ... qcontent ...
qcontent = qtext / quoted-pair
quoted-pair = ("\" text) / obs-qp
"Where any quoted-pair appears, it is to be interpreted as the text
character alone. That is to say, the "\" character that appears
as part of a quoted-pair is semantically "invisible"."
From RFC822:
quoted-pair = "\" CHAR ; may quote any char
quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or
; quoted chars.
"The quote character (backslash) and characters that delimit
syntactic units are not, generally, to be taken as data that
are part of the delimited or quoted unit(s). In particular,
the quotation-marks that define a quoted-string, the
parentheses that define a comment and the backslash that
quotes a following character are NOT part of the quoted-
string, comment or quoted character. A quotation-mark that is
to be part of a quoted-string, a parenthesis that is to be
part of a comment and a backslash that is to be part of either
must each be preceded by the quote-character backslash ("\").
Note that the syntax allows any character to be quoted within
a quoted-string or comment; however only certain characters
MUST be quoted to be included as data. These characters are
the ones that are not part of the alternate text group (i.e.,
ctext or qtext)."
---
lib/MIME/Field/ParamVal.pm | 13 ++++++++++---
t/ticket-80433.t | 15 +++++++++++++++
testmsgs/multi-2evil.ref | 4 ++--
3 files changed, 27 insertions(+), 5 deletions(-)
create mode 100644 t/ticket-80433.t
diff --git a/lib/MIME/Field/ParamVal.pm b/lib/MIME/Field/ParamVal.pm
index cd5c1d2..c441b21 100644
--- a/lib/MIME/Field/ParamVal.pm
+++ b/lib/MIME/Field/ParamVal.pm
@@ -105,6 +105,8 @@ my $TSPECIAL = '()<>@,;:\</[]?="';
my $TOKEN = '[^ \x00-\x1f\x80-\xff' . "\Q$TSPECIAL\E" . ']+';
+my $QUOTED_STRING = '"([^\\\\"]*(?:\\\\.(?:[^\\\\"]*))*)"';
+
# Encoded token:
my $ENCTOKEN = "=\\?[^?]*\\?[A-Za-z]\\?[^?]+\\?=";
@@ -239,14 +241,18 @@ sub parse_params {
$raw =~ m/\G$SPCZ(\;$SPCZ)+/og or last; # skip leading separator
$raw =~ m/\G($PARAMNAME)\s*=\s*/og or last; # give up if not a param
$param = lc($1);
- $raw =~ m/\G(?:("([^"]*)")|($ENCTOKEN)|($BADTOKEN)|($TOKEN))/g or last; # give up if no value"
- my ($qstr, $str, $enctoken, $badtoken, $token) = ($1, $2, $3, $4, $5);
+ $raw =~ m/\G(?:$QUOTED_STRING|($ENCTOKEN)|($BADTOKEN)|($TOKEN))/g or last; # give up if no value"
+ my ($qstr, $enctoken, $badtoken, $token) = ($1, $2, $3, $4, $5);
+ if (defined($qstr)) {
+ # unescape
+ $qstr =~ s/\\(.)/$1/g;
+ }
if (defined($badtoken)) {
# Strip leading/trailing whitespace from badtoken
$badtoken =~ s/^\s+//;
$badtoken =~ s/\s+\z//;
}
- $val = defined($qstr) ? $str :
+ $val = defined($qstr) ? $qstr :
(defined($enctoken) ? $enctoken :
(defined($badtoken) ? $badtoken : $token));
@@ -374,6 +380,7 @@ sub stringify {
foreach $key (sort keys %$self) {
next if ($key !~ /^[a-z][a-z-_0-9]*$/); # only lowercase ones!
defined($val = $self->{$key}) or next;
+ $val =~ s/(["\\])/\\$1/g;
$str .= qq{; $key="$val"};
}
$str;
diff --git a/t/ticket-80433.t b/t/ticket-80433.t
new file mode 100644
index 0000000..cfe04a9
--- /dev/null
+++ b/t/ticket-80433.t
@@ -0,0 +1,15 @@
+use strict;
+use warnings;
+
+use Test::More tests => 2;
+
+# Ticket 80433: quoted-pairs inside a quoted-string param must be unescaped
+# when parsed, and " and \ must be escaped again when stringified.
+use MIME::Field::ParamVal;
+
+my $field = MIME::Field::ParamVal->parse(
+ 'inline; filename="f\oo\"bar\"b\az\\\\"'
+);
+
+is($field->param('filename'), 'foo"bar"baz\\', 'quoted-pairs are unescaped when parsing');
+is($field->stringify, 'inline; filename="foo\\"bar\\"baz\\\\"', 'stringify re-escapes quotes and backslashes');
diff --git a/testmsgs/multi-2evil.ref b/testmsgs/multi-2evil.ref
index 02bde46..177a826 100644
--- a/testmsgs/multi-2evil.ref
+++ b/testmsgs/multi-2evil.ref
@@ -20,8 +20,8 @@
"Charset" => "us-ascii"
},
"Part_2" => {
- "Filename" => qq{/evil/because:of\\path\\3d-compress.gif},
- "BodyFilename" => "3d-compress.gif",
+ "Filename" => qq{/evil/because:ofpath3d-compress.gif},
+ "BodyFilename" => "because_ofpath3d-compress.gif",
"Size" => 419,
"Disposition" => "inline",
"Type" => "image/gif",
--
1.8.0