Bug #115891 for Data-Dump: [PATCH] Replace \w and \d in regexes with ASCII-only char classes

CC:	Benct Philip Jonsson <bpjonsson [...] gmail.com>
Subject:	[PATCH] Replace \w and \d in regexes with ASCII-only char classes
Date:	Tue, 5 Jul 2016 18:06:21 +0200
To:	bug-Data-Dump [...] rt.cpan.org
From:	Benct Philip Jonsson <bpjonsson [...] gmail.com>

Because some regexes in Dump.pm contained \w and \d
some hash keys containing non-ASCII characters would not get
quoted on newer perls since those characters matched the
extended version of \w and \d.
---
 lib/Data/Dump.pm | 12 ++++++------
 t/unikeys.t      | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 6 deletions(-)
 create mode 100644 t/unikeys.t

diff --git a/lib/Data/Dump.pm b/lib/Data/Dump.pm
index 1905723..fce166a 100644
--- a/lib/Data/Dump.pm
+++ b/lib/Data/Dump.pm
@@ -298,7 +298,7 @@ sub _dump
         }
         my $text_keys = 0;
         for (@orig_keys) {
-            $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
+            $text_keys++, last unless /^[-+]?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?\z/;
         }
 
         if ($text_keys) {
@@ -310,8 +310,8 @@ sub _dump
 
         my $quote;
         for my $key (@orig_keys) {
-            next if $key =~ /^-?[a-zA-Z_]\w*\z/;
-            next if $key =~ /^-?[1-9]\d{0,8}\z/;
+            next if $key =~ /^-?[a-zA-Z_][a-zA-Z0-9_]*\z/;
+            next if $key =~ /^-?[1-9][0-9]{0,8}\z/;
             $quote++;
             last;
         }
@@ -448,10 +448,10 @@ sub format_list
         my $v = $_[$i];
         while ($j < @_) {
             # XXX allow string increment too?
-            if ($v eq "0" || $v =~ /^-?[1-9]\d{0,9}\z/) {
+            if ($v eq "0" || $v =~ /^-?[1-9][0-9]{0,9}\z/) {
                 $v++;
             }
-            elsif ($v =~ /^"([A-Za-z]{1,3}\d*)"\z/) {
+            elsif ($v =~ /^"([A-Za-z]{1,3}[0-9]*)"\z/) {
                 $v = $1;
                 $v++;
                 $v = qq("$v");
@@ -544,7 +544,7 @@ sub quote {
   s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
 
   # no need for 3 digits in escape for these
-  s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
+  s/([\0-\037])(?![0-9])/sprintf('\\%o',ord($1))/eg;
 
   s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
   s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
diff --git a/t/unikeys.t b/t/unikeys.t
new file mode 100644
index 0000000..690ce79
--- /dev/null
+++ b/t/unikeys.t
@@ -0,0 +1,27 @@
+use strict;
+use warnings;
+
+# Because some regexes in Dump.pm contained \w and \d
+# some hash keys containing non-ASCII characters would not get
+# quoted on newer perls since those characters matched the
+# extended version of \w and \d. This tests that that is
+# not the case anymore.
+
+use Test;
+BEGIN { plan tests => 2 }
+
+use utf8;
+use Data::Dump qw[ pp ];
+
+my $h = +{
+    'föŋ' => 'föŋ', # some Latin-1 and some other Unicode character
+    '9१२३' => '9१२३', # some Devanagari digits
+};
+
+my $dump = pp $h;
+my $h1 = eval $dump;
+
+while ( my($k,$v) = each %$h1 ) {
+    ok $k eq $v;
+};
+
-- 
1.9.1