CC: | Benct Philip Jonsson <bpjonsson [...] gmail.com> |
Subject: | [PATCH] Replace \w and \d in regexes with ASCII-only char classes |
Date: | Tue, 5 Jul 2016 18:06:21 +0200 |
To: | bug-Data-Dump [...] rt.cpan.org |
From: | Benct Philip Jonsson <bpjonsson [...] gmail.com> |
Some regexes in Dump.pm used \w and \d. On newer perls these
also match non-ASCII word characters and digits, so hash keys
containing such characters were not quoted in the dump. Replace
them with explicit ASCII-only character classes.
---
lib/Data/Dump.pm | 12 ++++++------
t/unikeys.t | 27 +++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 6 deletions(-)
create mode 100644 t/unikeys.t
diff --git a/lib/Data/Dump.pm b/lib/Data/Dump.pm
index 1905723..fce166a 100644
--- a/lib/Data/Dump.pm
+++ b/lib/Data/Dump.pm
@@ -298,7 +298,7 @@ sub _dump
}
my $text_keys = 0;
for (@orig_keys) {
- $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
+ $text_keys++, last unless /^[-+]?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?\z/;
}
if ($text_keys) {
@@ -310,8 +310,8 @@ sub _dump
my $quote;
for my $key (@orig_keys) {
- next if $key =~ /^-?[a-zA-Z_]\w*\z/;
- next if $key =~ /^-?[1-9]\d{0,8}\z/;
+ next if $key =~ /^-?[a-zA-Z_][a-zA-Z0-9_]*\z/;
+ next if $key =~ /^-?[1-9][0-9]{0,8}\z/;
$quote++;
last;
}
@@ -448,10 +448,10 @@ sub format_list
my $v = $_[$i];
while ($j < @_) {
# XXX allow string increment too?
- if ($v eq "0" || $v =~ /^-?[1-9]\d{0,9}\z/) {
+ if ($v eq "0" || $v =~ /^-?[1-9][0-9]{0,9}\z/) {
$v++;
}
- elsif ($v =~ /^"([A-Za-z]{1,3}\d*)"\z/) {
+ elsif ($v =~ /^"([A-Za-z]{1,3}[0-9]*)"\z/) {
$v = $1;
$v++;
$v = qq("$v");
@@ -544,7 +544,7 @@ sub quote {
s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
# no need for 3 digits in escape for these
- s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
+ s/([\0-\037])(?![0-9])/sprintf('\\%o',ord($1))/eg;
s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
diff --git a/t/unikeys.t b/t/unikeys.t
new file mode 100644
index 0000000..690ce79
--- /dev/null
+++ b/t/unikeys.t
@@ -0,0 +1,27 @@
+use strict;
+use warnings;
+
+# Some regexes in Dump.pm used \w and \d, which on newer perls also
+# match non-ASCII word characters and digits, so hash keys containing
+# such characters were dumped unquoted. Check that the dump of such
+# keys evaluates without error and round-trips every key/value pair
+# of the original hash.
+
+use Test;
+BEGIN { plan tests => 3 }
+
+use utf8;
+use Data::Dump qw[ pp ];
+
+my $h = +{
+ 'föŋ' => 'föŋ', # some Latin-1 and some other Unicode character
+ '9१२३' => '9१२३', # some Devanagari digits
+};
+
+my $dump = pp $h;
+my $h1 = eval $dump;
+ok( !$@ && ref($h1) eq 'HASH' ); # the dump must be valid Perl yielding a hash
+
+for my $k ( sort keys %$h ) { # walk the ORIGINAL keys so the test count is fixed
+ ok( ref($h1) eq 'HASH' && exists $h1->{$k} && $h1->{$k} eq $h->{$k} );
+}
+
--
1.9.1