Subject: | Fix for JSON::Converter to make it replace utf8 characters with the appropriate js escape sequences |
This is my test input string:
my $data = "This is a utf8 \x0b string\nwith a \"reversed Euro\" '\x{44D}' (0x044D) symbol.";
This is my test:
my $json = JSON->new()->objToJson([$data]);
The original JSON::Converter sets $json to this:
"This is a utf8 \u000b string\nwith a \"reversed Euro\" 'э' (0x044D)
symbol."
That's OK if your client knows the string is UTF-8 encoded, but no
matter what utf8 setting I pass into JSON->new(), the 0x044D
character remains unescaped.
The patched JSON::Converter sets $json to this:
"This is a utf8 \x0B string\nwith a \"reversed Euro\" '\u044D' (0x044D)
symbol."
I'm using Perl 5.8.
By the way, perldoc utf8 says in bold "Do not use this pragma for anything
else than telling Perl that your script is written in UTF-8.", yet I
see utf8::* functions being called all over the place.
Subject: | diff.txt |
--- Converter.pm.original Sun Jun 04 22:00:44 2006
+++ Converter.pm Fri Mar 23 22:44:07 2007
@@ -5,6 +5,7 @@
use vars qw($VERSION $USE_UTF8);
use strict;
+use Encode ();
use JSON ();
@@ -64,8 +65,6 @@
local $JSON::Converter::selfconvert = $self->{selfconvert};
local $JSON::Converter::utf8 = $self->{utf8};
- local *_stringfy = *_stringfy_single_quote if($self->{singlequote});
-
return $self->_toJson($obj);
}
@@ -118,12 +117,12 @@
$self->_downIndent();
my $del = $self->{_delstr};
return "{$pre"
- . join(",$pre", map { _stringfy($_) . $del .$res{$_} }
+ . join(",$pre", map { $self->_stringfy($_) . $del .$res{$_} }
(defined $JSON::Converter::keysort ? ( sort $JSON::Converter::keysort (keys %res)) : (keys %res) )
). "$post}";
}
else{
- return '{'. join(',',map { _stringfy($_) .':' .$res{$_} }
+ return '{'. join(',',map { $self->_stringfy($_) .':' .$res{$_} }
(defined $JSON::Converter::keysort ?
( sort $JSON::Converter::keysort (keys %res)) : (keys %res) )
) .'}';
@@ -185,12 +184,12 @@
return 'true' if($value =~ /^[Tt][Rr][Uu][Ee]$/);
return 'false' if($value =~ /^[Ff][Aa][Ll][Ss][Ee]$/);
}
- return _stringfy($value);
+ return $self->_stringfy($value);
}
elsif($JSON::Converter::execcoderef and ref($value) eq 'CODE'){
my $ret = $value->();
return 'null' if(!defined $ret);
- return $self->_toJson($ret) || _stringfy($ret);
+ return $self->_toJson($ret) || $self->_stringfy($ret);
}
elsif( blessed($value) and $value->isa('JSON::NotString') ){
return defined $value->{value} ? $value->{value} : 'null';
@@ -216,27 +215,32 @@
sub _stringfy {
- my ($arg) = @_;
+ my ($self, $arg) = @_;
+ if ($self->{singlequote}) {
+ $arg =~ s/([\\'\n\r\t\f\b])/$esc{$1}/eg;
+ }
+ else {
$arg =~ s/([\\"\n\r\t\f\b])/$esc{$1}/eg;
- $arg =~ s/([\x00-\x07\x0b\x0e-\x1f])/'\\u00' . unpack('H2',$1)/eg;
-
- $JSON::Converter::utf8 and utf8::decode($arg);
-
+ }
+ if ($self->{utf8}) { # does this mean allow utf8 characters in output?
+ $arg =~ s/([\x00-\x07\x0b\x0e-\x1f])/'\u00' . unpack('H2',$1)/eg;
+ }
+ else {
+ # See perluniintro.pod
+ $arg = join('',
+ map { $_ > 255 ? # if wide character...
+ sprintf('\u%04X', $_) : # \u....
+ chr($_) =~ /[[:cntrl:]]/ ? # else if control character ...
+ sprintf('\x%02X', $_) : # \x..
+ chr($_) # else as themselves
+ } unpack('U*', $arg) # unpack Unicode characters
+ );
+ }
return '"' . $arg . '"';
+ # See also: http://www.json.org/
}
-sub _stringfy_single_quote {
- my $arg = shift;
- $arg =~ s/([\\\n'\r\t\f\b])/$esc{$1}/eg;
- $arg =~ s/([\x00-\x07\x0b\x0e-\x1f])/'\\u00' . unpack('H2',$1)/eg;
-
- $JSON::Converter::utf8 and utf8::decode($arg);
-
- return "'" . $arg ."'";
-};
-
-
##############################################################################
sub _initConvert {