Hi
I think I have a proper fix for the encoding problem; see the attached patch.
Also attached is a pretty basic test case that checks that UTF-8
encoding works fine for strings, and I have tested that some internal
client and server apps here at work now correctly handle data with
snowmen when encoding is set to UTF-8.
There are a couple more calls to utf8_downgrade in RPC::XML::Server and
RPC::XML::Client that should no longer be needed, but I haven't tested that
enough to be sure, and leaving them in shouldn't do any harm anyway.
--
Knut Arne Bjørndal, Easy Connect AS
bobkare@cpan.org, knut.arne.bjorndal@easyconnect.no
diff --git a/lib/RPC/XML.pm b/lib/RPC/XML.pm
index b2eda88..874041e 100644
--- a/lib/RPC/XML.pm
+++ b/lib/RPC/XML.pm
@@ -30,6 +30,7 @@ use vars qw(@EXPORT_OK %EXPORT_TAGS $VERSION $ERROR
use subs qw(time2iso8601 smart_encode utf8_downgrade);
use base 'Exporter';
+use Encode;
use Scalar::Util qw(blessed reftype);
## no critic (ProhibitSubroutinePrototypes)
@@ -385,7 +386,7 @@ sub as_string
substr $class, 0, 8, 'dateTime';
}
- return "<$class>$$self</$class>";
+ return Encode::encode($RPC::XML::ENCODING, "<$class>$$self</$class>", Encode::FB_CROAK);
}
# Serialization for simple types is just a matter of sending as_string over
@@ -394,7 +395,6 @@ sub serialize
my ($self, $fh) = @_;
my $str = $self->as_string;
- RPC::XML::utf8_downgrade($str);
print {$fh} $str;
return;
@@ -406,7 +406,7 @@ sub length ## no critic (ProhibitBuiltinHomonyms)
{
my $self = shift;
- RPC::XML::utf8_downgrade(my $str = $self->as_string);
+ my $str = $self->as_string;
return length $str;
}
@@ -502,6 +502,8 @@ sub as_string
($value = defined ${$self} ? ${$self} : q{} )
=~ s/$RPC::XML::XMLRE/$RPC::XML::XMLMAP{$1}/ge;
+ $value = Encode::encode($RPC::XML::ENCODING, $value, Encode::FB_CROAK);
+
return "<$class>$value</$class>";
}
@@ -799,6 +801,7 @@ sub as_string
for (keys %{$self})
{
($key = $_) =~ s/$RPC::XML::XMLRE/$RPC::XML::XMLMAP{$1}/ge;
+ $key = Encode::encode($RPC::XML::ENCODING, $key, Encode::FB_CROAK);
$clean{$key} = $self->{$_}->as_string;
}
@@ -823,7 +826,7 @@ sub serialize
for (keys %{$self})
{
($key = $_) =~ s/$RPC::XML::XMLRE/$RPC::XML::XMLMAP{$1}/ge;
- RPC::XML::utf8_downgrade($key);
+ $key = Encode::encode($RPC::XML::ENCODING, $key, Encode::FB_CROAK);
print {$fh} "<member><name>$key</name><value>";
$self->{$_}->serialize($fh);
print {$fh} '</value></member>';
@@ -843,7 +846,7 @@ sub length ## no critic (ProhibitBuiltinHomonyms)
{
$len += 45; # For all the constant XML presence
$len += $self->{$key}->length;
- RPC::XML::utf8_downgrade($key);
+ $key = Encode::encode($RPC::XML::ENCODING, $key, Encode::FB_CROAK);
$len += length $key;
}
@@ -1357,7 +1360,7 @@ sub serialize
{
my ($self, $fh) = @_;
my $name = $self->{name};
- RPC::XML::utf8_downgrade($name);
+ $name = Encode::encode($RPC::XML::ENCODING, $name, Encode::FB_CROAK);
print {$fh} qq(<?xml version="1.0" encoding="$RPC::XML::ENCODING"?>);
diff --git a/t/utf8.t b/t/utf8.t
index e69de29..27bcd34 100755
--- a/t/utf8.t
+++ b/t/utf8.t
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+# Test UTF-8 encoding in RPC::XML.
+# Builds a struct with non-ASCII keys and values, then checks its length()
+# and that its as_string() output can be decoded as UTF-8.
+use Test::More tests => 2;
+use Encode;
+# Load the module under test and select UTF-8 as its output encoding.
+use RPC::XML;
+$RPC::XML::ENCODING = 'UTF-8';
+# Non-ASCII text appears as a value, as a key, and as both key and value.
+# \x{2603}\x{2602} is SNOWMAN followed by UMBRELLA; each is 3 bytes in UTF-8.
+my $data_ref = RPC::XML::struct->new(
+ simplekey => "\x{2603}\x{2602}",
+ "\x{2603}\x{2602}" => 'simplevalue',
+ "\x{2603}\x{2602}\x{2603}\x{2602}" => "\x{2603}\x{2602}\x{2603}\x{2602}",
+);
+# Serialize once; the final decode check dies (FB_CROAK) on malformed UTF-8.
+my $encoded = $data_ref->as_string;
+# NOTE(review): 259 looks like a byte count (3 per character), not characters -- confirm.
+is($data_ref->length, 259, 'Length is correct');
+ok(Encode::decode('UTF-8', $encoded, Encode::FB_CROAK), 'decode from UTF-8 works');