Subject: | UTF-8 double encoding with already encoded text |
If the values of some fields already contain UTF-8 encoded multi-byte
characters, they are UTF-8 encoded a second time in Solr.pm.
My suggestion is to encode each value when the Field object is created,
but only if utf8::is_utf8 does not already report it as UTF-8. See the
attached patch.
All tests pass on my computer (Ubuntu Jaunty).
Subject: | webservice-solr-utf8.patch |
diff --git a/lib/WebService/Solr.pm b/lib/WebService/Solr.pm
index 3fac899..14f1af1 100644
--- a/lib/WebService/Solr.pm
+++ b/lib/WebService/Solr.pm
@@ -2,7 +2,6 @@ package WebService::Solr;
use Moose;
-use Encode qw(encode);
use URI;
use LWP::UserAgent;
use WebService::Solr::Response;
@@ -150,7 +149,7 @@ sub _send_update {
my $req = HTTP::Request->new(
POST => $url,
HTTP::Headers->new( Content_Type => 'text/xml; charset=utf-8' ),
- '<?xml version="1.0" encoding="UTF-8"?>' . encode('utf8', $xml)
+ '<?xml version="1.0" encoding="UTF-8"?>' . $xml
);
my $http_response = $self->agent->request($req);
diff --git a/lib/WebService/Solr/Field.pm b/lib/WebService/Solr/Field.pm
index a851e33..5fa6409 100644
--- a/lib/WebService/Solr/Field.pm
+++ b/lib/WebService/Solr/Field.pm
@@ -14,6 +14,9 @@ sub BUILDARGS {
my ( $self, $name, $value, $opts ) = @_;
$opts ||= {};
+ utf8::encode($value)
+ unless utf8::is_utf8($value) || !defined $value;
+
return { name => $name, value => $value, %$opts };
}