Subject: | Check for utf-8 in the Pod::POM::parse_text method |
Added a check on the input data to the 'parse_text' method so that data which is already in utf8 is not decoded again.
If the method receives data with the _utf8_on flag set, it fails:
"Cannot decode string with wide characters at <skipped>/lib/5.20.1/darwin-thread-multi-2level/Encode.pm line 195."
Here is an example of this issue: https://rt.cpan.org/Ticket/Display.html?id=91351.
Subject: | 0001-Added-the-check-for-the-input-data-to-the-parse_text.patch |
From 2f9c611e174071f9d53cec78764ceef2b8b2a497 Mon Sep 17 00:00:00 2001
From: Artem Krivopolenov <akrivopolenov@gmail.com>
Date: Fri, 29 May 2015 18:33:27 +0300
Subject: [PATCH 2/2] Added the check for the input data to the 'parse_text'
method, we don't decode data which is already in utf8.
Here is the example of such issue: https://rt.cpan.org/Ticket/Display.html?id=91351.
---
lib/Pod/POM.pm | 4 +++-
t/htmlview.t | 10 +++++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/lib/Pod/POM.pm b/lib/Pod/POM.pm
index 3dc3bda..67c8cca 100644
--- a/lib/Pod/POM.pm
+++ b/lib/Pod/POM.pm
@@ -180,7 +180,9 @@ sub parse_text {
my($encline,$chunk) = splice @encchunks, 0, 2;
require Encode;
my($encoding) = $encline =~ /^=encoding\s+(\S+)/;
- Encode::from_to($chunk, $encoding, "utf8");
+ if ($encoding ne 'utf8' || !Encode::is_utf8($chunk)) {
+ Encode::from_to($chunk, $encoding, "utf8");
+ }
Encode::_utf8_on($chunk);
# $text .= "xxx$encline";
$text .= $chunk;
diff --git a/t/htmlview.t b/t/htmlview.t
index 5a200a4..c586757 100644
--- a/t/htmlview.t
+++ b/t/htmlview.t
@@ -2,6 +2,9 @@
use strict;
use lib qw( ./lib ../lib );
+use utf8;
+
+use Encode;
use Pod::POM;
use Pod::POM::View::HTML;
use Pod::POM::Test;
@@ -13,6 +16,7 @@ $Pod::POM::DEFAULT_VIEW = 'Pod::POM::View::HTML';
my $text;
{ local $/ = undef;
$text = <DATA>;
+ Encode::_utf8_on($text);
}
my ($test, $expect) = split(/\s*-------+\s*/, $text);
@@ -52,6 +56,8 @@ sub diff {
#print $pom;
__DATA__
+=encoding utf8
+
=head1 NAME
Test
@@ -67,6 +73,7 @@ Test
This is the description.
Here is a verbatim section.
+ And we ♥ utf-8 here.
This is some more regular text.
@@ -213,7 +220,8 @@ as well.
<h1>DESCRIPTION</h1>
<p>This is the description.</p>
-<pre> Here is a verbatim section.</pre>
+<pre> Here is a verbatim section.
+ And we ♥ utf-8 here.</pre>
<p>This is some more regular text.</p>
<p>Here is some <b>bold</b> text, some <i>italic</i> and something that looks
--
2.4.2