commit da2490bb79e6813f4c3f0c1c7f628fcff552c3fd
Author: Gisle Aas <gisle@aas.no>
Date: Mon Nov 17 11:36:23 2008 +0100
Suppress "Parsing of undecoded UTF-8 will give garbage" warning with attr_encoded [RT#29089]
diff --git a/hparser.c b/hparser.c
index 58123c8..7e9e8c1 100644
--- a/hparser.c
+++ b/hparser.c
@@ -726,8 +726,12 @@ argspec_compile(SV* src, PSTATE* p_state)
p_state->skipped_text = newSVpvn("", 0);
}
}
- if (a == ARG_ATTR || a == ARG_ATTRARR || a == ARG_DTEXT) {
- p_state->argspec_entity_decode++;
+ if (a == ARG_ATTR || a == ARG_ATTRARR) {
+ if (p_state->argspec_entity_decode != ARG_DTEXT)
+ p_state->argspec_entity_decode = ARG_ATTR;
+ }
+ else if (a == ARG_DTEXT) {
+ p_state->argspec_entity_decode = ARG_DTEXT;
}
}
else {
@@ -1832,6 +1836,7 @@ parse(pTHX_
/* Print warnings if we find unexpected Unicode BOM forms */
#ifdef UNICODE_HTML_PARSER
if (p_state->argspec_entity_decode &&
+ !(p_state->attr_encoded && p_state->argspec_entity_decode == ARG_ATTR) &&
!p_state->utf8_mode && (
(!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
(utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
diff --git a/hparser.h b/hparser.h
index 572696c..bf29ed2 100644
--- a/hparser.h
+++ b/hparser.h
@@ -113,7 +113,7 @@ struct p_state {
/* other configuration stuff */
SV* bool_attr_val;
struct p_handler handlers[EVENT_COUNT];
- bool argspec_entity_decode;
+ int argspec_entity_decode;
/* filters */
HV* report_tags;
diff --git a/t/unicode.t b/t/unicode.t
index 5363464..911c547 100644
--- a/t/unicode.t
+++ b/t/unicode.t
@@ -7,7 +7,7 @@ BEGIN {
plan skip_all => "This perl does not support Unicode" if $] < 5.008;
}
-plan tests => 103;
+plan tests => 105;
my @warn;
$SIG{__WARN__} = sub {
@@ -182,3 +182,17 @@ ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o\xE2"));
ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o\xE2\x99"));
ok(!HTML::Entities::_probably_utf8_chunk("f\xE2"));
ok(!HTML::Entities::_probably_utf8_chunk("f\xE2\x99"));
+# Regression test for RT#29089: parsing $doc with attr_encoded set must not warn.
+$p = HTML::Parser->new(
+ api_version => 3,
+ default_h => [\@parsed, 'event, text, tag, attr'],
+ attr_encoded => 1,
+);
+# Empty both lists first so the checks below only see results of this parse.
+@warn = ();
+@parsed = ();
+
+$p->parse($doc)->eof;
+# Expect no entries in @warn (presumably filled by the __WARN__ hook) and 9 in @parsed.
+ok(!@warn);
+is(@parsed, 9);