A patch for Text::Unaccent 1.07 (http://www.senga.org/unac/) that makes it
safely handle non-string scalars such as undef and references.
It is not as complete as it could be - for example, it does not stringify
overloaded scalars - but at least it stops the lockups and core dumps.
diff -aur Text-Unaccent-1.07/Unaccent.xs Text-Unaccent-1.07-patched/Unaccent.xs
--- Text-Unaccent-1.07/Unaccent.xs 2002-09-02 15:16:06.000000000 +0100
+++ Text-Unaccent-1.07-patched/Unaccent.xs 2004-03-29 19:35:40.000000000 +0100
@@ -65,7 +65,7 @@
PROTOTYPE: $$
CODE:
STRLEN in_length;
- in_length = SvCUR(ST(1));
+ in_length = (SvPOK(ST(1)) ? SvCUR(ST(1)) : 0);
if(unac_string(charset,
in, in_length,
&buffer, &buffer_length) == 0) {
@@ -83,7 +83,7 @@
PROTOTYPE: $
CODE:
STRLEN in_length;
- in_length = SvCUR(ST(0));
+ in_length = (SvPOK(ST(0)) ? SvCUR(ST(0)) : 0); /* sole argument is ST(0); non-string scalars count as empty */
if(unac_string_utf16(in, in_length,
&buffer, &buffer_length) == 0) {
RETVAL = newSVpv(buffer, buffer_length);
diff -aur Text-Unaccent-1.07/t/unac.t Text-Unaccent-1.07-patched/t/unac.t
--- Text-Unaccent-1.07/t/unac.t 2002-09-02 15:16:06.000000000 +0100
+++ Text-Unaccent-1.07-patched/t/unac.t 2004-03-29 19:36:04.000000000 +0100
@@ -19,7 +19,7 @@
use Text::Unaccent;
-plan test => 4;
+plan test => 8;
ok(unac_string("ISO-8859-1", "��), "ete", "removing accents from ��(1)");
ok(unac_string("ISO-8859-1", "��), "ete", "removing accents from ��(2)");
@@ -30,6 +30,11 @@
#
ok(unac_debug($Text::Unaccent::DEBUG_HIGH), undef, "setting debug level");
+ok(unac_string("UTF-8", $a="abc"), "abc", "SvROK test (string)");
+ok(unac_string("UTF-8", $a=[]), "", "SvROK test (ref)");
+ok(unac_string("UTF-8", $a="abc"), "abc", "SvROK test (string)");
+ok(unac_string("UTF-8", $a=undef), "", "SvROK test (undef)");
+
# Local Variables: ***
# mode: perl ***
# End: ***