diff -ur YAML-LibYAML-0.27/LibYAML/LibYAML.xs YAML-LibYAML-utf8/LibYAML/LibYAML.xs
--- YAML-LibYAML-0.27/LibYAML/LibYAML.xs 2007-06-25 03:11:42.000000000 -0400
+++ YAML-LibYAML-utf8/LibYAML/LibYAML.xs 2008-06-20 13:08:46.000000000 -0400
@@ -9,11 +9,11 @@
PROTOTYPES: DISABLE
void
-Load (yaml_str)
- char *yaml_str
+Load (yaml_sv)
+ SV *yaml_sv
PPCODE:
PL_markstack_ptr++;
- Load(yaml_str);
+ Load(yaml_sv);
return;
void
@@ -23,3 +23,4 @@
PL_markstack_ptr++;
Dump(dummy);
return;
+
diff -ur YAML-LibYAML-0.27/LibYAML/perl_libyaml.c YAML-LibYAML-utf8/LibYAML/perl_libyaml.c
--- YAML-LibYAML-0.27/LibYAML/perl_libyaml.c 2008-06-07 01:30:30.000000000 -0400
+++ YAML-LibYAML-utf8/LibYAML/perl_libyaml.c 2008-06-20 13:08:46.000000000 -0400
@@ -116,11 +116,20 @@
* It takes a yaml stream and turns it into 0 or more Perl objects.
*/
void
-Load(char *yaml_str)
+Load(SV *yaml_sv)
{
dXSARGS;
perl_yaml_loader_t loader;
SV *node;
+ char *yaml_str;
+ STRLEN yaml_len;
+
+ /* If UTF8, make copy and downgrade */
+ if (SvPV_nolen(yaml_sv) && SvUTF8(yaml_sv)) {
+ yaml_sv = sv_mortalcopy(yaml_sv);
+ }
+ yaml_str = SvPVbyte(yaml_sv, yaml_len);
+
sp = mark;
if (0 && (items || ax)) {} /* XXX Quiet the -Wall warnings for now. */
@@ -129,7 +138,7 @@
yaml_parser_set_input_string(
&loader.parser,
(unsigned char *)yaml_str,
- strlen((char *)yaml_str)
+ yaml_len
);
/* Get the first event. Must be a STREAM_START */
@@ -250,8 +259,8 @@
while ((key_node = load_node(loader))) {
assert(SvPOK(key_node));
value_node = load_node(loader);
- hv_store(
- hash, SvPV_nolen(key_node), sv_len(key_node), value_node, 0
+ hv_store_ent(
+ hash, key_node, value_node, 0
);
}
@@ -329,7 +338,9 @@
! strnEQ(tag, prefix, strlen(prefix))
) croak(ERRMSG "bad tag found for scalar: '%s'", tag);
class = tag + strlen(prefix);
- return sv_setref_pvn(newSV(0), class, string, strlen(string));
+ scalar = sv_setref_pvn(newSV(0), class, string, strlen(string));
+ SvUTF8_on(scalar);
+ return scalar;
}
if (loader->event.data.scalar.style == YAML_PLAIN_SCALAR_STYLE) {
@@ -343,6 +354,7 @@
return &PL_sv_no;
}
scalar = newSVpvn(string, length);
+ SvUTF8_on(scalar);
if (anchor)
hv_store(loader->anchors, anchor, strlen(anchor), SvREFCNT_inc(scalar), 0);
return scalar;
@@ -363,6 +375,7 @@
char *prefix = TAG_PERL_PREFIX "regexp:";
SV *regexp = newSVpvn(string, length);
+ SvUTF8_on(regexp);
ENTER;
SAVETMPS;
@@ -467,7 +480,7 @@
yaml_emitter_set_output(
&dumper.emitter,
&append_output,
- yaml
+ (void *) yaml
);
yaml_stream_start_event_initialize(
&event_stream_start,
@@ -494,6 +507,7 @@
/* Put the YAML stream scalar on the XS output stack */
if (yaml) {
+ SvUTF8_off(yaml);
sv_2mortal(yaml);
XPUSHs(yaml);
}
@@ -799,7 +813,7 @@
else {
string = SvPV(node, string_len);
if (
- (strlen(string) == 0) ||
+ (string_len == 0) ||
strEQ(string, "~") ||
strEQ(string, "true") ||
strEQ(string, "false") ||
@@ -808,6 +822,13 @@
) {
style = YAML_SINGLE_QUOTED_SCALAR_STYLE;
}
+ if (!SvUTF8(node)) {
+ /* copy to new SV and promote to utf8 */
+ SV *utf8sv = sv_mortalcopy(node);
+
+ /* get string and length out of utf8 */
+ string = SvPVutf8(utf8sv, string_len);
+ }
}
yaml_scalar_event_initialize(
&event_scalar,
@@ -913,9 +934,9 @@
}
int
-append_output(SV *yaml, unsigned char *buffer, unsigned int size)
+append_output(void *yaml, unsigned char *buffer, unsigned int size)
{
- sv_catpvn(yaml, (const char *)buffer, (STRLEN)size);
+ sv_catpvn((SV *)yaml, (const char *)buffer, (STRLEN)size);
return 1;
}
diff -ur YAML-LibYAML-0.27/LibYAML/perl_libyaml.h YAML-LibYAML-utf8/LibYAML/perl_libyaml.h
--- YAML-LibYAML-0.27/LibYAML/perl_libyaml.h 2008-06-07 01:30:05.000000000 -0400
+++ YAML-LibYAML-utf8/LibYAML/perl_libyaml.h 2008-06-20 13:08:46.000000000 -0400
@@ -55,7 +55,7 @@
Dump(SV *, ...);
void
-Load(char *);
+Load(SV *);
SV *
load_node(perl_yaml_loader_t *);
@@ -118,5 +118,5 @@
int
-append_output(SV *, unsigned char *, unsigned int size);
+append_output(void *, unsigned char *, unsigned int size);
diff -ur YAML-LibYAML-0.27/t/file.t YAML-LibYAML-utf8/t/file.t
--- YAML-LibYAML-0.27/t/file.t 2007-06-21 18:18:00.000000000 -0400
+++ YAML-LibYAML-utf8/t/file.t 2008-06-20 13:08:46.000000000 -0400
@@ -1,4 +1,4 @@
-use t::TestYAML tests => 6;
+use t::TestYAML tests => 7;
use YAML::XS qw'LoadFile';
@@ -21,3 +21,11 @@
my ($t1_, $t2_) = LoadFile($test_file);
is_deeply [$t1_, $t2_], [$t1, $t2], 'File roundtrip ok';
+
+my $t3 = {"foo\x{123}" => [1..4]};
+my $t4 = "howdy ho \x{5050}";
+YAML::XS::DumpFile($test_file, $t3, $t4);
+
+my ($t3_, $t4_) = LoadFile($test_file);
+
+is_deeply [$t3_, $t4_], [$t3, $t4], 'Unicode roundtrip ok';
diff -ur YAML-LibYAML-0.27/t/regexp.t YAML-LibYAML-utf8/t/regexp.t
--- YAML-LibYAML-0.27/t/regexp.t 2007-06-22 21:12:32.000000000 -0400
+++ YAML-LibYAML-utf8/t/regexp.t 2008-06-20 13:08:46.000000000 -0400
@@ -1,4 +1,4 @@
-use t::TestYAMLTests tests => 15;
+use t::TestYAMLTests tests => 18;
use Devel::Peek();
my $rx1 = qr/5050/;
@@ -30,6 +30,15 @@
--- !!perl/regexp:Bossy (?mi-xs:^edcba)
...
+my $unicode = "\x{100}";
+my $rx5 = qr/\Q$unicode\E/;
+my $yaml5 = Dump $rx5;
+
+is $yaml5, <<"...", 'Unicode regexp dumps';
+--- !!perl/regexp (?-xism:\xC4\x80)
+...
+
+
my $rx1_ = Load($yaml1);
is ref($rx1_), 'Regexp', 'Can Load a regular regexp';
is $rx1_, '(?-xism:5050)', 'Loaded regexp value is correct';
@@ -48,3 +57,7 @@
my $rx4_ = Load("--- !!perl/regexp (?msix:123)\n");
is ref($rx4_), 'Regexp', 'Can Load a regexp with all flags';
is $rx4_, '(?msix:123)', 'Loaded regexp with all flags value is correct';
+
+my $rx5_ = Load("--- !!perl/regexp (?msix:\xC4\x80)\n");
+is ref($rx5_), 'Regexp', 'Can Load a unicode regexp';
+is $rx5_, "(?msix:\x{100})", 'Loaded unicode regexp value is correct';
diff -ur YAML-LibYAML-0.27/t/utf8.t YAML-LibYAML-utf8/t/utf8.t
--- YAML-LibYAML-0.27/t/utf8.t 2007-06-24 13:26:44.000000000 -0400
+++ YAML-LibYAML-utf8/t/utf8.t 2008-06-20 13:08:46.000000000 -0400
@@ -1,4 +1,10 @@
-use t::TestYAMLTests tests => 4;
+use t::TestYAMLTests tests => 8;
+use utf8;
+
+is Dump("\x{100}"), "--- \xC4\x80\n", 'Dumping wide char works';
+is Load("--- \xC4\x80\n"), "\x{100}", 'Loading UTF-8 works';
+is Load("\xFE\xFF\0-\0-\0-\0 \x01\x00\0\n"), "\x{100}", 'Loading UTF-16BE works';
+is Load("\xFF\xFE-\0-\0-\0 \0\x00\x01\n\0"), "\x{100}", 'Loading UTF-16LE works';
my $hash = {
'åºå' => 'OpCafé',
@@ -24,6 +30,8 @@
'
...
+utf8::encode($yaml);
+
is Dump($hash), $yaml, 'Dumping Chinese hash works';
is_deeply Load($yaml), $hash, 'Loading Chinese hash works';
@@ -53,5 +61,7 @@
åå: Plagger
...
+utf8::encode($yaml2);
+
is Dump($hash2), $yaml2, 'Dumping Japanese hash works';
is_deeply Load($yaml2), $hash2, 'Loading Japanese hash works';