Skip Menu |

Preferred bug tracker

Please visit the preferred bug tracker to report your issue.

This queue is for tickets about the YAML-LibYAML CPAN distribution.

Report information
The Basics
Id: 36734
Status: resolved
Priority: 0/
Queue: YAML-LibYAML

People
Owner: Nobody in particular
Requestors: jeffrey.klein [...] priorityhealth.com
Cc:
AdminCc:

Bug Information
Severity: Important
Broken in:
  • 0.26
  • 0.27
Fixed in: (no value)



Subject: YAML::XS dies on high-bit non-utf8 strings
YAML::XS::Dump will die or abort if given a character between \x80 and \xFF, with out the perl utf8 flag set. Tested in perl v5.8.7 and v5.10.0, YAML::XS v.026 and v0.27 #--- file use YAML::XS; my $a = "\x80"; my $b = $a; utf8::upgrade($a); print "a = b\n" if $a eq $b; eval { print Dump($a) } or warn "dump a: $@"; eval { print Dump($b) } or warn "dump b: $@"; eval { print Dump({a => $a}) } or warn "dump {a}: $@"; eval { print Dump({b => $b}) } or warn "dump {b}: $@"; __END__ #---output perl -I 'blib/lib' -I 'blib/arch' yaml.t a = b --- "\x80" dump b: YAML::XS Error: Emit scalar '', error: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS --- a: "\x80" *** glibc detected *** double free or corruption (fasttop): 0x08be2858 *** Aborted
The attached diff fixes the above issue, but causes the Dump() tests in t/utf8.t to fail.
--- perl_libyaml.c.bak 2008-06-13 17:07:42.000000000 -0400 +++ perl_libyaml.c 2008-06-13 17:07:59.000000000 -0400 @@ -797,7 +797,7 @@ style = YAML_PLAIN_SCALAR_STYLE; } else { - string = SvPV(node, string_len); + string = SvPVutf8(node, string_len); if ( (strlen(string) == 0) || strEQ(string, "~") ||
This is a more thorough patch. Tests are included.
diff -ur YAML-LibYAML-0.27/LibYAML/LibYAML.xs YAML-LibYAML-utf8/LibYAML/LibYAML.xs --- YAML-LibYAML-0.27/LibYAML/LibYAML.xs 2007-06-25 03:11:42.000000000 -0400 +++ YAML-LibYAML-utf8/LibYAML/LibYAML.xs 2008-06-20 13:08:46.000000000 -0400 @@ -9,11 +9,11 @@ PROTOTYPES: DISABLE void -Load (yaml_str) - char *yaml_str +Load (yaml_sv) + SV *yaml_sv PPCODE: PL_markstack_ptr++; - Load(yaml_str); + Load(yaml_sv); return; void @@ -23,3 +23,4 @@ PL_markstack_ptr++; Dump(dummy); return; + diff -ur YAML-LibYAML-0.27/LibYAML/perl_libyaml.c YAML-LibYAML-utf8/LibYAML/perl_libyaml.c --- YAML-LibYAML-0.27/LibYAML/perl_libyaml.c 2008-06-07 01:30:30.000000000 -0400 +++ YAML-LibYAML-utf8/LibYAML/perl_libyaml.c 2008-06-20 13:08:46.000000000 -0400 @@ -116,11 +116,20 @@ * It takes a yaml stream and turns it into 0 or more Perl objects. */ void -Load(char *yaml_str) +Load(SV *yaml_sv) { dXSARGS; perl_yaml_loader_t loader; SV *node; + char *yaml_str; + STRLEN yaml_len; + + /* If UTF8, make copy and downgrade */ + if (SvPV_nolen(yaml_sv) && SvUTF8(yaml_sv)) { + yaml_sv = sv_mortalcopy(yaml_sv); + } + yaml_str = SvPVbyte(yaml_sv, yaml_len); + sp = mark; if (0 && (items || ax)) {} /* XXX Quiet the -Wall warnings for now. */ @@ -129,7 +138,7 @@ yaml_parser_set_input_string( &loader.parser, (unsigned char *)yaml_str, - strlen((char *)yaml_str) + yaml_len ); /* Get the first event. Must be a STREAM_START */ @@ -250,8 +259,8 @@ while ((key_node = load_node(loader))) { assert(SvPOK(key_node)); value_node = load_node(loader); - hv_store( - hash, SvPV_nolen(key_node), sv_len(key_node), value_node, 0 + hv_store_ent( + hash, key_node, value_node, 0 ); } @@ -329,7 +338,9 @@ ! strnEQ(tag, prefix, strlen(prefix)) ) croak(ERRMSG "bad tag found for scalar: '%s'", tag); class = tag + strlen(prefix); - return sv_setref_pvn(newSV(0), class, string, strlen(string)); + scalar = sv_setref_pvn(newSV(0), class, string, strlen(string)); + SvUTF8_on(scalar); + return scalar; } if (loader->event.data.scalar.style == YAML_PLAIN_SCALAR_STYLE) { @@ -343,6 +354,7 @@ return &PL_sv_no; } scalar = newSVpvn(string, length); + SvUTF8_on(scalar); if (anchor) hv_store(loader->anchors, anchor, strlen(anchor), SvREFCNT_inc(scalar), 0); return scalar; @@ -363,6 +375,7 @@ char *prefix = TAG_PERL_PREFIX "regexp:"; SV *regexp = newSVpvn(string, length); + SvUTF8_on(regexp); ENTER; SAVETMPS; @@ -467,7 +480,7 @@ yaml_emitter_set_output( &dumper.emitter, &append_output, - yaml + (void *) yaml ); yaml_stream_start_event_initialize( &event_stream_start, @@ -494,6 +507,7 @@ /* Put the YAML stream scalar on the XS output stack */ if (yaml) { + SvUTF8_off(yaml); sv_2mortal(yaml); XPUSHs(yaml); } @@ -799,7 +813,7 @@ else { string = SvPV(node, string_len); if ( - (strlen(string) == 0) || + (string_len == 0) || strEQ(string, "~") || strEQ(string, "true") || strEQ(string, "false") || @@ -808,6 +822,13 @@ ) { style = YAML_SINGLE_QUOTED_SCALAR_STYLE; } + if (!SvUTF8(node)) { + /* copy to new SV and promote to utf8 */ + SV *utf8sv = sv_mortalcopy(node); + + /* get string and length out of utf8 */ + string = SvPVutf8(utf8sv, string_len); + } } yaml_scalar_event_initialize( &event_scalar, @@ -913,9 +934,9 @@ } int -append_output(SV *yaml, unsigned char *buffer, unsigned int size) +append_output(void *yaml, unsigned char *buffer, unsigned int size) { - sv_catpvn(yaml, (const char *)buffer, (STRLEN)size); + sv_catpvn((SV *)yaml, (const char *)buffer, (STRLEN)size); return 1; } diff -ur YAML-LibYAML-0.27/LibYAML/perl_libyaml.h YAML-LibYAML-utf8/LibYAML/perl_libyaml.h --- YAML-LibYAML-0.27/LibYAML/perl_libyaml.h 2008-06-07 01:30:05.000000000 -0400 +++ YAML-LibYAML-utf8/LibYAML/perl_libyaml.h 2008-06-20 13:08:46.000000000 -0400 @@ -55,7 +55,7 @@ Dump(SV *, ...); void -Load(char *); +Load(SV *); SV * load_node(perl_yaml_loader_t *); @@ -118,5 +118,5 @@ int -append_output(SV *, unsigned char *, unsigned int size); +append_output(void *, unsigned char *, unsigned int size); diff -ur YAML-LibYAML-0.27/t/file.t YAML-LibYAML-utf8/t/file.t --- YAML-LibYAML-0.27/t/file.t 2007-06-21 18:18:00.000000000 -0400 +++ YAML-LibYAML-utf8/t/file.t 2008-06-20 13:08:46.000000000 -0400 @@ -1,4 +1,4 @@ -use t::TestYAML tests => 6; +use t::TestYAML tests => 7; use YAML::XS qw'LoadFile'; @@ -21,3 +21,11 @@ my ($t1_, $t2_) = LoadFile($test_file); is_deeply [$t1_, $t2_], [$t1, $t2], 'File roundtrip ok'; + +my $t3 = {"foo\x{123}" => [1..4]}; +my $t4 = "howdy ho \x{5050}"; +YAML::XS::DumpFile($test_file, $t3, $t4); + +my ($t3_, $t4_) = LoadFile($test_file); + +is_deeply [$t3_, $t4_], [$t3, $t4], 'Unicode roundtrip ok'; diff -ur YAML-LibYAML-0.27/t/regexp.t YAML-LibYAML-utf8/t/regexp.t --- YAML-LibYAML-0.27/t/regexp.t 2007-06-22 21:12:32.000000000 -0400 +++ YAML-LibYAML-utf8/t/regexp.t 2008-06-20 13:08:46.000000000 -0400 @@ -1,4 +1,4 @@ -use t::TestYAMLTests tests => 15; +use t::TestYAMLTests tests => 18; use Devel::Peek(); my $rx1 = qr/5050/; @@ -30,6 +30,15 @@ --- !!perl/regexp:Bossy (?mi-xs:^edcba) ... +my $unicode = "\x{100}"; +my $rx5 = qr/\Q$unicode\E/; +my $yaml5 = Dump $rx5; + +is $yaml5, <<"...", 'Unicode regexp dumps'; +--- !!perl/regexp (?-xism:\xC4\x80) +... + + my $rx1_ = Load($yaml1); is ref($rx1_), 'Regexp', 'Can Load a regular regexp'; is $rx1_, '(?-xism:5050)', 'Loaded regexp value is correct'; @@ -48,3 +57,7 @@ my $rx4_ = Load("--- !!perl/regexp (?msix:123)\n"); is ref($rx4_), 'Regexp', 'Can Load a regexp with all flags'; is $rx4_, '(?msix:123)', 'Loaded regexp with all flags value is correct'; + +my $rx5_ = Load("--- !!perl/regexp (?msix:\xC4\x80)\n"); +is ref($rx5_), 'Regexp', 'Can Load a unicode regexp'; +is $rx5_, "(?msix:\x{100})", 'Loaded unicode regexp value is correct'; diff -ur YAML-LibYAML-0.27/t/utf8.t YAML-LibYAML-utf8/t/utf8.t --- YAML-LibYAML-0.27/t/utf8.t 2007-06-24 13:26:44.000000000 -0400 +++ YAML-LibYAML-utf8/t/utf8.t 2008-06-20 13:08:46.000000000 -0400 @@ -1,4 +1,10 @@ -use t::TestYAMLTests tests => 4; +use t::TestYAMLTests tests => 8; +use utf8; + +is Dump("\x{100}"), "--- \xC4\x80\n", 'Dumping wide char works'; +is Load("--- \xC4\x80\n"), "\x{100}", 'Loading UTF-8 works'; +is Load("\xFE\xFF\0-\0-\0-\0 \x01\x00\0\n"), "\x{100}", 'Loading UTF-16BE works'; +is Load("\xFF\xFE-\0-\0-\0 \0\x00\x01\n\0"), "\x{100}", 'Loading UTF-16LE works'; my $hash = { '店名' => 'OpCafé', @@ -24,6 +30,8 @@ ' ... +utf8::encode($yaml); + is Dump($hash), $yaml, 'Dumping Chinese hash works'; is_deeply Load($yaml), $hash, 'Loading Chinese hash works'; @@ -53,5 +61,7 @@ 名前: Plagger ... +utf8::encode($yaml2); + is Dump($hash2), $yaml2, 'Dumping Japanese hash works'; is_deeply Load($yaml2), $hash2, 'Loading Japanese hash works';
thorough patch applied in YAML-LibYAML-0.28. Thanks!