Subject: | Text::CSV_XS produces garbage on some data |
Hello
Recently I found that some columns in my CSV contain unreadable data.
It looks like "ÐкаÑеÑинбÑÑг". I attached minimal example to
reproduce this problem. I tested it on debian sid with perl 5.14.2 and
Text::CSV_XS 0.91. The strange thing is that if you set
`always_quote' to false, everything works fine. At the same time, this test
works correctly with Text::CSV_PP regardless of the `always_quote' parameter.
Subject: | te.pl |
# Reproduction script: prints one row of mostly Cyrillic text fields as a
# single CSV record on STDOUT using Text::CSV_XS.
use strict;
use warnings;
use Text::CSV_XS;

# always_quote is enabled on purpose: according to the accompanying report
# the output is only garbled with always_quote on, and is correct with it off.
my $csv = Text::CSV_XS->new({eol => "\n", always_quote => 1});

# The row to emit. Fields are character strings written with \x{...} escapes
# so the script itself stays pure ASCII.
my $row = [
"\x{415}\x{43a}\x{430}\x{442}\x{435}\x{440}\x{438}\x{43d}\x{431}\x{443}\x{440}\x{433}",
"\x{410}\x{417}\x{421} \x{2116}303",
" \x{421}\x{432}\x{435}\x{440}\x{434}\x{43b}\x{43e}\x{432}\x{441}\x{43a}\x{430}\x{44f} \x{43e}\x{431}\x{43b}\x{430}\x{441}\x{442}\x{44c}, \x{433}. \x{415}\x{43a}\x{430}\x{442}\x{435}\x{440}\x{438}\x{43d}\x{431}\x{443}\x{440}\x{433}, \x{414}\x{443}\x{431}\x{43b}\x{435}\x{440} \x{421}\x{438}\x{431}\x{438}\x{440}\x{441}\x{43a}\x{43e}\x{433}\x{43e} \x{442}\x{440}\x{430}\x{43a}\x{442}\x{430}, 5 \x{43a}\x{43c} \x{43b}\x{435}\x{432}\x{430}\x{44f} \x{441}\x{442}\x{43e}\x{440}\x{43e}\x{43d}\x{430} ",
'',
'G-95',
'95',
'92',
'80',
'',
"\x{414}\x{422}",
'',
'',
"\x{41c}\x{430}\x{433}\x{430}\x{437}\x{438}\x{43d}",
"\x{41a}\x{430}\x{444}\x{435}",
"\x{422}\x{443}\x{430}\x{43b}\x{435}\x{442}",
'',
"\x{41a}\x{440}\x{443}\x{433}\x{43b}\x{43e}\x{441}\x{443}\x{442}\x{43e}\x{447}\x{43d}\x{430}\x{44f} \x{440}\x{430}\x{431}\x{43e}\x{442}\x{430}",
'',
"\x{41f}\x{440}\x{438}\x{43d}\x{438}\x{43c}\x{430}\x{44e}\x{442} \x{43a}\x{430}\x{440}\x{442}\x{44b} \x{ab}\x{41d}\x{430}\x{43c} \x{43f}\x{43e} \x{43f}\x{443}\x{442}\x{438}\x{bb}",
'',
'',
"\x{422}\x{435}\x{440}\x{43c}\x{438}\x{43d}\x{430}\x{43b} \x{43e}\x{43f}\x{43b}\x{430}\x{442}\x{44b}",
'',
"\x{41e}\x{43f}\x{43b}\x{430}\x{442}\x{430} \x{43a}\x{430}\x{440}\x{442}\x{43e}\x{439} \x{413}\x{41f}\x{41d}",
"\x{41e}\x{43f}\x{43b}\x{430}\x{442}\x{430} \x{43a}\x{430}\x{440}\x{442}\x{430}\x{43c}\x{438} \"MasterCard\"",
"\x{41e}\x{43f}\x{43b}\x{430}\x{442}\x{430} \x{43a}\x{430}\x{440}\x{442}\x{430}\x{43c}\x{438} \"Visa\"",
"\x{41e}\x{43f}\x{43b}\x{430}\x{442}\x{430} \x{43a}\x{430}\x{440}\x{442}\x{430}\x{43c}\x{438} \"Unioncard\"",
'',
"\x{420}\x{443}\x{447}\x{43d}\x{430}\x{44f} \x{43c}\x{43e}\x{439}\x{43a}\x{430}",
'',
"\x{41f}\x{43e}\x{434}\x{43a}\x{430}\x{447}\x{43a}\x{430} \x{448}\x{438}\x{43d}",
'',
"\x{417}\x{430}\x{43c}\x{435}\x{43d}\x{430} \x{43c}\x{430}\x{441}\x{43b}\x{430}",
'',
'',
'',
'',
'207',
"\x{435}\x{43a}\x{430}\x{442}\x{435}\x{440}\x{438}\x{43d}\x{431}\x{443}\x{440}\x{433}",
0,
1,
"\x{410}\x{417}\x{421} \x{413}\x{430}\x{437}\x{43f}\x{440}\x{43e}\x{43c}\x{43d}\x{435}\x{444}\x{442}\x{44c}-\x{423}\x{440}\x{430}\x{43b}, \x{41a}\x{438}\x{440}\x{43e}\x{432}\x{441}\x{43a}\x{438}\x{439} \x{440}\x{430}\x{439}\x{43e}\x{43d}, \x{2116}303",
"\x{415}\x{43a}\x{430}\x{442}\x{435}\x{440}\x{438}\x{43d}\x{431}\x{443}\x{440}\x{433}",
"\x{421}\x{438}\x{431}\x{438}\x{440}\x{441}\x{43a}\x{438}\x{439} \x{442}\x{440}\x{430}\x{43a}\x{442} \x{434}\x{443}\x{431}\x{43b}\x{435}\x{440} 5 \x{43a}\x{43c}, 1",
'1267165676583540',
"\x{410}\x{417}\x{421} \x{413}\x{430}\x{437}\x{43f}\x{440}\x{43e}\x{43c}\x{43d}\x{435}\x{444}\x{442}\x{44c}-\x{423}\x{440}\x{430}\x{43b}, \x{41a}\x{438}\x{440}\x{43e}\x{432}\x{441}\x{43a}\x{438}\x{439} \x{440}\x{430}\x{439}\x{43e}\x{43d}, \x{2116}303",
"\x{415}\x{43a}\x{430}\x{442}\x{435}\x{440}\x{438}\x{43d}\x{431}\x{443}\x{440}\x{433}"
];

# Encode wide characters as UTF-8 on the way out. The layer is deliberately
# left as ':utf8' so the conditions of the reproduction are not altered.
binmode STDOUT, ':utf8';

# Surface a failed print explicitly so that an I/O or formatting error is not
# mistaken for the encoding problem this script demonstrates.
$csv->print(\*STDOUT, $row)
    or die "Text::CSV_XS print failed: " . $csv->error_diag . "\n";