CC: | paulm [...] paulm.com |
Subject: | DBD::mysql incorrectly handles Unicode chars with a codepoint > ffff |
DBD::mysql's 'mysql_enable_utf8' flag works fine for characters with a
code point of U+FFFF or below. Above that, it fails to either encode
or decode them properly. The attached script demonstrates the problem.
Subject: | test.pl |
#!/usr/bin/perl
use strict;
use warnings;
use DBI;
# Round-trip test: each string is written to the single-column table
# "testtest" and read back through a connection with mysql_enable_utf8 set,
# then compared with what was sent.
#
# Each entry pairs a test string with the "[decimal/hex]" form of its code
# point; the second element is reference documentation only and is not used
# by the loop below.
#
# NOTE(review): the table in __DATA__ is declared "charset=utf8". On MySQL
# that is presumably the 3-byte utf8mb3 character set, which cannot hold code
# points above U+FFFF -- confirm whether the column charset, rather than the
# driver, is what loses the second group of labels.
my @labels = (
    # At or below the ffff/10000 border
    [ "\x{61}",    '[97/61]'       ],    # LATIN SMALL LETTER A
    [ "\x{2012}",  '[8210/2012]'   ],    # FIGURE DASH
    [ "\x{ffee}",  '[65518/ffee]'  ],    # HALFWIDTH WHITE CIRCLE
    # Above the ffff/10000 border
    [ "\x{10000}", '[65536/10000]' ],    # LINEAR B SYLLABLE B008 A
    [ "\x{10412}", '[66578/10412]' ],    # DESERET CAPITAL LETTER BEE
);

# RaiseError turns every failed DBI call (including the connect itself) into
# a die carrying the driver's message, instead of an undef that only blows up
# later at an unrelated line. PrintError is switched off so the same message
# is not reported twice.
my $db = DBI->connect(
    'DBI:mysql:database=idl_test', 'root', '',
    { mysql_enable_utf8 => 1, RaiseError => 1, PrintError => 0 }
);

# The SQL never changes, so prepare both statements once and reuse them.
my $insert_sth   = $db->prepare("INSERT INTO testtest VALUES (?)");
my $retrieve_sth = $db->prepare("SELECT * FROM testtest");

for my $case ( @labels ) {
    my ( $label ) = @$case;

    # Insert our label into an otherwise empty table
    $db->do("DELETE FROM testtest");
    $insert_sth->execute( $label );

    # Retrieve our label
    $retrieve_sth->execute();
    my $row = $retrieve_sth->fetchrow_arrayref
        or die "No row came back after inserting " . explode( $label ) . "\n";
    my $result = $row->[0];    # copy out before the handle is released
    $retrieve_sth->finish;     # only one row is wanted; free the handle for reuse

    # Was it right? A NULL comes back as undef and is reported as a mismatch
    # without tripping "uninitialized value" warnings.
    if ( defined $result && $result eq $label ) {
        print "Result and label are equal: " . explode( $label ) . "\n";
    } else {
        print "Result and label are NOT equal:\n";
        print "Label was: " . explode( $label ) . "\n";
        print "Result was: " . ( defined $result ? explode( $result ) : 'NULL' ) . "\n";
    }
    print '-' x 75 . "\n";
}

$db->disconnect;
# Describe a string as a run of "[decimal/hex]" tags, one per character, so
# that characters which do not print usefully can still be compared by eye.
#
# Takes:   the string to describe.
# Returns: the concatenated tags, e.g. "a" gives "[97/61]". An empty or
#          undefined string gives '' (not undef), so callers can append the
#          result to other text without "uninitialized value" warnings.
sub explode {
    my ($string) = @_;
    $string = '' unless defined $string;

    # %d and %x render the same decimal / lower-case hex forms for a code point.
    return join '', map { sprintf '[%d/%x]', ord($_), ord($_) } split //, $string;
}
__DATA__
CREATE TABLE testtest (
label varchar(255)
) charset=utf8;