If you insert a string into a BLOB column and that string has the UTF-8 flag set, the data will be
UTF-8 encoded when it goes into the database. This happens even if the column is bound as
SQL_BLOB. I've attached a patch that adds a failing test to the Unicode test (t/12_unicode.t).
As a workaround, my code calls utf8::downgrade() on the blob string before inserting it.
Subject: | sqlite-utf8-flag-failure.patch |
Index: t/12_unicode.t
===================================================================
--- t/12_unicode.t (revision 11260)
+++ t/12_unicode.t (working copy)
@@ -13,7 +13,7 @@
use Test::More;
BEGIN {
if ( $] >= 5.008005 ) {
- plan( tests => 19 );
+ plan( tests => 26 );
} else {
plan( skip_all => 'Unicode is not supported before 5.8.5' );
}
@@ -105,6 +105,23 @@
warn "($lengths->[0]->[0] != $lengths->[0]->[1])";
}
+# Test that passing a string with the utf-8 flag on is handled properly in a BLOB field
+SCOPE: {
+ my $dbh = connect_ok( dbfile => 'foo' );
+
+ ok( utf8::upgrade($bytestring), 'bytestring upgraded to utf-8' );
+ ok( utf8::is_utf8($bytestring), 'bytestring has utf-8 flag' );
+
+ ($textback, $bytesback) = database_roundtrip($dbh, $utfstring, $bytestring);
+ ok( $bytesback eq $bytestring, 'No blob corruption with utf-8 flag on' );
+
+ ok( utf8::downgrade($bytestring), 'bytestring downgraded to bytes' );
+ ok( !utf8::is_utf8($bytestring), 'bytestring does not have utf-8 flag' );
+
+ ($textback, $bytesback) = database_roundtrip($dbh, $utfstring, $bytestring);
+ ok( $bytesback eq $bytestring, 'No blob corruption with utf-8 flag off' );
+}
+
sub database_roundtrip {
my ($dbh, $ain, $bin) = @_;
$dbh->do("DELETE FROM table1");