Subject: | PATCH: Add Unicode support for DBD::Sybase |
The attached patch adds a syb_enable_utf8 attribute that adds Unicode
support for DBD::Sybase.
Subject: | dbd-sybase-utf8.patch |
--- a/Sybase.h Fri Sep 10 09:54:33 2010 -0500
+++ b/Sybase.h Fri Sep 17 10:17:13 2010 -0500
@@ -67,6 +67,12 @@
#endif
#endif
+/* Compile the UTF-8 handling only when both CS_UNICHAR_TYPE and
+ * is_utf8_string are defined as preprocessor macros. */
+#if defined(CS_UNICHAR_TYPE) && defined(is_utf8_string)
+#define DBD_CAN_HANDLE_UTF8
+#endif
+
/*#define CTLIB_VERSION CS_VERSION_100 */
#ifndef MAX
diff -r b45d349098dd -r 086750b0b182 Sybase.pm
--- a/Sybase.pm Fri Sep 10 09:54:33 2010 -0500
+++ b/Sybase.pm Fri Sep 17 10:17:13 2010 -0500
@@ -1260,6 +1260,15 @@
Default: off
+=item syb_enable_utf8 (bool)
+
+If this attribute is set then DBD::Sybase will convert UNIVARCHAR, UNICHAR,
+and UNITEXT data to Perl's internal UTF-8 encoding when it is
+retrieved. Updating a Unicode column will cause Sybase to convert any incoming
+data from UTF-8 to its internal UTF-16 encoding.
+
+Default: off
+
=back
=head2 Statement Handle Attributes
diff -r b45d349098dd -r 086750b0b182 dbdimp.c
--- a/dbdimp.c Fri Sep 10 09:54:33 2010 -0500
+++ b/dbdimp.c Fri Sep 17 10:17:13 2010 -0500
@@ -135,6 +135,9 @@
#endif
static int syb_get_date_fmt(imp_dbh_t *imp_dbh, char *fmt);
static int cmd_execute(SV *sth, imp_sth_t *imp_sth);
+#if defined(DBD_CAN_HANDLE_UTF8)
+static int is_high_bit_set(const unsigned char *val, STRLEN size);
+#endif
static CS_BINARY *to_binary(char *str, STRLEN *outlen);
static int get_server_version(SV *dbh, imp_dbh_t *imp_dbh, CS_CONNECTION *con);
static void clear_cache(SV *sth, imp_sth_t *imp_sth);
@@ -949,6 +952,13 @@
}
if (retcode == CS_SUCCEED) {
+ if ((retcode = cs_locale(context, CS_SET, locale, CS_SYB_CHARSET, "utf8",
+ CS_NULLTERM, NULL)) != CS_SUCCEED) {
+ warn("cs_locale(CS_SYB_CHARSET) failed");
+ }
+ }
+
+ if (retcode == CS_SUCCEED) {
CS_INT type = CS_DATES_SHORT;
if ((retcode = cs_dt_info(context, CS_SET, locale, CS_DT_CONVFMT,
CS_UNUSED, (CS_VOID*)&type, CS_SIZEOF(CS_INT), NULL))
@@ -1096,6 +1106,7 @@
"syb_disconnect_in_child");
imp_dbh->host[0] = 0;
imp_dbh->port[0] = 0;
+ imp_dbh->enable_utf8 = fetchSvAttrib(attribs, "syb_enable_utf8");
imp_dbh->blkLogin[0] = 0;
@@ -2207,6 +2218,15 @@
}
return TRUE;
}
+ if (kl == 15 && strEQ(key, "syb_enable_utf8")) {
+     /* Store the attribute as a strict 0/1 flag, whatever true or false
+      * Perl value the caller supplied. */
+     on = SvTRUE(valuesv);
+     imp_dbh->enable_utf8 = on ? 1 : 0;
+     return TRUE;
+ }
if (kl == 16 && strEQ(key, "syb_row_callback")) {
if (valuesv == &PL_sv_undef) {
imp_dbh->row_cb = NULL;
@@ -2393,6 +2413,13 @@
retsv = &PL_sv_undef;
}
}
+ if (kl == 15 && strEQ(key, "syb_enable_utf8")) {
+     /* Report the flag as a new integer SV: 1 when enabled, 0 when not. */
+     retsv = newSViv(imp_dbh->enable_utf8 ? 1 : 0);
+ }
if (kl == 16 && strEQ(key, "syb_row_callback")) {
if (imp_dbh->row_cb) {
retsv = newSVsv(imp_dbh->row_cb);
@@ -3904,6 +3931,19 @@
*SvEND(sv) = '\0';
}
}
+#if defined(DBD_CAN_HANDLE_UTF8)
+    /* When syb_enable_utf8 is on, flag unichar/unitext column data as
+     * UTF-8, but only if it contains a high-bit byte and is well-formed
+     * UTF-8; anything else is left with the flag off. */
+    if (imp_dbh->enable_utf8
+        && (imp_sth->coldata[i].realType == CS_UNICHAR_TYPE ||
+            imp_sth->coldata[i].realType == CS_UNITEXT_TYPE)) {
+        STRLEN len;
+        /* SvPV stores the string's current length (SvCUR) in len.  SvLEN
+         * is the size of the allocated buffer, so using it would make the
+         * checks below read bytes beyond the end of the fetched value. */
+        U8 *value = (U8 *)SvPV(sv, len);
+
+        SvUTF8_off(sv);
+        if (is_high_bit_set(value, len) && is_utf8_string(value, len)) {
+            SvUTF8_on(sv);
+        }
+    }
+#endif
break;
case CS_FLOAT_TYPE:
sv_setnv(sv, imp_sth->coldata[i].value.f);
@@ -4051,6 +4091,15 @@
return av;
}
+#if defined(DBD_CAN_HANDLE_UTF8)
+/* Return 1 if a byte with the high bit (0x80) set occurs in val before the
+ * first NUL byte and within the first size bytes; return 0 otherwise. */
+static int is_high_bit_set(const unsigned char *val, STRLEN size)
+{
+  /* Test the remaining count before dereferencing, so that val[size] is
+   * never read and a zero size reads nothing at all. */
+  while (size-- && *val)
+    if (*val++ & 0x80) return 1;
+  return 0;
+}
+#endif
+
#if defined(NO_BLK)
static int sth_blk_finish(imp_dbh_t *imp_dbh, imp_sth_t *imp_sth, SV *sth)
{
diff -r b45d349098dd -r 086750b0b182 dbdimp.h
--- a/dbdimp.h Fri Sep 10 09:54:33 2010 -0500
+++ b/dbdimp.h Fri Sep 17 10:17:13 2010 -0500
@@ -108,6 +108,8 @@
SV *row_cb;
SV *kerbGetTicket;
+ int enable_utf8;
+
int showEed;
int showSql;
int flushFinish;
diff -r b45d349098dd -r 086750b0b182 t/utf8.t
--- a/t/utf8.t Fri Sep 10 09:54:33 2010 -0500
+++ b/t/utf8.t Fri Sep 17 10:17:13 2010 -0500
@@ -12,17 +12,13 @@
BEGIN {
plan skip_all => 'This test requires Perl 5.8+'
unless $] >= 5.008;
+
}
-plan 'no_plan';
-
+use DBI;
+use DBD::Sybase;
use Encode ();
-BEGIN {
- use_ok('DBI');
- use_ok('DBD::Sybase');
-}
-
binmode( $_, 'utf8' )
for map { Test::Builder->new->$_() }
qw( output failure_output todo_output );
@@ -31,14 +27,24 @@
( $Uid, $Pwd, $Srv, $Db ) = _test::get_info();
-my $dbh = DBI->connect( "dbi:Sybase:server=$Srv;database=$Db", $Uid, $Pwd,
- { PrintError => 1 } );
+my $dbh = DBI->connect(
+ "dbi:Sybase:server=$Srv;database=$Db", $Uid, $Pwd,
+ { PrintError => 1 }
+);
+
+# Compare the server version numerically: as strings, '9.0' ge '12.5' is
+# true, so a plain "ge" would not skip the test on such a server.  A version
+# that does not start with digits is treated as too old.
+my ( $major, $minor ) =
+    ( $dbh->{syb_server_version} || '' ) =~ /\A(\d+)(?:\.(\d+))?/;
+unless ( defined $major
+    && ( $major > 12 || ( $major == 12 && ( $minor || 0 ) >= 5 ) ) )
+{
+    plan skip_all => 'This test requires ASE 12.5';
+}
+
+plan tests => 11;
$dbh->do("create table #utf8test (uv univarchar(250), ut unitext)");
+$dbh->{syb_enable_utf8} = 1;
+
+my $ascii = 'Some text';
+my $utf8 = "\x{263A} - smiley1 - \x{263B} - smiley2";
{
- my $ascii = 'Some text';
-
my $quoted = $dbh->quote($ascii);
$dbh->do("insert into #utf8test (uv, ut) values ($quoted, $quoted)");
@@ -72,8 +78,6 @@
{
$dbh->do("delete from #utf8test");
- my $utf8 = "\x{263A} - smiley1 - \x{263B} - smiley2";
-
my $quoted = $dbh->quote($utf8);
$dbh->do("insert into #utf8test (uv, ut) values ($quoted, $quoted)");
@@ -103,3 +107,63 @@
'ut column was returned with utf8 flag on'
);
}
+
+$dbh->{syb_enable_utf8} = 0;
+
+{
+    # syb_enable_utf8 is off at this point, so neither column of the first
+    # row may come back with Perl's utf8 flag set.
+    my $rows = $dbh->selectall_arrayref( "select * from #utf8test",
+        { Slice => {} } );
+
+    for my $col (qw( uv ut )) {
+        ok(
+            !Encode::is_utf8( $rows->[0]{$col} ),
+            "$col column was returned with utf8 flag off (syb_enable_utf8 was false)"
+        );
+    }
+}
+
+{
+    # Second connection: syb_enable_utf8 is passed in the connect()
+    # attributes instead of being set on the handle afterwards.
+    my $dbh2 = DBI->connect(
+        "dbi:Sybase:server=$Srv;database=$Db",
+        $Uid, $Pwd, {
+            PrintError      => 1,
+            syb_enable_utf8 => 1
+        }
+    );
+
+    $dbh2->do("create table #utf8test (uv univarchar(250), ut unitext)");
+
+    # Quote with the handle that runs the insert, not the first connection.
+    my $quoted = $dbh2->quote($utf8);
+    $dbh2->do("insert into #utf8test (uv, ut) values ($quoted, $quoted)");
+
+    my $rows = $dbh2->selectall_arrayref(
+        "select * from #utf8test",
+        { Slice => {} }
+    );
+
+    # The data must round-trip unchanged ...
+    is_deeply(
+        $rows,
+        [
+            {
+                uv => $utf8,
+                ut => $utf8,
+            }
+        ],
+        "got expected row back from #utf8test"
+    );
+
+    # ... and both columns must carry Perl's utf8 flag.
+    ok(
+        Encode::is_utf8( $rows->[0]{uv} ),
+        'uv column was returned with utf8 flag on (syb_enable_utf8 passed to connect)'
+    );
+
+    ok(
+        Encode::is_utf8( $rows->[0]{ut} ),
+        'ut column was returned with utf8 flag on (syb_enable_utf8 passed to connect)'
+    );
+}