Subject: | Digest::SHA uses SvPV instead of SvPVbyte |
Digest::SHA silently corrupts its output when the input happens to be
upgraded, i.e. the same two perl strings cause different output.
This is caused by Digest::SHA accessing the strings using SvPV without
checking the choice of storage format. It should use SvPVbyte instead.
Patch (including tests) attached.
- Eric
Subject: | 0001-Handle-UTF8-1-strings-properly.patch |
From 92ee28af35ff2740252f67812d02fb6530935e92 Mon Sep 17 00:00:00 2001
From: Eric Brine <ikegami@adaelis.com>
Date: Sun, 18 Nov 2012 20:43:46 -0500
Subject: [PATCH] Handle UTF8=1 strings properly
---
MANIFEST | 1 +
SHA.xs | 12 +++++--
lib/Digest/SHA.pm | 5 +++
t/wide_strs.t | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 97 insertions(+), 4 deletions(-)
create mode 100644 t/wide_strs.t
diff --git a/MANIFEST b/MANIFEST
index 9f22adc..02735a8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -36,5 +36,6 @@ t/sha224.t
t/sha256.t
t/sha384.t
t/sha512.t
+t/wide_strs.t
t/woodbury.t
typemap
diff --git a/SHA.xs b/SHA.xs
index 8124a5d..264ef6d 100644
--- a/SHA.xs
+++ b/SHA.xs
@@ -2,6 +2,10 @@
#include "perl.h"
#include "XSUB.h"
+#ifndef SvPVbyte
+#define SvPVbyte(sv, len) SvPV(sv, len)
+#endif
+
#include "src/sha.c"
#include "src/hmac.c"
@@ -87,7 +91,7 @@ PPCODE:
if ((state = shaopen(ix2alg[ix])) == NULL)
XSRETURN_UNDEF;
for (i = 0; i < items; i++) {
- data = (unsigned char *) (SvPV(ST(i), len));
+ data = (unsigned char *) (SvPVbyte(ST(i), len));
while (len > MAX_WRITE_SIZE) {
shawrite(data, MAX_WRITE_SIZE << 3, state);
data += MAX_WRITE_SIZE;
@@ -141,11 +145,11 @@ PREINIT:
HMAC *state;
char *result;
PPCODE:
- key = (unsigned char *) (SvPV(ST(items-1), len));
+ key = (unsigned char *) (SvPVbyte(ST(items-1), len));
if ((state = hmacopen(ix2alg[ix], key, len)) == NULL)
XSRETURN_UNDEF;
for (i = 0; i < items - 1; i++) {
- data = (unsigned char *) (SvPV(ST(i), len));
+ data = (unsigned char *) (SvPVbyte(ST(i), len));
while (len > MAX_WRITE_SIZE) {
hmacwrite(data, MAX_WRITE_SIZE << 3, state);
data += MAX_WRITE_SIZE;
@@ -193,7 +197,7 @@ PREINIT:
PPCODE:
state = INT2PTR(SHA *, SvIV(SvRV(SvRV(self))));
for (i = 1; i < items; i++) {
- data = (unsigned char *) (SvPV(ST(i), len));
+ data = (unsigned char *) (SvPVbyte(ST(i), len));
while (len > MAX_WRITE_SIZE) {
shawrite(data, MAX_WRITE_SIZE << 3, state);
data += MAX_WRITE_SIZE;
diff --git a/lib/Digest/SHA.pm b/lib/Digest/SHA.pm
index a341991..ecfd039 100644
--- a/lib/Digest/SHA.pm
+++ b/lib/Digest/SHA.pm
@@ -208,6 +208,11 @@ In programs:
$digest = sha384_hex($data);
$digest = sha512_base64($data);
+ # Unicode code points must first be
+ # serialised into bytes by encoding.
+ use Encode qw( encode_utf8 );
+ $digest = sha256(encode_utf8($text));
+
# Object-oriented
use Digest::SHA;
diff --git a/t/wide_strs.t b/t/wide_strs.t
new file mode 100644
index 0000000..6aff6d3
--- /dev/null
+++ b/t/wide_strs.t
@@ -0,0 +1,83 @@
+use strict;
+
+if ($] < 5.008) {
+ # This version of Perl does not support wide strings,
+ # so there's no need to make sure they're handled properly.
+ print "1..1\n";
+ print "ok 1 # skip: No wide strings in Perl $]\n";
+ exit(0);
+}
+
+my $MODULE;
+my @hmac_sha_sub_names;
+my @sha_sub_names;
+
+BEGIN {
+ $MODULE = (-d "src") ? "Digest::SHA" : "Digest::SHA::PurePerl";
+ eval "require $MODULE" || die $@;
+
+ @hmac_sha_sub_names = qw(
+ hmac_sha1 hmac_sha1_base64 hmac_sha1_hex
+ hmac_sha224 hmac_sha224_base64 hmac_sha224_hex
+ hmac_sha256 hmac_sha256_base64 hmac_sha256_hex
+ hmac_sha384 hmac_sha384_base64 hmac_sha384_hex
+ hmac_sha512 hmac_sha512_base64 hmac_sha512_hex);
+ @sha_sub_names = qw(
+ sha1 sha1_base64 sha1_hex
+ sha224 sha224_base64 sha224_hex
+ sha256 sha256_base64 sha256_hex
+ sha384 sha384_base64 sha384_hex
+ sha512 sha512_base64 sha512_hex);
+ $MODULE->import(@hmac_sha_sub_names, @sha_sub_names);
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ }
+}
+
+my $test_num = 1;
+sub ok {
+ my ($ok) = @_;
+ print "not " unless $ok;
+ print "ok ", $test_num++, "\n";
+ return 1;
+}
+
+print "1..".(@hmac_sha_sub_names*3 + @sha_sub_names*2 + 1)."\n";
+
+my $non_byte_data = chr(0x2660);
+my $non_byte_key = chr(0x2660) . ("\x00" x 31);
+
+my $data = "\x80\x81\x82\x83";
+utf8::downgrade( my $data_d = $data );
+utf8::upgrade( my $data_u = $data );
+
+my $key = "\x80\x81\x82\x83" x 8;
+utf8::downgrade( my $key_d = $key );
+utf8::upgrade( my $key_u = $key );
+
+for my $sub_name (@hmac_sha_sub_names) {
+ my $sub_ref = \&$sub_name;
+ ok( !eval { $sub_ref->($non_byte_data, $key_d); 1 } && $@ =~ /Wide character/ );
+ ok( !eval { $sub_ref->($data_d, $non_byte_key); 1 } && $@ =~ /Wide character/ );
+
+ my $digest_d = eval { $sub_ref->($data_d, $key_d) };
+ my $digest_u = eval { $sub_ref->($data_u, $key_u) };
+ ok( defined($digest_d) && defined($digest_u) && $digest_d eq $digest_u );
+}
+
+for my $sub_name (@sha_sub_names) {
+ my $sub_ref = \&$sub_name;
+ ok( !eval { $sub_ref->($non_byte_data); 1 } && $@ =~ /Wide character/ );
+
+ my $digest_d = eval { $sub_ref->($data_d) };
+ my $digest_u = eval { $sub_ref->($data_u) };
+ ok( defined($digest_d) && defined($digest_u) && $digest_d eq $digest_u );
+}
+
+ok( !eval { Digest::SHA->new(256)->add($non_byte_data); 1 } && $@ =~ /Wide character/ );
+
+1;
--
1.7.9