Subject: encode_punycode heap overflow
When running the tests of Net-IDN-Encode-2.300 on OpenBSD with
additional malloc(3) checks, Perl crashes.
This happens during test t/uts46_to_ascii-trans.t:
t/uts46_encode_utf8.t ..... ok
perl(92732) in free(): chunk canary corrupted 0x17858bb94100 0x65@0x65
t/uts46_to_ascii-trans.t ..
Failed 3600/3770 subtests
(2 TODO tests unexpectedly succeeded)
t/uts46_to_ascii.t ........ ok
Subtest 171 seems to trigger the heap overflow:
ok 170 - to_ascii('xn--53h') [data/IdnaTest.txt:275]
perl(4839) in free(): chunk canary corrupted 0x1d2e5083ce80 0x65@0x65
Abort trap
Here is the man page snippet that describes what is going on:
``chunk canary corrupted address offset@length''
A byte after the requested size has been overwritten, indicating
a heap overflow. The offset at which corruption was detected is
printed before the @, and the requested length of the allocation
after the @.
I have looked at the XS code and only the length of the input string
is checked. The size of the output string is increased from time
to time, but not always when the output pointer is moved forward.
So I have put the realloc in a function grow_string() and call it
every time before a value is written to *re_p. This patch fixes the
test.
--- lib/Net/IDN/Punycode.xs.orig Fri Jul 17 21:10:51 2015
+++ lib/Net/IDN/Punycode.xs Tue Nov 22 14:34:09 2016
@@ -49,6 +49,20 @@ static int adapt(int delta, int numpoints, int first)
return k + (((BASE-TMIN+1) * delta) / (delta+SKEW));
};
+static void /* make room for `add` more bytes at *current, growing sv's buffer if needed */
+grow_string(SV *const sv, char **start, char **current, char **end, STRLEN add)
+{
+ STRLEN len; /* number of bytes already written, i.e. offset of *current from *start */
+
+ if(*current + add <= *end) /* enough room already: leave all three pointers untouched */
+ return;
+
+ len = *current - *start; /* remember the write offset before the buffer can change */
+ *start = SvGROW(sv, (len + add + 15) & ~15); /* requested size rounded up to a multiple of 16 */
+ *current = *start + len; /* same write offset, relative to the pointer SvGROW returned */
+ *end = *start + SvLEN(sv); /* end is re-derived from the size reported by SvLEN */
+}
+
MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode
SV*
@@ -81,15 +95,20 @@ encode_punycode(input)
/* copy basic code points */
while(in_p < in_e) {
- if( isBASE(*in_p) )
+ if( isBASE(*in_p) ) {
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
*re_p++ = *in_p;
+ }
in_p++;
}
h = re_p - re_s;
/* add DELIM if needed */
- if(h) *re_p++ = DELIM;
+ if(h) {
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
+ *re_p++ = DELIM;
+ }
for(;;) {
/* find smallest code point not yet handled */
@@ -138,20 +157,14 @@ encode_punycode(input)
q = delta;
for(k = BASE;; k += BASE) {
- if(re_p >= re_e) {
- length_guess = re_e - re_s + 16;
- re_e = SvGROW(RETVAL, length_guess);
- re_p = re_e + (re_p - re_s);
- re_s = re_e;
- re_e = re_s + SvLEN(RETVAL);
- }
-
t = TMIN_MAX(k - bias);
if(q < t) break;
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
*re_p++ = enc_digit[t + ((q-t) % (BASE-t))];
q = (q-t) / (BASE-t);
}
if(q > BASE) croak("input exceeds punycode limit");
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
*re_p++ = enc_digit[q];
bias = adapt(delta, h+1, first);
delta = first = 0;
@@ -162,6 +175,7 @@ encode_punycode(input)
++delta;
++n;
}
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
*re_p = 0;
SvCUR_set(RETVAL, re_p - re_s);
ST(0) = RETVAL;
@@ -201,6 +215,7 @@ decode_punycode(input)
c = *in_p; /* we don't care whether it's UTF-8 */
if(!isBASE(c)) croak("non-base character in input for decode_punycode");
if(c == DELIM) skip_p = in_p;
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
*re_p++ = c; /* copy it */
}
@@ -236,18 +251,11 @@ decode_punycode(input)
u8 = UNISKIP(n); /* how many bytes we need */
- if(re_p + u8 >= re_e) {
- length_guess = re_e - re_p + u8 + 16;
- re_e = SvGROW(RETVAL, length_guess);
- re_p = re_e + (re_p - re_s);
- re_s = re_e;
- re_e = re_s + SvLEN(RETVAL);
- }
-
j = i;
for(skip_p = re_s; j > 0; j--) /* find position in UTF-8 */
skip_p+=UTF8SKIP(skip_p);
+ grow_string(RETVAL, &re_s, &re_p, &re_e, u8);
if(skip_p < re_p) /* move succeeding chars */
Move(skip_p, skip_p + u8, re_p - skip_p, char);
re_p += u8;
@@ -255,6 +263,7 @@ decode_punycode(input)
}
if(!first) SvUTF8_on(RETVAL); /* UTF-8 chars have been inserted */
+ grow_string(RETVAL, &re_s, &re_p, &re_e, 1);
*re_p = 0;
SvCUR_set(RETVAL, re_p - re_s);
ST(0) = RETVAL;