Subject: | Problems with undef |
Date: | Fri, 08 May 2009 15:11:07 +0100 |
To: | bug-URI-Escape-XS [...] rt.cpan.org |
From: | Karl Royer <karl.royer [...] bigmite.co.uk> |
After using URI::Escape::XS extensively, we have found that it causes a
segmentation fault when supplied an undef value.
We used this class extensively to decode encoded data used in a logging
system.
I have made a few changes to fix this bug and to further improve
performance by using a quicker hex conversion technique.
I have also added a further 4 tests for undef and an empty string.
I have included the code below, followed by the new test.
===============================================
/*
* $Id: XS.xs,v 0.3 2009/01/16 06:38:52 dankogai Exp $
*/
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
/* #include "ppport.h" */
/* #include <URI::Escape::XS> */
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
static char escapes[256] =
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
static unsigned int hexconvert[256] =
{
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,20,20,20,20,20,20,
20,10,11,12,13,14,15,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,10,11,12,13,14,15,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
};
SV *encode_uri_component(SV *str){
int slen;
SV *result;
int i;
U8 *src, *dst;
int other = 0;
/*
* First test to see we had an undef value
*/
if ((str == NULL) || !SvOK(str))
return &PL_sv_undef;
/*
* Get length, and if zero return zero length string
*/
slen = SvCUR(str);
/*
* Allocate new sting at least 3 times length orig
*/
result = newSV(slen * 3 + 1);
SvPOK_on(result);
src = (U8 *)SvPV_nolen(str);
dst = (U8 *)SvPV_nolen(result);
for (i = 0; i < slen; i++){
unsigned char c = *src++;
if (escapes[c]) {
sprintf(dst, "%%%02X",c);
dst += 3;
other++;
}
else{
*dst++ = c;
}
}
*dst = '\0'; /* for sure; */
SvCUR_set(result, slen + (2 * other) );
return result;
}
SV *decode_uri_component(SV *uri){
int slen;
SV *result;
U8 buf[16];
U8 *dst, *src, *bp, *odst, *srcend;
unsigned int hi, lo;
unsigned int a,b,c,d;
/*
* If undef or null return undef
*/
if ((uri == NULL) || !SvOK(uri))
return &PL_sv_undef;
/*
* Allocate result the correct len (well least as long....
*/
slen = SvCUR(uri);
result = newSV(slen + 1);
SvPOK_on(result);
dst = (U8 *)SvPV_nolen(result);
odst = dst;
src = (U8 *)SvPV_nolen(uri);
srcend = src + slen;
while (src < srcend) {
unsigned char c = *src++;
if (c == '%') {
/*
* if next digits are hex then %xx convert
*/
int a = hexconvert[src[0]];
int b = hexconvert[src[1]];
if ((a != 20) && (b != 20)) {
*dst++ = (a << 4) | b;
src += 2;
continue;
}
/*
* No is it %uxxxx ???
*/
if (*src == 'u') {
int a = hexconvert[src[1]];
int b = hexconvert[src[2]];
int c = hexconvert[src[3]];
int d = hexconvert[src[4]];
if ((a != 20) && (b != 20) && (c != 20) && (d != 20)) {
hi = ( a << 12 ) | (b << 8) | (c << 4) | d;
src += 5;
if (hi < 0xD800 || 0xDFFF < hi) {
bp = uvchr_to_utf8(buf, (UV)hi);
strncpy(dst,buf, bp - buf);
dst += bp - buf;
continue;
}
/*
* Is the range invalid ?? warn...
*/
if (0xDC00 <= hi) {
warn("U+%04X is an invalid surrogate hi\n", hi);
continue;
}
if ((*src == '%') && (src[1] == 'u')) {
int a = hexconvert[src[2]];
int b = hexconvert[src[3]];
int c = hexconvert[src[4]];
int d = hexconvert[src[5]];
if ((a != 20) && (b != 20) && (c != 20) && (d != 20)) {
lo = ( a << 12 ) | (b << 8) | (c << 4) | d;
if (lo < 0xDC00 || 0xDFFF < lo){
warn("U+%04X is an invalid lo surrogate", lo);
continue;
}
lo += 0x10000
+ (hi - 0xD800) * 0x400 - 0xDC00;
bp = uvchr_to_utf8(buf, (UV)lo);
strncpy(dst,buf, bp - buf);
dst += bp - buf;
src += 6;
continue;
}
warn("lo surrogate is missing for U+%04X", hi);
continue;
}
warn("lo surrogate is missing for U+%04X", hi);
continue;
}
}
}
*dst++ = c;
}
*dst = '\0'; /* for sure; */
SvCUR_set(result, (dst - odst) );
return result;
}
MODULE = URI::Escape::XS PACKAGE = URI::Escape::XS
PROTOTYPES: ENABLE
SV *
encodeURIComponent(str)
SV *str;
CODE:
/* Perl-visible entry point: delegates to the C function encode_uri_component(),
   which returns undef for an undefined argument and otherwise a new
   percent-encoded string SV. */
RETVAL = encode_uri_component(str);
OUTPUT:
RETVAL
SV *
decodeURIComponent(str)
SV *str;
CODE:
/* Perl-visible entry point: delegates to the C function decode_uri_component(),
   which returns undef for an undefined argument and otherwise a new
   string SV with the %XX and %uXXXX escapes decoded. */
RETVAL = decode_uri_component(str);
OUTPUT:
RETVAL
==================================
Here is the new test
05-null.t
=====================================
#!perl -w
#
# $Id: 04-idn.t,v 1.2 2009/03/24 14:24:09 dankogai Exp dankogai $
#
# Original as URI-1.35/t/escape.t
#
# Checks that encodeURIComponent and decodeURIComponent hand undef back
# as undef, and turn the empty string into a defined, zero-length string.
#
use strict;
use warnings;
use URI::Escape::XS;
use Test::More tests => 6;

# undef in, undef out
my $d;
is(encodeURIComponent($d), $d, 'encodeURIComponent(null)');
is(decodeURIComponent($d), $d, 'decodeURIComponent(null)');

# empty string in, defined empty string out
$d = '';
is(length(encodeURIComponent($d)),  0, 'length encodeURIComponent(\'\')');
is(defined(encodeURIComponent($d)), 1, 'defined encodeURIComponent(\'\')');
is(length(decodeURIComponent($d)),  0, 'length decodeURIComponent(\'\')');
is(defined(decodeURIComponent($d)), 1, 'defined decodeURIComponent(\'\')');
=====================================================
Yours,
Karl Royer
--
Research & Dev Tech Ltd
Tel: +44 7860 189458
Fax: +44 870 0560360
Email: karl.royer@bigmite.co.uk
Web: http://www.bigmite.co.uk/
VAT Reg. No. GB 683 7217 12
Registered in England Company No. 03397152
Reg Office: Hollow Tree Lodge, Marshside, Canterbury Kent CT3 4EE
This e-mail is confidential and may contain legally privileged
information. If you are not named above as the addressee it may be
unlawful for you to read, copy, distribute, disclose or otherwise use
the information in this e-mail message. If you are not the intended
recipient of this e-mail message, please telephone or e-mail us
immediately.