Skip Menu |

This queue is for tickets about the URI-Escape-XS CPAN distribution.

Report information
The Basics
Id: 45855
Status: resolved
Priority: 0/
Queue: URI-Escape-XS

People
Owner: Nobody in particular
Requestors: karl.royer [...] bigmite.co.uk
Cc:
AdminCc:

Bug Information
Severity: (no value)
Broken in: (no value)
Fixed in: (no value)



Subject: Problems with undef
Date: Fri, 08 May 2009 15:11:07 +0100
To: bug-URI-Escape-XS [...] rt.cpan.org
From: Karl Royer <karl.royer [...] bigmite.co.uk>
After using URI::Escape::XS extensivly we have found it will cause a segmentation fault when supplied an undef value. We used this class extensively to decode encoded data used in a logging system. I have done a few changes to fix this bug and further improve performance by using a quicker hex convert technique. I have also added a further 4 tests for undef and an empty string. I have included the code below, followed by the new test. =============================================== /* * $Id: XS.xs,v 0.3 2009/01/16 06:38:52 dankogai Exp $ */ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" /* #include "ppport.h" */ /* #include <URI::Escape::XS> */ # include <stdio.h> # include <stdlib.h> # include <string.h> # include <ctype.h> static char escapes[256] = /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; static unsigned int hexconvert[256] = { 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,20,20,20,20,20,20, 20,10,11,12,13,14,15,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,10,11,12,13,14,15,20,20,20,20,20,20,20,20,20, 
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, 20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20, }; SV *encode_uri_component(SV *str){ int slen; SV *result; int i; U8 *src, *dst; int other = 0; /* * First test to see we had an undef value */ if ((str == NULL) || !SvOK(str)) return &PL_sv_undef; /* * Get length, and if zero return zero length string */ slen = SvCUR(str); /* * Allocate new sting at least 3 times length orig */ result = newSV(slen * 3 + 1); SvPOK_on(result); src = (U8 *)SvPV_nolen(str); dst = (U8 *)SvPV_nolen(result); for (i = 0; i < slen; i++){ unsigned char c = *src++; if (escapes[c]) { sprintf(dst, "%%%02X",c); dst += 3; other++; } else{ *dst++ = c; } } *dst = '\0'; /* for sure; */ SvCUR_set(result, slen + (2 * other) ); return result; } SV *decode_uri_component(SV *uri){ int slen; SV *result; U8 buf[16]; U8 *dst, *src, *bp, *odst, *srcend; unsigned int hi, lo; unsigned int a,b,c,d; /* * If undef or null return undef */ if ((uri == NULL) || !SvOK(uri)) return &PL_sv_undef; /* * Allocate result the correct len (well least as long.... */ slen = SvCUR(uri); result = newSV(slen + 1); SvPOK_on(result); dst = (U8 *)SvPV_nolen(result); odst = dst; src = (U8 *)SvPV_nolen(uri); srcend = src + slen; while (src < srcend) { unsigned char c = *src++; if (c == '%') { /* * if next digits are hex then %xx convert */ int a = hexconvert[src[0]]; int b = hexconvert[src[1]]; if ((a != 20) && (b != 20)) { *dst++ = (a << 4) | b; src += 2; continue; } /* * No is it %uxxxx ??? 
*/ if (*src == 'u') { int a = hexconvert[src[1]]; int b = hexconvert[src[2]]; int c = hexconvert[src[3]]; int d = hexconvert[src[4]]; if ((a != 20) && (b != 20) && (c != 20) && (d != 20)) { hi = ( a << 12 ) | (b << 8) | (c << 4) | d; src += 5; if (hi < 0xD800 || 0xDFFF < hi) { bp = uvchr_to_utf8(buf, (UV)hi); strncpy(dst,buf, bp - buf); dst += bp - buf; continue; } /* * Is the range invalid ?? warn... */ if (0xDC00 <= hi) { warn("U+%04X is an invalid surrogate hi\n", hi); continue; } if ((*src == '%') && (src[1] == 'u')) { int a = hexconvert[src[2]]; int b = hexconvert[src[3]]; int c = hexconvert[src[4]]; int d = hexconvert[src[5]]; if ((a != 20) && (b != 20) && (c != 20) && (d != 20)) { lo = ( a << 12 ) | (b << 8) | (c << 4) | d; if (lo < 0xDC00 || 0xDFFF < lo){ warn("U+%04X is an invalid lo surrogate", lo); continue; } lo += 0x10000 + (hi - 0xD800) * 0x400 - 0xDC00; bp = uvchr_to_utf8(buf, (UV)lo); strncpy(dst,buf, bp - buf); dst += bp - buf; src += 6; continue; } warn("lo surrogate is missing for U+%04X", hi); continue; } warn("lo surrogate is missing for U+%04X", hi); continue; } } } *dst++ = c; } *dst = '\0'; /* for sure; */ SvCUR_set(result, (dst - odst) ); return result; } MODULE = URI::Escape::XS PACKAGE = URI::Escape::XS PROTOTYPES: ENABLE SV * encodeURIComponent(str) SV *str; CODE: RETVAL = encode_uri_component(str); OUTPUT: RETVAL SV * decodeURIComponent(str) SV *str; CODE: RETVAL = decode_uri_component(str); OUTPUT: RETVAL ================================== Here is the new test 05-null.t ===================================== #!perl -w # # $Id: 04-idn.t,v 1.2 2009/03/24 14:24:09 dankogai Exp dankogai $ # # Original as URI-1.35/t/escape.t # use URI::Escape::XS; use Test::More tests => 6; my $d; is encodeURIComponent($d) => $d, 'encodeURIComponent(null)'; is decodeURIComponent($d) => $d, 'decodeURIComponent(null)'; $d = ''; is length(encodeURIComponent($d)) => 0, 'length encodeURIComponent(\'\')'; is defined(encodeURIComponent($d)) => 1, 'defined 
encodeURIComponent(\'\')'; is length(decodeURIComponent($d)) => 0, 'length decodeURIComponent(\'\')'; is defined(decodeURIComponent($d)) => 1, 'defined decodeURIComponent(\'\')'; ===================================================== Yours, Karl Royer -- Research & Dev Tech Ltd Tel: +44 7860 189458 Fax: +44 870 0560360 Email: karl.royer@bigmite.co.uk Web: http://www.bigmite.co.uk/ VAT Reg. No. GB 683 7217 12 Registered in England Company No. 03397152 Reg Office: Hollow Tree Lodge, Marshside, Canterbury Kent CT3 4EE This e-mail is confidential and may contain legally privileged information. If you are not named above as the addressee it may be unlawful for you to read, copy, distribute, disclose or otherwise use the information in this e-mail message. If you are not the intended recipient of this e-mail message, please telephone or e-mail us immediately.
Thank you. Fixed in 0.06. Dan the Maintainer Thereof