Skip Menu |

This queue is for tickets about the Module-Install CPAN distribution.

Report information
The Basics
Id: 28793
Status: resolved
Priority: 0/
Queue: Module-Install

People
Owner: Nobody in particular
Requestors: avar [...] cpan.org
Cc:
AdminCc:

Bug Information
Severity: Normal
Broken in: (no value)
Fixed in: (no value)



Subject: [PARTIAL PATCH] author_from() does not support POD escapes
author_from() only supports the lt/gt E<> escapes. The attached patch adds support for this but chr() will map e.g. 240 to a single bit with the value 240, i.e. invalid UTF-8. I'm not sure what's the correct way to have chr() generate UTF-8 in 5.4 and above (can't use Encode.pm).
Subject: m-i-metadata.patch
diff -ru Module-Install-0.67/lib/Module/Install/Metadata.pm Module-Install/lib/Module/Install/Metadata.pm --- Module-Install-0.67/lib/Module/Install/Metadata.pm 2007-05-09 06:52:46.000000000 +0000 +++ Module-Install/lib/Module/Install/Metadata.pm 2007-08-11 01:04:23.000000000 +0000 @@ -266,6 +266,281 @@ } } +# Copied directly from Pod::Escapes-1.04 for use in author_from, this +# is probably the most sane way to support E<escapes> all the way down +# to 5.004 +our %Name2character_number = ( + # General XML/XHTML: + 'lt' => 60, + 'gt' => 62, + 'quot' => 34, + 'amp' => 38, + 'apos' => 39, + + # POD-specific: + 'sol' => 47, + 'verbar' => 124, + + 'lchevron' => 171, # legacy for laquo + 'rchevron' => 187, # legacy for raquo + + # Remember, grave looks like \ (as in virtu\) + # acute looks like / (as in re/sume/) + # circumflex looks like ^ (as in papier ma^che/) + # umlaut/dieresis looks like " (as in nai"ve, Chloe") + + # From the XHTML 1 .ent files: + 'nbsp' , 160, + 'iexcl' , 161, + 'cent' , 162, + 'pound' , 163, + 'curren' , 164, + 'yen' , 165, + 'brvbar' , 166, + 'sect' , 167, + 'uml' , 168, + 'copy' , 169, + 'ordf' , 170, + 'laquo' , 171, + 'not' , 172, + 'shy' , 173, + 'reg' , 174, + 'macr' , 175, + 'deg' , 176, + 'plusmn' , 177, + 'sup2' , 178, + 'sup3' , 179, + 'acute' , 180, + 'micro' , 181, + 'para' , 182, + 'middot' , 183, + 'cedil' , 184, + 'sup1' , 185, + 'ordm' , 186, + 'raquo' , 187, + 'frac14' , 188, + 'frac12' , 189, + 'frac34' , 190, + 'iquest' , 191, + 'Agrave' , 192, + 'Aacute' , 193, + 'Acirc' , 194, + 'Atilde' , 195, + 'Auml' , 196, + 'Aring' , 197, + 'AElig' , 198, + 'Ccedil' , 199, + 'Egrave' , 200, + 'Eacute' , 201, + 'Ecirc' , 202, + 'Euml' , 203, + 'Igrave' , 204, + 'Iacute' , 205, + 'Icirc' , 206, + 'Iuml' , 207, + 'ETH' , 208, + 'Ntilde' , 209, + 'Ograve' , 210, + 'Oacute' , 211, + 'Ocirc' , 212, + 'Otilde' , 213, + 'Ouml' , 214, + 'times' , 215, + 'Oslash' , 216, + 'Ugrave' , 217, + 'Uacute' , 218, + 'Ucirc' , 219, + 'Uuml' , 220, + 'Yacute' , 221, + 'THORN' , 222, + 'szlig' , 223, + 'agrave' , 224, + 'aacute' , 225, + 'acirc' , 226, + 'atilde' , 227, + 'auml' , 228, + 'aring' , 229, + 'aelig' , 230, + 'ccedil' , 231, + 'egrave' , 232, + 'eacute' , 233, + 'ecirc' , 234, + 'euml' , 235, + 'igrave' , 236, + 'iacute' , 237, + 'icirc' , 238, + 'iuml' , 239, + 'eth' , 240, + 'ntilde' , 241, + 'ograve' , 242, + 'oacute' , 243, + 'ocirc' , 244, + 'otilde' , 245, + 'ouml' , 246, + 'divide' , 247, + 'oslash' , 248, + 'ugrave' , 249, + 'uacute' , 250, + 'ucirc' , 251, + 'uuml' , 252, + 'yacute' , 253, + 'thorn' , 254, + 'yuml' , 255, + + 'fnof' , 402, + 'Alpha' , 913, + 'Beta' , 914, + 'Gamma' , 915, + 'Delta' , 916, + 'Epsilon' , 917, + 'Zeta' , 918, + 'Eta' , 919, + 'Theta' , 920, + 'Iota' , 921, + 'Kappa' , 922, + 'Lambda' , 923, + 'Mu' , 924, + 'Nu' , 925, + 'Xi' , 926, + 'Omicron' , 927, + 'Pi' , 928, + 'Rho' , 929, + 'Sigma' , 931, + 'Tau' , 932, + 'Upsilon' , 933, + 'Phi' , 934, + 'Chi' , 935, + 'Psi' , 936, + 'Omega' , 937, + 'alpha' , 945, + 'beta' , 946, + 'gamma' , 947, + 'delta' , 948, + 'epsilon' , 949, + 'zeta' , 950, + 'eta' , 951, + 'theta' , 952, + 'iota' , 953, + 'kappa' , 954, + 'lambda' , 955, + 'mu' , 956, + 'nu' , 957, + 'xi' , 958, + 'omicron' , 959, + 'pi' , 960, + 'rho' , 961, + 'sigmaf' , 962, + 'sigma' , 963, + 'tau' , 964, + 'upsilon' , 965, + 'phi' , 966, + 'chi' , 967, + 'psi' , 968, + 'omega' , 969, + 'thetasym' , 977, + 'upsih' , 978, + 'piv' , 982, + 'bull' , 8226, + 'hellip' , 8230, + 'prime' , 8242, + 'Prime' , 8243, + 'oline' , 8254, + 'frasl' , 8260, + 'weierp' , 8472, + 'image' , 8465, + 'real' , 8476, + 'trade' , 8482, + 'alefsym' , 8501, + 'larr' , 8592, + 'uarr' , 8593, + 'rarr' , 8594, + 'darr' , 8595, + 'harr' , 8596, + 'crarr' , 8629, + 'lArr' , 8656, + 'uArr' , 8657, + 'rArr' , 8658, + 'dArr' , 8659, + 'hArr' , 8660, + 'forall' , 8704, + 'part' , 8706, + 'exist' , 8707, + 'empty' , 8709, + 'nabla' , 8711, + 'isin' , 8712, + 'notin' , 8713, + 'ni' , 8715, + 'prod' , 8719, + 'sum' , 8721, + 'minus' , 8722, + 'lowast' , 8727, + 'radic' , 8730, + 'prop' , 8733, + 'infin' , 8734, + 'ang' , 8736, + 'and' , 8743, + 'or' , 8744, + 'cap' , 8745, + 'cup' , 8746, + 'int' , 8747, + 'there4' , 8756, + 'sim' , 8764, + 'cong' , 8773, + 'asymp' , 8776, + 'ne' , 8800, + 'equiv' , 8801, + 'le' , 8804, + 'ge' , 8805, + 'sub' , 8834, + 'sup' , 8835, + 'nsub' , 8836, + 'sube' , 8838, + 'supe' , 8839, + 'oplus' , 8853, + 'otimes' , 8855, + 'perp' , 8869, + 'sdot' , 8901, + 'lceil' , 8968, + 'rceil' , 8969, + 'lfloor' , 8970, + 'rfloor' , 8971, + 'lang' , 9001, + 'rang' , 9002, + 'loz' , 9674, + 'spades' , 9824, + 'clubs' , 9827, + 'hearts' , 9829, + 'diams' , 9830, + 'OElig' , 338, + 'oelig' , 339, + 'Scaron' , 352, + 'scaron' , 353, + 'Yuml' , 376, + 'circ' , 710, + 'tilde' , 732, + 'ensp' , 8194, + 'emsp' , 8195, + 'thinsp' , 8201, + 'zwnj' , 8204, + 'zwj' , 8205, + 'lrm' , 8206, + 'rlm' , 8207, + 'ndash' , 8211, + 'mdash' , 8212, + 'lsquo' , 8216, + 'rsquo' , 8217, + 'sbquo' , 8218, + 'ldquo' , 8220, + 'rdquo' , 8221, + 'bdquo' , 8222, + 'dagger' , 8224, + 'Dagger' , 8225, + 'permil' , 8240, + 'lsaquo' , 8249, + 'rsaquo' , 8250, + 'euro' , 8364, +); + sub author_from { my ( $self, $file ) = @_; my $content = $self->_slurp($file); @@ -278,8 +553,9 @@ ([^\n]*) /ixms) { my $author = $1 || $2; - $author =~ s{E<lt>}{<}g; - $author =~ s{E<gt>}{>}g; + + $author =~ s/E<([a-z0-9]+)>/chr($Name2character_number{$1}) || "E<$1>"/gei; + $self->author($author); } else {
Hi. Added other unescaping code in the trunk; will (hopefully) be fixed in the next release. Thanks. On 2007-8-10 Fri 21:26:18, AVAR wrote: Show quoted text
> author_from() only supports the lt/gt E<> escapes. The attached patch > adds support for this but chr() will map e.g. 240 to a single bit with > the value 240, i.e. invalid UTF-8. > > I'm not sure what's the correct way to have chr() generate UTF-8 in
5.4 Show quoted text
> and above (can't use Encode.pm).
Hi. 0.95 is out. If you still have the problem, reopen this. Thanks.