Subject: | [PARTIAL PATCH] author_from() does not support POD escapes |
author_from() only supports the lt/gt E<> escapes. The attached patch
adds support for the other named escapes, but chr() will map e.g. 240 to
a single byte with the value 240, which on its own is invalid UTF-8.
I'm not sure what the correct way is to have chr() generate UTF-8 in
Perl 5.004 and above (Encode.pm can't be used there).
Subject: | m-i-metadata.patch |
diff -ru Module-Install-0.67/lib/Module/Install/Metadata.pm Module-Install/lib/Module/Install/Metadata.pm
--- Module-Install-0.67/lib/Module/Install/Metadata.pm 2007-05-09 06:52:46.000000000 +0000
+++ Module-Install/lib/Module/Install/Metadata.pm 2007-08-11 01:04:23.000000000 +0000
@@ -266,6 +266,281 @@
}
}
+# Entity-name => code-point table copied from Pod::Escapes 1.04, used by
+# author_from to decode E<name> escapes. Declared with "my" rather than
+# "our" because "our" needs perl 5.6 and this code must run on 5.004.
+my %Name2character_number = (
+ # General XML/XHTML:
+ 'lt' => 60,
+ 'gt' => 62,
+ 'quot' => 34,
+ 'amp' => 38,
+ 'apos' => 39,
+
+ # POD-specific:
+ 'sol' => 47,
+ 'verbar' => 124,
+
+ 'lchevron' => 171, # legacy for laquo
+ 'rchevron' => 187, # legacy for raquo
+
+ # Remember, grave looks like \ (as in virtu\)
+ # acute looks like / (as in re/sume/)
+ # circumflex looks like ^ (as in papier ma^che/)
+ # umlaut/dieresis looks like " (as in nai"ve, Chloe")
+
+ # From the XHTML 1 .ent files:
+ 'nbsp' , 160,
+ 'iexcl' , 161,
+ 'cent' , 162,
+ 'pound' , 163,
+ 'curren' , 164,
+ 'yen' , 165,
+ 'brvbar' , 166,
+ 'sect' , 167,
+ 'uml' , 168,
+ 'copy' , 169,
+ 'ordf' , 170,
+ 'laquo' , 171,
+ 'not' , 172,
+ 'shy' , 173,
+ 'reg' , 174,
+ 'macr' , 175,
+ 'deg' , 176,
+ 'plusmn' , 177,
+ 'sup2' , 178,
+ 'sup3' , 179,
+ 'acute' , 180,
+ 'micro' , 181,
+ 'para' , 182,
+ 'middot' , 183,
+ 'cedil' , 184,
+ 'sup1' , 185,
+ 'ordm' , 186,
+ 'raquo' , 187,
+ 'frac14' , 188,
+ 'frac12' , 189,
+ 'frac34' , 190,
+ 'iquest' , 191,
+ 'Agrave' , 192,
+ 'Aacute' , 193,
+ 'Acirc' , 194,
+ 'Atilde' , 195,
+ 'Auml' , 196,
+ 'Aring' , 197,
+ 'AElig' , 198,
+ 'Ccedil' , 199,
+ 'Egrave' , 200,
+ 'Eacute' , 201,
+ 'Ecirc' , 202,
+ 'Euml' , 203,
+ 'Igrave' , 204,
+ 'Iacute' , 205,
+ 'Icirc' , 206,
+ 'Iuml' , 207,
+ 'ETH' , 208,
+ 'Ntilde' , 209,
+ 'Ograve' , 210,
+ 'Oacute' , 211,
+ 'Ocirc' , 212,
+ 'Otilde' , 213,
+ 'Ouml' , 214,
+ 'times' , 215,
+ 'Oslash' , 216,
+ 'Ugrave' , 217,
+ 'Uacute' , 218,
+ 'Ucirc' , 219,
+ 'Uuml' , 220,
+ 'Yacute' , 221,
+ 'THORN' , 222,
+ 'szlig' , 223,
+ 'agrave' , 224,
+ 'aacute' , 225,
+ 'acirc' , 226,
+ 'atilde' , 227,
+ 'auml' , 228,
+ 'aring' , 229,
+ 'aelig' , 230,
+ 'ccedil' , 231,
+ 'egrave' , 232,
+ 'eacute' , 233,
+ 'ecirc' , 234,
+ 'euml' , 235,
+ 'igrave' , 236,
+ 'iacute' , 237,
+ 'icirc' , 238,
+ 'iuml' , 239,
+ 'eth' , 240,
+ 'ntilde' , 241,
+ 'ograve' , 242,
+ 'oacute' , 243,
+ 'ocirc' , 244,
+ 'otilde' , 245,
+ 'ouml' , 246,
+ 'divide' , 247,
+ 'oslash' , 248,
+ 'ugrave' , 249,
+ 'uacute' , 250,
+ 'ucirc' , 251,
+ 'uuml' , 252,
+ 'yacute' , 253,
+ 'thorn' , 254,
+ 'yuml' , 255,
+
+ 'fnof' , 402,
+ 'Alpha' , 913,
+ 'Beta' , 914,
+ 'Gamma' , 915,
+ 'Delta' , 916,
+ 'Epsilon' , 917,
+ 'Zeta' , 918,
+ 'Eta' , 919,
+ 'Theta' , 920,
+ 'Iota' , 921,
+ 'Kappa' , 922,
+ 'Lambda' , 923,
+ 'Mu' , 924,
+ 'Nu' , 925,
+ 'Xi' , 926,
+ 'Omicron' , 927,
+ 'Pi' , 928,
+ 'Rho' , 929,
+ 'Sigma' , 931,
+ 'Tau' , 932,
+ 'Upsilon' , 933,
+ 'Phi' , 934,
+ 'Chi' , 935,
+ 'Psi' , 936,
+ 'Omega' , 937,
+ 'alpha' , 945,
+ 'beta' , 946,
+ 'gamma' , 947,
+ 'delta' , 948,
+ 'epsilon' , 949,
+ 'zeta' , 950,
+ 'eta' , 951,
+ 'theta' , 952,
+ 'iota' , 953,
+ 'kappa' , 954,
+ 'lambda' , 955,
+ 'mu' , 956,
+ 'nu' , 957,
+ 'xi' , 958,
+ 'omicron' , 959,
+ 'pi' , 960,
+ 'rho' , 961,
+ 'sigmaf' , 962,
+ 'sigma' , 963,
+ 'tau' , 964,
+ 'upsilon' , 965,
+ 'phi' , 966,
+ 'chi' , 967,
+ 'psi' , 968,
+ 'omega' , 969,
+ 'thetasym' , 977,
+ 'upsih' , 978,
+ 'piv' , 982,
+ 'bull' , 8226,
+ 'hellip' , 8230,
+ 'prime' , 8242,
+ 'Prime' , 8243,
+ 'oline' , 8254,
+ 'frasl' , 8260,
+ 'weierp' , 8472,
+ 'image' , 8465,
+ 'real' , 8476,
+ 'trade' , 8482,
+ 'alefsym' , 8501,
+ 'larr' , 8592,
+ 'uarr' , 8593,
+ 'rarr' , 8594,
+ 'darr' , 8595,
+ 'harr' , 8596,
+ 'crarr' , 8629,
+ 'lArr' , 8656,
+ 'uArr' , 8657,
+ 'rArr' , 8658,
+ 'dArr' , 8659,
+ 'hArr' , 8660,
+ 'forall' , 8704,
+ 'part' , 8706,
+ 'exist' , 8707,
+ 'empty' , 8709,
+ 'nabla' , 8711,
+ 'isin' , 8712,
+ 'notin' , 8713,
+ 'ni' , 8715,
+ 'prod' , 8719,
+ 'sum' , 8721,
+ 'minus' , 8722,
+ 'lowast' , 8727,
+ 'radic' , 8730,
+ 'prop' , 8733,
+ 'infin' , 8734,
+ 'ang' , 8736,
+ 'and' , 8743,
+ 'or' , 8744,
+ 'cap' , 8745,
+ 'cup' , 8746,
+ 'int' , 8747,
+ 'there4' , 8756,
+ 'sim' , 8764,
+ 'cong' , 8773,
+ 'asymp' , 8776,
+ 'ne' , 8800,
+ 'equiv' , 8801,
+ 'le' , 8804,
+ 'ge' , 8805,
+ 'sub' , 8834,
+ 'sup' , 8835,
+ 'nsub' , 8836,
+ 'sube' , 8838,
+ 'supe' , 8839,
+ 'oplus' , 8853,
+ 'otimes' , 8855,
+ 'perp' , 8869,
+ 'sdot' , 8901,
+ 'lceil' , 8968,
+ 'rceil' , 8969,
+ 'lfloor' , 8970,
+ 'rfloor' , 8971,
+ 'lang' , 9001,
+ 'rang' , 9002,
+ 'loz' , 9674,
+ 'spades' , 9824,
+ 'clubs' , 9827,
+ 'hearts' , 9829,
+ 'diams' , 9830,
+ 'OElig' , 338,
+ 'oelig' , 339,
+ 'Scaron' , 352,
+ 'scaron' , 353,
+ 'Yuml' , 376,
+ 'circ' , 710,
+ 'tilde' , 732,
+ 'ensp' , 8194,
+ 'emsp' , 8195,
+ 'thinsp' , 8201,
+ 'zwnj' , 8204,
+ 'zwj' , 8205,
+ 'lrm' , 8206,
+ 'rlm' , 8207,
+ 'ndash' , 8211,
+ 'mdash' , 8212,
+ 'lsquo' , 8216,
+ 'rsquo' , 8217,
+ 'sbquo' , 8218,
+ 'ldquo' , 8220,
+ 'rdquo' , 8221,
+ 'bdquo' , 8222,
+ 'dagger' , 8224,
+ 'Dagger' , 8225,
+ 'permil' , 8240,
+ 'lsaquo' , 8249,
+ 'rsaquo' , 8250,
+ 'euro' , 8364,
+);
+
sub author_from {
my ( $self, $file ) = @_;
my $content = $self->_slurp($file);
@@ -278,8 +553,9 @@
([^\n]*)
/ixms) {
my $author = $1 || $2;
- $author =~ s{E<lt>}{<}g;
- $author =~ s{E<gt>}{>}g;
+ # Decode E<name> escapes; unknown names are left verbatim. Test with exists
+ # because chr(undef) is "\0", which is true and would defeat a "||" fallback.
+ $author =~ s/E<([A-Za-z0-9]+)>/exists $Name2character_number{$1} ? chr($Name2character_number{$1}) : "E<$1>"/ge;
$self->author($author);
}
else {