CC: | perl [...] lantschner.name |
Subject: | German Umlauts encoding |
German Umlauts (Ä; ...) are encoded with a trailing space, even if in the middle or the
beginning of a word. See the attached test.
I recommend using an extended notation with curly brackets, as in the sub posted below. This
should work well regardless of the position of the Umlaut.
# enctex($text) - escape a string for inclusion in LaTeX source.
#
# Returns a copy of $text in which every "&" is replaced by "\&" and every
# German umlaut or sharp s is replaced by its brace-wrapped LaTeX command
# (for example "ä" becomes {\"a} and "ß" becomes {\ss}).  The braces delimit
# the command, so no trailing space is required wherever the character
# appears within a word.
sub enctex {
    my $strg = shift;

    # Every substitution carries /g: without it only the first occurrence
    # of each character in the string would be converted.
    $strg =~ s|&|\\&|g;
    $strg =~ s|Ä|\{\\"A\}|g;
    $strg =~ s|ä|\{\\"a\}|g;
    $strg =~ s|Ö|\{\\"O\}|g;
    $strg =~ s|ö|\{\\"o\}|g;
    $strg =~ s|Ü|\{\\"U\}|g;
    $strg =~ s|ü|\{\\"u\}|g;
    $strg =~ s|ß|\{\\ss\}|g;

    return $strg;
}
Subject: | 03-filter_umlauts.t |
#!/usr/bin/perl
use strict;
use warnings;
use Test::More tests => 14;
use blib;
use LaTeX::Encode;
use utf8;
# Testing German Umlauts and "scharfes S" (aka sz).
#
# Each test passes a literal character (this file is UTF-8 with "use utf8")
# to latex_encode() and compares the result with the expected LaTeX command.
# A command that ends in a letter (\ss, \aa) is expected to be followed by a
# space when more letters follow, so that LaTeX can tell where it ends.

# The test names below contain non-ASCII characters; encode the TAP output
# handles as UTF-8 so printing them does not warn "Wide character in print".
my $builder = Test::More->builder;
binmode $builder->output,         ':encoding(UTF-8)';
binmode $builder->failure_output, ':encoding(UTF-8)';
binmode $builder->todo_output,    ':encoding(UTF-8)';

is(latex_encode('Ä'), "\\\"A", "'Ä' - Single Umlaut A");
is(latex_encode('Ärger'), "\\\"Arger", "'Ä' - Umlaut A at beginning of word");
is(latex_encode('Märtyrer'), "M\\\"artyrer", "'ä' - Umlaut a in the middle of word");
is(latex_encode('wä'), "w\\\"a", "'ä' - Umlaut a at the end of word");
is(latex_encode('Ö'), "\\\"O", "'Ö' - Single Umlaut O");
is(latex_encode('Ü'), "\\\"U", "'Ü' - Single Umlaut U");
is(latex_encode('ä'), "\\\"a", "'ä' - Single Umlaut a");
is(latex_encode('ö'), "\\\"o", "'ö' - Single Umlaut o");
is(latex_encode('ü'), "\\\"u", "'ü' - Single Umlaut u");
is(latex_encode('ß'), "\\ss", "'ß' - Single scharfes S (sz)");
is(latex_encode('Straße'), "Stra\\ss e", "'ß' - scharfes S (sz) in the middle of the word");
is(latex_encode('Fuß'), "Fu\\ss", "'ß' - scharfes S (sz) at the end of the word");
# Scharfes S can *not* be at the beginning of a word (at least not in German)
is(latex_encode('å'), "\\aa", "'å' - a ring ");
is(latex_encode('åber'), "\\aa ber", "'å' - a ring at the beginning of the word");