# _lang_normalize($tag)
#
# Re-case a language tag following the formatting convention described in
# http://tools.ietf.org/html/bcp47#section-2.1.1
#
# Parameter: $tag - the language tag, in any letter case.
# Returns:   a re-cased copy of the tag; the argument is not modified.
#
# The ($) prototype is kept on purpose: it affects how existing call sites
# are parsed, so removing it would not be backward-compatible.
#
# NOTE(review): only the subtag *immediately* following a singleton is
# protected from re-casing. A later subtag in the same sequence is still
# re-cased (e.g. "en-x-ab-cd" becomes "en-x-ab-CD") - confirm that this is
# the intended reading of "occur after singletons".
sub _lang_normalize($) {
    my ($tag) = @_;

    # All subtags use lowercase letters ...
    my $lang = lc $tag;

    # ... with 2 exceptions: subtags that neither appear at the start of the
    # tag nor occur after singletons. Both patterns express that as: the
    # preceding subtag ends in at least two word characters (look-behind),
    # and the subtag is followed by a hyphen or the end of the tag
    # (look-ahead). Note that \w also admits digits and underscore.

    # 1. two-character subtags become all uppercase
    $lang =~ s{(?<=\w\w-)(\w\w)(?=-|$)}{\U$1}g;

    # 2. four-character subtags become titlecase
    $lang =~ s{(?<=\w\w-)(\w{4})(?=-|$)}{\u\L$1}g;

    return $lang;
}
# test($input, $expected)
#
# Normalizes $input with _lang_normalize and compares the result with
# $expected. A matching result is silent; on a mismatch the normalized
# value (not the expected one) is written to STDERR on its own line.
sub test {
    my ($input, $expected) = @_;
    my $actual = _lang_normalize($input);

    return 1 if $actual eq $expected;
    return print STDERR "$actual\n";
}
# Table of [ input, expected ] pairs; each pair is handed to test() in the
# order listed.
for my $case (
    # BCP47 tests
    [ "en-ca-x-ca",       "en-CA-x-ca" ],
    [ "EN-ca-X-Ca",       "en-CA-x-ca" ],
    [ "En-Ca-X-Ca",       "en-CA-x-ca" ],
    [ "SGN-BE-FR",        "sgn-BE-FR" ],
    [ "sgn-be-fr",        "sgn-BE-FR" ],
    [ "AZ-latn-x-LATN",   "az-Latn-x-latn" ],
    [ "Az-latn-X-Latn",   "az-Latn-x-latn" ],
    # More tests
    [ "zh-Hant",          "zh-Hant" ],
    [ "zh-Latn-wadegile", "zh-Latn-wadegile" ],
    [ "zh-Latn-pinyin",   "zh-Latn-pinyin" ],
    [ "en-US",            "en-US" ],
    [ "en-GB",            "en-GB" ],
    [ "qqq-002",          "qqq-002" ],
    [ "ja-Latn",          "ja-Latn" ],
    [ "x-local",          "x-local" ],
    [ "he-Latn",          "he-Latn" ],
    [ "und",              "und" ],
    [ "nn",               "nn" ],
    [ "ko-Latn",          "ko-Latn" ],
    [ "ar-Latn",          "ar-Latn" ],
    [ "la-x-liturgic",    "la-x-liturgic" ],
    [ "fa-x-middle",      "fa-x-middle" ],
    [ "qqq-142",          "qqq-142" ],
    [ "bnt",              "bnt" ],
    [ "grc-x-liturgic",   "grc-x-liturgic" ],
    [ "egy-Latn",         "egy-Latn" ],
    [ "la-x-medieval",    "la-x-medieval" ],
) {
    my ($input, $expected) = @{$case};
    test($input, $expected);
}