Subject: | get_root_domain doesn't play nice with (ASCII) IDN domains |
Because the effective_tld_names.dat doesn't include the ascii representation of IDN domains, get_root_domain() for such a domain returns "Invalid TLD"
If however, we rebuild the tld_tree using Net::IDN::Encode's domain_to_ascii, this works fine.
This is not an error with Net::Domain::TLD or Data::Validate::Domain as recent enough versions of those recognise the ASCII IDN domains just fine (although it would be lovely if Net::Domain::TLD also used the effective_tld_names.dat file).
As an obvious other issue, this code cannot work with non-ASCII IDN domains due to the narrow domain_private_tld regexp, and possibly for other reasons, too.
Subject: | domain_public_suffix_bug.pl |
use feature qw(say);
use Net::Domain::TLD qw(tld_exists);
use Data::Validate::Domain qw(is_domain);
use Domain::PublicSuffix;
use Net::IDN::Encode qw(domain_to_ascii domain_to_unicode);
use Encode qw(decode);
my $uri = "http://xn--p8j9a0d9c9a.xn--q9jyb4c/index.html";
my $domain = "xn--p8j9a0d9c9a.xn--q9jyb4c";
my $ascii_tld = "xn--q9jyb4c";
say "Net::Domain::TLD::VERSION: $Net::Domain::TLD::VERSION";
say "Data::Validate::Domain::VERSION: $Data::Validate::Domain::VERSION";
say "Domain::PublicSuffix::VERSION: $Domain::PublicSuffix::VERSION";
# Show that Net::Domain::TLD is happy with this tld
say "tld_exists: " . tld_exists($ascii_tld);
# Show that Data::Validate::Domain is happy with this domain
say "is_domain: " . is_domain(
$domain,
{
'domain_allow_underscore' => 1,
'domain_private_tld' => qr/^[a-z0-9]+$/,
}
) // "Not defined";
# Create suffix with most up to date effective_tld_names.dat
my $suffix = Domain::PublicSuffix->new(
{
data_file => "$ENV{HOME}/effective_tld_names.dat",
}
);
say "Root domain is: " . $suffix->get_root_domain($domain);
say $suffix->error();
say "Rebuilding tld_tree";
my $tld_tree = $suffix->tld_tree();
foreach my $tld ( keys %$tld_tree ) {
my $decoded_tld = decode("utf8", $tld);
my $ascii = domain_to_ascii( $decoded_tld );
if($decoded_tld ne $ascii ) {
$tld_tree->{$ascii} = $tld_tree->{$tld};
}
}
$suffix->tld_tree($tld_tree);
say "After rebuilding root domain is: " . $suffix->get_root_domain($domain);
say $suffix->error();
__END__
Output:
Net::Domain::TLD::VERSION: 1.72
Data::Validate::Domain::VERSION: 0.10
Domain::PublicSuffix::VERSION: 0.10
tld_exists: 1
is_domain: xn--p8j9a0d9c9a.xn--q9jyb4c
Root domain is:
Invalid TLD
Rebuilding tld_tree
After rebuilding root domain is: xn--p8j9a0d9c9a.xn--q9jyb4c
bash-4.2$ perl -v
This is perl 5, version 16, subversion 0 (v5.16.0) built for x86_64-linux
(with 1 registered patch, see perl -V for more detail)