Subject: | [PATCH] Remove dependency on YAML, period |
While upgrading Date::Manip recently, I noticed that it still depends on
YAML::Syck. https://rt.cpan.org/Ticket/Display.html?id=77767 explains
why it's not simple to switch to YAML::XS. I also noticed some code in
internal/ about switching to JSON.
But I respectfully submit that while both YAML and JSON have their uses,
initializing a data structure from a constant string in your own program
is not one of them. Perl already has a method for doing that: literals.
Accordingly, I wrote a script to load the YAML data and output Perl
literals using a modified copy of Data::Dump (included). I used
Devel::Peek to verify that the resulting data
structures are identical, including the UTF-8 flag being off.
Since YAML::Syck was the only non-core dependency, this means you no
longer need a C compiler to install Date::Manip. Also, the test suite
(minus the POD tests) runs about 2% faster, thanks to removing the
overhead of loading YAML::Syck.
Personally, I would just edit the Perl data structures directly from now
on. However, if you prefer to edit YAML, you could certainly adapt my
script to regenerate the .pm files from YAML files you maintain. As
long as you do that on your machine before releasing, people wouldn't
need a YAML library to install Date::Manip.
I've attached both my script to do the conversion, and a patch that
shows the result of running it on Date::Manip 6.34.
Note that in order to run the script, you first have to rename all the
language.pm files to language.old. The script then reads the .old files
and writes updated .pm files.
Subject: | no-yaml.patch.txt |
Message body is not shown because it is too large.
Subject: | dm-yaml2perl.pl |
#! /usr/bin/env perl
#---------------------------------------------------------------------
# Convert Date::Manip to use Perl literals instead of YAML
#
# Copyright 2012 Christopher J. Madsen
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# First, rename all language.pm files to language.old
# Then run this program in the lib/Date/Manip/Lang/ directory
#---------------------------------------------------------------------
use strict;
use warnings;
use 5.010;
use YAML::XS qw(Load);
use File::Slurp qw(read_file write_file);
#=====================================================================
#=====================================================================
# This portion of the code is a hacked-up version of Data::Dump 1.21.
#
# Data::Dump didn't quite do what I wanted, but it was close. Aside
# from removing features not needed for this program, the changes are
# to make it quote hash keys containing non-ASCII data, output all
# values as strings (not numbers), output strings in single quotes
# using UTF-8 characters, and use longer lines.
#
# Copyright 1998-2010 Gisle Aas.
# Copyright 1996-1998 Gurusamy Sarathy.
#
# This library is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#---------------------------------------------------------------------
my $INDENT = ' ';
sub dumpit
{
_dump(shift, 'name', [], undef);
}
sub _dump
{
my $ref = ref $_[0];
my $rval = $ref ? $_[0] : \$_[0];
shift;
my($name, $idx, $dont_remember, $pclass, $pidx) = @_;
my($class, $type, $id);
my $strval = "$rval";
# Parse $strval without using regexps, in order not to clobber $1, $2,...
if ((my $i = index($strval, "=")) >= 0) {
$class = substr($strval, 0, $i);
$strval = substr($strval, $i+1);
}
if ((my $i = index($strval, "(0x")) >= 0) {
$type = substr($strval, 0, $i);
$id = substr($strval, $i + 2, -1);
}
else {
die "Can't parse $rval";
}
# warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
my $out;
if ($class) {
$pclass = $class;
$pidx = @$idx;
}
if ($type eq "SCALAR") {
if (!defined $$rval) {
$out = "undef";
}
# elsif (do {no warnings 'numeric'; $$rval + 0 eq $$rval}) {
# $out = $$rval;
# }
else {
$out = quote($$rval);
}
}
elsif ($type eq "ARRAY") {
my @vals;
my $tied;# = tied_str(tied(@$rval));
my $i = 0;
for my $v (@$rval) {
push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied, $pclass, $pidx));
$i++;
}
$out = "[" . format_list(1, $tied, @vals) . "]";
}
elsif ($type eq "HASH") {
my(@keys, @vals);
my $tied;# = tied_str(tied(%$rval));
# statistics to determine variation in key lengths
my $kstat_max = 0;
my $kstat_sum = 0;
my $kstat_sum2 = 0;
my @orig_keys = keys %$rval;
my $text_keys = 0;
for (@orig_keys) {
$text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
}
if ($text_keys) {
@orig_keys = sort { lc($a) cmp lc($b) } @orig_keys;
}
else {
@orig_keys = sort { $a <=> $b } @orig_keys;
}
my $quote;
for my $key (@orig_keys) {
unless ($key =~ /[^\040-\176]/) {
next if $key =~ /^-?[a-zA-Z_]\w*\z/;
next if $key =~ /^-?[1-9]\d{0,8}\z/;
}
$quote++;
last;
}
for my $key (@orig_keys) {
my $val = \$rval->{$key}; # capture value before we modify $key
$key = quote($key) if $quote;
$kstat_max = length($key) if length($key) > $kstat_max;
$kstat_sum += length($key);
$kstat_sum2 += length($key)*length($key);
push(@keys, $key);
push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied, $pclass, $pidx));
}
my $nl = "";
my $klen_pad = 0;
my $tmp = "@keys @vals";
if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
$nl = "\n";
# Determine what padding to add
if ($kstat_max < 4) {
$klen_pad = $kstat_max;
}
elsif (@keys >= 2) {
my $n = @keys;
my $avg = $kstat_sum/$n;
my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
# I am not actually very happy with this heuristics
if ($stddev / $kstat_max < 0.25) {
$klen_pad = $kstat_max;
}
}
}
$out = "{$nl";
$out .= "$INDENT# $tied$nl" if $tied;
while (@keys) {
my $key = shift @keys;
my $val = shift @vals;
my $vpad = $INDENT . (" " x ($klen_pad ? $klen_pad + 4 : 0));
$val =~ s/\n/\n$vpad/gm;
my $kpad = $nl ? $INDENT : " ";
$key .= " " x ($klen_pad - length($key)) if $nl;
$out .= "$kpad$key => $val,$nl";
}
$out =~ s/,$/ / unless $nl;
$out .= "}";
}
else {
warn "Can't handle $type data";
$out = "'#$type#'";
}
return $out;
}
sub format_list
{
my $paren = shift;
my $comment = shift;
my $indent_lim = $paren ? 0 : 1;
if (@_ > 3) {
# can we use range operator to shorten the list?
my $i = 0;
while ($i < @_) {
my $j = $i + 1;
my $v = $_[$i];
while ($j < @_) {
# XXX allow string increment too?
if ($v eq "0" || $v =~ /^-?[1-9]\d{0,9}\z/) {
$v++;
}
elsif ($v =~ /^"([A-Za-z]{1,3}\d*)"\z/) {
$v = $1;
$v++;
$v = qq("$v");
}
else {
last;
}
last if $_[$j] ne $v;
$j++;
}
if ($j - $i > 3) {
splice(@_, $i, $j - $i, "$_[$i] .. $_[$j-1]");
}
$i++;
}
}
#my $tmp = "@_";
my $tmp = join(", ", @_);
if ($comment || (@_ > $indent_lim && (length($tmp) > 76 || $tmp =~ /\n/))) {
my @elem = @_;
for (@elem) { s/^/$INDENT/gm; }
return "\n" . ($comment ? "$INDENT# $comment\n" : "") .
join(",\n", @elem, "");
} else {
return $tmp;#join(", ", @_);
}
}
sub quote
{
local($_) = $_[0];
s/(['\\])/\\$1/g;
return qq('$_');
}
#---------------------------------------------------------------------
# Data::Dump code ends here
#=====================================================================
#=====================================================================
# Main loop:
for my $fn (glob '*.old') {
say $fn;
my $content = read_file( $fn, { binmode => ':raw' } );
$content =~ s/^use YAML::Syck;\n//m or die;
$content =~ s/^(.+\n)__DATA__\n//s or die;
my $perl = $1;
my $data = dumpit(Load($content));
$data = ("# These strings are raw undecoded UTF-8 octets:\n" .
"{ no utf8; \$Language = $data}\n\n");
utf8::encode($data);
$perl =~ s/^my \s+ \@in \s* = \s* <DATA>; \s*
\$Language \s* = \s* Load\(join\('', \s* \@in\)\); \s*
close \s+ DATA; \s*
/$data/mx or die;
$fn =~ s/old$/pm/ or die;
write_file( $fn, { binmode => ':raw' }, $perl );
}