Subject: | lzw compression fixup |
Martin,
I have been cleaning up some old patches here. You might want to check
the following, as it fixed some random errors in the LZW compression when
dealing with millions of PDF files. Unfortunately, I cannot find the
version of the code that it is derived from. Our notes here say version
29, but I cannot be sure.
Dan
dan@jumbuk.com
---snip
package Text::PDF::LZWDecode;

# no warnings qw(uninitialized);
use warnings;
use vars qw(@ISA @basedict);

# The LZW filter inherits from the Flate filter class.
@ISA = ('Text::PDF::FlateDecode');

# Initial code table: entry N (0..255) is the one-byte string with value N.
# Slots 256 and 257 are filled with a NUL byte; infilt treats those two code
# numbers specially (table reset / stop) before it ever looks them up.
@basedict = ( ( map { chr($_) } 0 .. 255 ), ("\0") x 2 );
# Constructor for the LZW decoder.
#
#   $class - package name the new object is blessed into.
#
# Returns a blessed hash reference holding:
#   indict - the code table, a private copy of @basedict, so that entries
#            added while decoding never alter the shared initial table;
#   insize - the code width handed to read_dat by infilt, starting at 9.
sub new {
    my ($class) = @_;
    my $self = {
        'indict' => [@basedict],
        'insize' => 9,
    };

    # No encoder state is created:
    # $self->{'outfilt'} = Compress::Zlib::deflateInit(); # patent precludes LZW encoding
    return bless $self, $class;
}
# Decode a string of LZW-compressed data.
#
#   $self - decoder object; its 'indict' (code table) and 'insize' (code
#           width handed to read_dat) entries are read and updated.
#   $dat  - compressed input string; undef is treated as empty.
#   $last - accepted for interface compatibility; not used here.
#
# Returns the decoded string. Decoding stops at code 257 (end of data), when
# the input is used up, or at the first code that has no table entry (a
# warning is issued and the output decoded so far is returned).
sub infilt {
    my ( $self, $dat, $last ) = @_;
    my ( $num, $cache, $cache_size ) = ( undef, 0, 0 );
    my $res = '';

    # Index of the next table slot to create. Codes 256 and 257 are reserved
    # (reset / end of data), so new entries start at 258.
    my $count = 258;

    $dat = '' unless defined $dat;

    while ( $dat ne '' || $cache_size > 0 ) {

        # read_dat is defined outside this block; it is given a reference to
        # the remaining input plus the carried-over cache state and returns
        # the next code together with the updated cache state.
        ( $num, $cache, $cache_size ) =
          $self->read_dat( \$dat, $cache, $cache_size, $self->{'insize'} );

        last unless defined $num;
        return $res if $num == 257;

        if ( $num == 256 ) {

            # Reset: start again from the initial table and code width.
            $self->{'indict'} = [@basedict];
            $self->{'insize'} = 9;
            $count            = 258;
            next;
        }

        # Fetch the table only now, because a reset replaces the array.
        my $dict = $self->{'indict'};
        unless ( defined $dict->[$num] ) {
            warn "LZWDecode: code $num has no table entry, decoding stopped\n";
            last;
        }

        # The previous iteration left slot $count-1 holding only its prefix;
        # complete it with the first byte of the current string. This must
        # happen before the current string is read, since $num may be that
        # very slot. Slots past 4095 are never created, so never extended.
        if ( $count > 258 && $count <= 4096 ) {
            $dict->[ $count - 1 ] .= substr( $dict->[$num], 0, 1 );
        }

        my $string = $dict->[$num];

        # Open the next slot with the current string as its prefix, unless
        # the table is full (4096 entries).
        $dict->[$count] = $string if $count < 4096;
        $count++;
        $res .= $string;

        # Widen the codes at the size thresholds. Nothing is done on a full
        # table: the encoder tells us when to clear.
        if    ( $count == 512 )  { $self->{'insize'} = 10; }
        elsif ( $count == 1024 ) { $self->{'insize'} = 11; }
        elsif ( $count == 2048 ) { $self->{'insize'} = 12; }
    }
    return $res;
}