Subject: | [PATCH] Archive::Tar->iter |
The attached patch relative to Archive-Tar-1.38 implements the 'iter'
class method which returns an iterator function that reads the tar file
without loading it all in memory. Please consider including it for the
next release.
Regards,
Gisle
Subject: | iter.patch |
commit fe230a1c0cff94078705c6b69a50835cf0ee5852
Author: Gisle Aas <gisle@aas.no>
Date: Tue Aug 19 12:28:12 2008 +0200
Add Archive::Tar->iter() method.
diff --git a/lib/Archive/Tar.pm b/lib/Archive/Tar.pm
index 508bcfe..c6fbfd8 100644
--- a/lib/Archive/Tar.pm
+++ b/lib/Archive/Tar.pm
@@ -153,6 +153,11 @@ all options are case-sensitive.
Do not read more than C<limit> files. This is useful if you have
very big archives, and are only interested in the first few files.
+=item filter
+
+Can be set to a regular expression. Only files with names that match
+the expression will be read.
+
=item extract
If set to true, immediately extract entries when reading them. This
@@ -237,6 +242,7 @@ sub _read_tar {
my $opts = shift || {};
my $count = $opts->{limit} || 0;
+ my $filter = $opts->{filter};
my $extract = $opts->{extract} || 0;
### set a cap on the amount of files to extract ###
@@ -372,6 +378,10 @@ sub _read_tar {
undef $real_name;
}
+ if ($filter && $entry->name !~ $filter) {
+ next LOOP;
+ }
+
$self->_extract_file( $entry ) if $extract
&& !$entry->is_longlink
&& !$entry->is_unknown
@@ -1431,6 +1441,58 @@ sub create_archive {
return $tar->write( $file, $gzip );
}
+=head2 Archive::Tar->iter( $filename, {opt => $val} )
+
+Returns an iterator function that reads the tar file without loading
+it all in memory. Each time the function is called it will return the
+next file in the tarball. The files are returned as
+C<Archive::Tar::File> objects. The iterator function returns the
+empty list once it has exhausted the the files contained.
+
+The second argument can be a hash reference with options. Note that
+all options are case-sensitive.
+
+=over 4
+
+=item filter
+
+Can be set to a regular expression. Only files with names that match
+the expression will be returned (or extracted).
+
+=item extract
+
+If set to true, extract the files strait to disk. This lowers the
+memory requirement even further.
+
+=back
+
+Example usage:
+
+ my $next = Archive::Tar->iter("example.tar.gz", {filter => qr/\.pm$/});
+ while (my $f = $next->()) {
+ print $f->name, "\n";
+ # ...
+ }
+
+=cut
+
+sub iter {
+ my($class, $filename, $opt) = @_;
+ my %opt;
+ %opt = %$opt if $opt; # copy
+ my $handle = $class->_get_handle($filename, delete $opt{compressed}, READ_ONLY->( ZLIB ));
+ $opt{limit} = 1;
+ my @data;
+ return sub {
+ return shift(@data) if @data;
+ return unless $handle;
+ @data = @{$class->_read_tar($handle, \%opt)};
+ return shift(@data) if @data;
+ undef($handle);
+ return;
+ };
+}
+
=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
Returns a list of the names of all the files in the archive. The
diff --git a/t/05_iter.t b/t/05_iter.t
new file mode 100644
index 0000000..48f6971
--- /dev/null
+++ b/t/05_iter.t
@@ -0,0 +1,33 @@
+BEGIN {
+ if( $ENV{PERL_CORE} ) {
+ chdir '../lib/Archive/Tar' if -d '../lib/Archive/Tar';
+ }
+ use lib '../../..';
+}
+
+BEGIN { chdir 't' if -d 't' }
+
+use Test::More tests => 5;
+use strict;
+use lib '../lib';
+
+use Archive::Tar;
+
+my @names;
+my $next = Archive::Tar->iter("src/long/bar.tar");
+while (my $f = $next->()) {
+ push(@names, $f->name);
+}
+
+is(@names, 5);
+is($names[0], "c");
+is($names[4], "directory/really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-really-long-directory-name/myfile");
+
+@names = ();
+$next = Archive::Tar->iter("src/long/bar.tar", {filter => qr/^c/});
+while (my $f = $next->()) {
+ push(@names, $f->name);
+}
+
+is(@names, 1);
+is($names[0], "c");