Subject: | Adding support for Sources, compressed files and filed selection |
distribution = Parse-Debian-Packages-0.01
perl -v = This is perl, v5.8.7 built for i486-linux-gnu-thread-multi
uname -a = Linux thorr 2.6.12+thorr.2 #1 Thu Jul 28 17:01:21 CEST 2005 i686 GNU/Linux
You can now pass a filename to the constructor, the advantage of thef ilename is that you can parse plain/text, gziped or bziped files.
I add the parsing of Sources files with
$source{Files}{$filename} = { size =>
MD5sum =>
}
new() can return undef if the file does not exist of is not in supported format or can not be open.
You can ignore some fileds with ignore_fileds() method, it take a list
of filed names you find in Packages or Sources files, with two special
fileds:
- body which correspond to the long description of Packages,
- reset to reset the list of ignored fileds, this one must be the
first argument of ignore_fileds() call.
--- Packages.pm.old 2005-08-26 02:58:55.000000000 +0200
+++ Packages.pm 2005-08-26 19:02:21.000000000 +0200
@@ -1,25 +1,70 @@
use strict;
package Parse::Debian::Packages;
-our $VERSION = '0.01';
+our $VERSION = "0.02";
+
+use File::MMagic;
+use FileHandle;
+use Compress::Zlib;
+use Compress::Bzip2;
sub new {
my $class = shift;
- my $fh = shift;
+ my $file = shift;
+ my $fh;
- return bless { fh => $fh }, $class;
+ if (! ref $file) {
+ # Caller give us a filename
+ return undef unless -f $file;
+
+ # Default magic is ok for application/x-gzip application/x-bzip2 and text/plain
+ my $magic = File::MMagic->new();
+ my $type = $magic->checktype_filename($file);
+
+ SWITCH: for ($type) {
+ /text\/plain/ && do {
+ $fh = new FileHandle;
+ $fh->open("< $file") or return undef;
+ last;
+ };
+
+ /application\/x-gzip/ && do {
+ $fh = gzopen ($file, "rb") or return undef;
+ last;
+ };
+
+ /application\/x-bzip2/ && do {
+ $fh = bzopen ($file, "rb") or return undef;
+ last;
+ };
+ # It's not a supported file format
+ return undef;
+ }
+ return bless { FH => $fh, FIELDS_IGNORED => {}}, $class;
+ } else {
+ return bless { FH => $file, FIELDS_IGNORED => {}}, $class;
+ }
}
sub next {
my $self = shift;
- my $fh = $self->{fh};
my %parsed;
- while (<$fh>) {
+ my ($key, $value);
+ my ($md5, $size, $filename);
+
+ while ($_ = $self->{FH}->getline) {
last if /^$/;
- if (my ($key, $value) = m/^(.*): (.*)/) {
- $parsed{$key} = $value;
- }
- else {
+
+ if (($key, $value) = m/^([^\s:]*):\s?(.*)/) {
+ # Do not add an empty Files key when parsing Sources
+ $parsed{$key} = $value unless $key eq "Files"
+ or exists $self->{FIELDS_IGNORED}->{$key};
+
+ } elsif (!exists $self->{FIELDS_IGNORED}->{Files}
+ and ($md5, $size, $filename) = /^\s(\w{32})\s(\d+)\s(.*)/) {
+ $parsed{Files}{$filename} = { size => $size, MD5sum => $md5 };
+ } elsif (! /^\s\w{32}\s\d+\s.*/
+ and !exists $self->{FIELDS_IGNORED}->{body}) { # Do not include Sources Files as body
s/ //;
s/^\.$//;
$parsed{body} .= $_;
@@ -29,7 +74,31 @@
return %parsed;
}
-1;
+sub ignore_fileds {
+ my $self = shift;
+ if (@_) {
+ return $self->{FIELDS_IGNORED} = {} if (@_[0] eq "reset");
+
+ return map { $self->{FIELDS_IGNORED}->{$_} = 1
+ unless exists $self->{FIELDS_IGNORED}->{$_} } @_;
+ } else {
+ return sort keys %{$self->{FIELDS_IGNORED}};
+ }
+}
+
+## getline like with FileHandle
+sub Compress::Zlib::getline {
+ my ($self, $line) = shift;
+ $self->gzreadline($line) ? return $line : return '';
+}
+
+sub Compress::Bzip2::getline {
+ my ($self, $line) = shift;
+ $self->bzreadline($line) ? return $line : return '';
+}
+
+
+1
=head1 NAME
@@ -40,24 +109,58 @@
use YAML;
use IO::File;
+ use FileHandle;
use Parse::Debian::Packages;
- my $fh = IO::File->new("Packages");
- my $parser = Parse::Debian::Packages->new( $fh );
- while (my %package = $parser->next) {
+ my $pkg_file = "Packages";
+ my $src_file = "Sources";
+ my $other_src_file = "Sources.bz2";
+
+ my $fh_io = IO::File->new($pkg_file);
+ my $fh_FH = new FileHandle;
+ $fh_FH->open("< $src_file");
+
+ my $parser_on_io = Parse::Debian::Packages->new( $fh_io );
+ my $parser_on_FH = Parse::Debian::Packages->new( $fh_FH );
+ my $parser_on_filename = Parse::Debian::Packages->new( $other_src_file );
+
+ parser_on_io->ignore_fileds("Description", "body");
+ parser_on_FH->ignore_fileds("Build-Depends", "Files");
+
+ my %pkg_with_io = $parser_on_io->next;
+ my %pkg_with_FH = $parser_on_FH->next;
+ my %pkg_with_filename = $parser_on_filename->next;
+
+ print Dump \%pkg_with_io;
+ print Dump \%pkg_with_FH;
+ print Dump \%pkg_with_filename;
+
+ while (my %package = $parser_on_io->next) {
print Dump \%package;
}
=head1 DESCRIPTION
-This module parses the Packages files used by the debian package
-management tools.
+This module parses the Packages and Sources files used by the debian
+package management tools.
It presents itself as an iterator. Each call of the ->next method
will return the next package found in the file.
-For laziness, we take a filehandle in to the constructor. Please open
-the file for us.
+You can pass a FileHandle to the constructor or a filename, the
+advantage of the filename is that you can parse plain/text, gziped or
+bziped files.
+
+If the filename passed to the constructor doesn't repressent a file in
+supported format (text/plain, application/x-gzip, application/x-bzip2)
+or if that file can not be open, new() return undef.
+
+You can ignore some fileds with ignore_fileds() method, it take a list
+of filed names you find in Packages or Sources files, with two special
+filed:
+ - body which correspond to the long description of Packages,
+ - reset to reset the list of ignored fileds, this one must be the
+ first argument of ignore_fileds() call.
=head1 AUTHOR