Subject: | Suggested code for version 0.05 |
Perl 5.8.8 on RHEL 4 and 5
Thanks very much for your module! It was a great start!
I enhanced your package to support the following:
1) styles and formatted strings (especially Dates)
2) 1900/1904 Date convention
3) uses classes from Spreadsheet::ParseExcel for easy compatibility with
existing Excel import code
4) fixed many problems with inconsistencies in importing complex
spreadsheets
Code attached
Put Fmt2007.pm and Utility2007.pm in Spreadsheet::ParseExcel
Subject: | Utility2007.pm |
Message body is not shown because it is too large.
Subject: | Fmt2007.pm |
# Spreadsheet::ParseExcel::FmtDefault
# by Kawai, Takanori (Hippo2000) 2001.2.2
# This Program is ALPHA version.
#==============================================================================
package Spreadsheet::ParseExcel::Fmt2007;
use strict;
use warnings;
use Spreadsheet::ParseExcel::Utility2007 qw(ExcelFmt);
our $VERSION = '0.05'; #
my %hFmtDefault = (
0x00 => '@',
0x01 => '0',
0x02 => '0.00',
0x03 => '#,##0',
0x04 => '#,##0.00',
0x05 => '($#,##0_);($#,##0)',
0x06 => '($#,##0_);[RED]($#,##0)',
0x07 => '($#,##0.00_);($#,##0.00_)',
0x08 => '($#,##0.00_);[RED]($#,##0.00_)',
0x09 => '0%',
0x0A => '0.00%',
0x0B => '0.00E+00',
0x0C => '# ?/?',
0x0D => '# ??/??',
0x0E => 'm-d-yy',
0x0F => 'd-mmm-yy',
0x10 => 'd-mmm',
0x11 => 'mmm-yy',
0x12 => 'h:mm AM/PM',
0x13 => 'h:mm:ss AM/PM',
0x14 => 'h:mm',
0x15 => 'h:mm:ss',
0x16 => 'm-d-yy h:mm',
#0x17-0x24 -- Differs in Natinal
0x25 => '(#,##0_);(#,##0)',
0x26 => '(#,##0_);[RED](#,##0)',
0x27 => '(#,##0.00);(#,##0.00)',
0x28 => '(#,##0.00);[RED](#,##0.00)',
0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)',
0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)',
0x2D => 'mm:ss',
0x2E => '[h]:mm:ss',
0x2F => 'mm:ss.0',
0x30 => '##0.0E+0',
0x31 => '@',
);
#------------------------------------------------------------------------------
# new (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub new {
my($sPkg, %hKey) = @_;
my $oThis={
};
bless $oThis;
return $oThis;
}
#------------------------------------------------------------------------------
# TextFmt (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub TextFmt {
my($oThis, $sTxt, $sCode) =@_;
return $sTxt if((! defined($sCode)) || ($sCode eq '_native_'));
return pack('U*', unpack('n*', $sTxt));
}
#------------------------------------------------------------------------------
# FmtStringDef (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub FmtStringDef {
my($oThis, $iFmtIdx, $oBook, $rhFmt) =@_;
my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx};
if(!(defined($sFmtStr)) && defined($rhFmt)) {
$sFmtStr = $rhFmt->{$iFmtIdx};
}
$sFmtStr = $hFmtDefault{$iFmtIdx} unless($sFmtStr);
return $sFmtStr;
}
#------------------------------------------------------------------------------
# FmtString (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub FmtString {
my($oThis, $oCell, $oBook) =@_;
my $sFmtStr;# = $oThis->FmtStringDef(
# $oBook->{Format}[$oCell->{FormatNo}]->{FmtIdx}, $oBook);
unless(defined($sFmtStr)) {
if ($oCell->{Type} eq 'Numeric') {
if($oCell->{Format}){
$sFmtStr=$oCell->{Format};
} elsif(int($oCell->{Val}) != $oCell->{Val}) {
$sFmtStr = '0.00';
}
else {
$sFmtStr = '0';
}
}
elsif($oCell->{Type} eq 'Date') {
if($oCell->{Format}){
$sFmtStr=$oCell->{Format};
} elsif(int($oCell->{Val}) <= 0) {
$sFmtStr = 'h:mm:ss';
}
else {
$sFmtStr = 'm-d-yy';
}
}
else {
$sFmtStr = '@';
}
}
return $sFmtStr;
}
#------------------------------------------------------------------------------
# ValFmt (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub ValFmt {
my($oThis, $oCell, $oBook) =@_;
my($Dt, $iFmtIdx, $iNumeric, $Flg1904);
if ($oCell->{Type} eq 'Text') {
$Dt = ((defined $oCell->{Val}) && ($oCell->{Val} ne ''))?
$oThis->TextFmt($oCell->{Val}, $oCell->{Code}):'';
}
else {
$Dt = $oCell->{Val};
}
$Flg1904 = $oBook->{Flg1904};
my $sFmtStr = $oThis->FmtString($oCell, $oBook);
return ExcelFmt($sFmtStr, $Dt, $Flg1904, $oCell->{Type});
}
#------------------------------------------------------------------------------
# ChkType (for Spreadsheet::ParseExcel::FmtDefault)
#------------------------------------------------------------------------------
sub ChkType {
my($oPkg, $iNumeric, $iFmtIdx) =@_;
if ($iNumeric) {
if((($iFmtIdx >= 0x0E) && ($iFmtIdx <= 0x16)) ||
(($iFmtIdx >= 0x2D) && ($iFmtIdx <= 0x2F))) {
return "Date";
}
else {
return "Numeric";
}
}
else {
return "Text";
}
}
1;
Subject: | XLSX.pm |
package Spreadsheet::XLSX;
use 5.008008;
use strict;
use warnings;
our @ISA = qw();
our $VERSION = '0.03';
use Archive::Zip;
use Spreadsheet::ParseExcel::FmtDefault;
use Data::Dumper;
################################################################################
sub new {
my ($class, $filename, $converter) = @_;
my $self = {};
$self -> {zip} = Archive::Zip -> new ($filename) or die ("Cant't open $filename as a zip file\n");
my $member_shared_strings = $self -> {zip} -> memberNamed ('xl/sharedStrings.xml');
my @shared_strings = ();
if ($member_shared_strings) {
my $mstr = $member_shared_strings->contents;
$mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/>
#foreach my $t ($member_shared_strings -> contents =~ /t\>([^\<]*)\<\/t/gsm) {
foreach my $t ($mstr =~ /t>(.*?)<\/t/gsm) {
$t = $converter -> convert ($t) if $converter;
push @shared_strings, $t;
}
}
my $member_styles = $self -> {zip} -> memberNamed ('xl/styles.xml');
my @styles = ();
my %style_info = ();
if ($member_styles) {
foreach my $t ($member_styles -> contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) {
# $t = $converter -> convert ($t) if $converter;
push @styles, $t;
}
my $default = $1;
foreach my $t1 (@styles){
$member_styles -> contents =~ /numFmtId="$t1" formatCode="([^"]*)/;
my $formatCode=$1;
if ($formatCode eq $default || not($formatCode)){
if ($t1 == 9 || $t1==10){ $formatCode="0.00000%";}
elsif ($t1 == 14){ $formatCode="m-d-yy";}
else {
$formatCode="";
}
}
$style_info{$t1} = $formatCode;
$default=$1;
}
}
my $member_workbook = $self -> {zip} -> memberNamed ('xl/workbook.xml') or die ("xl/workbook.xml not found in this zip\n");
my $oBook = Spreadsheet::ParseExcel::Workbook->new;
$oBook->{SheetCount} = 0;
$oBook->{FmtClass} = Spreadsheet::ParseExcel::Fmt2007->new;
$oBook->{Flg1904}=0;
if ($member_workbook->contents =~ /date1904="1"/){
$oBook->{Flg1904}=1;
}
my @Worksheet = ();
foreach ($member_workbook -> contents =~ /\<(.*?)\/?\>/g) {
/^(\w+)\s+/;
my ($tag, $other) = ($1, $');
my @pairs = split /\" /, $other;
$tag eq 'sheet' or next;
my $sheet = {
MaxRow => 0,
MaxCol => 0,
MinRow => 1000000,
MinCol => 1000000,
};
foreach ($other =~ /(\S+=".*?")/gsm) {
my ($k, $v) = split /=?"/;
if ($k eq 'name') {
$sheet -> {Name} = $v;
$sheet -> {Name} = $converter -> convert ($sheet -> {Name}) if $converter;
}
elsif ($k eq 'r:id') {
($sheet -> {Id}) = $v =~ m{rId(\d+)};
};
}
my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet);
push @Worksheet, $wsheet;
$oBook->{Worksheet}[$oBook->{SheetCount}] = $wsheet;
$oBook->{SheetCount}+=1;
}
$self -> {Worksheet} = \@Worksheet;
foreach my $sheet (@Worksheet) {
my $member_name = "xl/worksheets/sheet$sheet->{Id}.xml";
my $member_sheet = $self -> {zip} -> memberNamed ($member_name) or die ("$member_name not found in this zip\n");
my ($row, $col);
my $flag = 0;
my $s = 0;
my $s2 = 0;
my $sty = 0;
foreach ($member_sheet -> contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) {
if (/^\<c r=\"([A-Z])([A-Z]?)(\d+)\"/) {
$col = ord ($1) - 65;
if ($2) {
$col++;
$col *= 26;
$col += (ord ($2) - 65);
}
$row = $3 - 1;
$s = /t=\"s\"/ ? 1 : 0;
$s2 = /t=\"str\"/ ? 1 : 0;
/s="([^"]*)"/;
$sty = $1>0 ? $1 : 0 ;
}
elsif (/^<v/) {
$flag = 1;
}
elsif (/^<\/v/) {
$flag = 0;
}
elsif (length ($_) && $flag) {
my $v = $s ? $shared_strings [$_] : $_;
if ($v eq "</c>"){$v="";}
my $type = "Text";
my $thisstyle = "";
if (not($s) && not($s2)){
$type="Numeric";
$thisstyle = $style_info{$styles[$sty]};
if ($thisstyle =~ /(?<!Re)d|m|y/){
$type="Date";
}
}
$sheet -> {MaxRow} = $row if $sheet -> {MaxRow} < $row;
$sheet -> {MaxCol} = $col if $sheet -> {MaxCol} < $col;
$sheet -> {MinRow} = $row if $sheet -> {MinRow} > $row;
$sheet -> {MinCol} = $col if $sheet -> {MinCol} > $col;
if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric"){
$v=$1/(10**$2); # this handles scientific notation for very small numbers
}
my $cell =Spreadsheet::ParseExcel::Cell->new(
Val => $v,
Format => $thisstyle,
Type => $type
);
$cell->{_Value} = $oBook->{FmtClass}->ValFmt($cell, $oBook);
if ($type eq "Date" && $v<1){ #then this is Excel time field
$cell->{Type}="Text";
$cell->{Val}=$cell->{_Value};
}
$sheet -> {Cells} [$row] [$col] = $cell;
}
}
$sheet -> {MinRow} = 0 if $sheet -> {MinRow} > $sheet -> {MaxRow};
$sheet -> {MinCol} = 0 if $sheet -> {MinCol} > $sheet -> {MaxCol};
}
foreach my $stys (keys %style_info){
}
bless ($self, $class);
return $oBook;
}
1;
__END__
=head1 NAME
Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files;
=head1 SYNOPSIS
use Text::Iconv;
my $converter = Text::Iconv -> new ("utf-8", "windows-1251");
# Text::Iconv is not really required.
# This can be any object with the convert method. Or nothing.
use Spreadsheet::XLSX;
my $excel = Spreadsheet::XLSX -> new ('test.xlsx', $converter);
foreach my $sheet (@{$excel -> {Worksheet}}) {
printf("Sheet: %s\n", $sheet->{Name});
$sheet -> {MaxRow} ||= $sheet -> {MinRow};
foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) {
$sheet -> {MaxCol} ||= $sheet -> {MinCol};
foreach my $col ($sheet -> {MinCol} .. $sheet -> {MaxCol}) {
my $cell = $sheet -> {Cells} [$row] [$col];
if ($cell) {
printf("( %s , %s ) => %s\n", $row, $col, $cell -> {Val});
}
}
}
}
=head1 DESCRIPTION
This module is a (quick and dirty) emulation of Spreadsheet::ParseExcel for
Excel 2007 (.xlsx) file format.
=head1 SEE ALSO
=over 2
=item Text::CSV_XS, Text::CSV_PP
http://search.cpan.org/~hmbrand/
A pure perl version is available on http://search.cpan.org/~makamaka/
=item Spreadsheet::ParseExcel
http://search.cpan.org/~kwitknr/
=item Spreadsheet::ReadSXC
http://search.cpan.org/~terhechte/
=item Spreadsheet::BasicRead
http://search.cpan.org/~gng/ for xlscat likewise functionality (Excel only)
=item Spreadsheet::ConvertAA
http://search.cpan.org/~nkh/ for an alternative set of cell2cr () /
cr2cell () pair
=item Spreadsheet::Perl
http://search.cpan.org/~nkh/ offers a Pure Perl implementation of a
spreadsheet engine. Users that want this format to be supported in
Spreadsheet::Read are hereby motivated to offer patches. It's not high
on my todo-list.
=item xls2csv
http://search.cpan.org/~ken/ offers an alternative for my C<xlscat -c>,
in the xls2csv tool, but this tool focusses on character encoding
transparency, and requires some other modules.
=item Spreadsheet::Read
http://search.cpan.org/~hmbrand/ read the data from a spreadsheet (interface
module)
=back
=head1 AUTHOR
Dmitry Ovsyanko, E<lt>do@eludia.ru<gt>, http://eludia.ru/wiki/
Patches by:
Steve Simms
Joerg Meltzer
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2008 by Dmitry Ovsyanko
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.
=cut