Skip Menu |

This queue is for tickets about the Spreadsheet-XLSX CPAN distribution.

Report information
The Basics
Id: 41042
Status: resolved
Priority: 0/
Queue: Spreadsheet-XLSX

People
Owner: Nobody in particular
Requestors: rob.polocz [...] trackvia.com
Cc:
AdminCc:

Bug Information
Severity: Important
Broken in: (no value)
Fixed in: (no value)



Subject: Suggested code for version 0.05
Perl 5.8.8 on RHEL 4 and 5 Thanks very much for your module! It was a great start! I enhanced your package to support the following: 1) styles and formatted strings (especially Dates) 2) 1900/1904 Date convention 3) uses classes from Spreadsheet::ParseExcel for easy compatibility with existing Excel import code 4) fixed many problems with inconsistencies in importing complex spreadsheets Code attached Put Fmt2007.pm and Utility2007.pm in Spreadsheet::ParseExcel
Subject: Utility2007.pm

Message body is not shown because it is too large.

Subject: Fmt2007.pm
# Spreadsheet::ParseExcel::FmtDefault # by Kawai, Takanori (Hippo2000) 2001.2.2 # This Program is ALPHA version. #============================================================================== package Spreadsheet::ParseExcel::Fmt2007; use strict; use warnings; use Spreadsheet::ParseExcel::Utility2007 qw(ExcelFmt); our $VERSION = '0.05'; # my %hFmtDefault = ( 0x00 => '@', 0x01 => '0', 0x02 => '0.00', 0x03 => '#,##0', 0x04 => '#,##0.00', 0x05 => '($#,##0_);($#,##0)', 0x06 => '($#,##0_);[RED]($#,##0)', 0x07 => '($#,##0.00_);($#,##0.00_)', 0x08 => '($#,##0.00_);[RED]($#,##0.00_)', 0x09 => '0%', 0x0A => '0.00%', 0x0B => '0.00E+00', 0x0C => '# ?/?', 0x0D => '# ??/??', 0x0E => 'm-d-yy', 0x0F => 'd-mmm-yy', 0x10 => 'd-mmm', 0x11 => 'mmm-yy', 0x12 => 'h:mm AM/PM', 0x13 => 'h:mm:ss AM/PM', 0x14 => 'h:mm', 0x15 => 'h:mm:ss', 0x16 => 'm-d-yy h:mm', #0x17-0x24 -- Differs in Natinal 0x25 => '(#,##0_);(#,##0)', 0x26 => '(#,##0_);[RED](#,##0)', 0x27 => '(#,##0.00);(#,##0.00)', 0x28 => '(#,##0.00);[RED](#,##0.00)', 0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', 0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)', 0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', 0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)', 0x2D => 'mm:ss', 0x2E => '[h]:mm:ss', 0x2F => 'mm:ss.0', 0x30 => '##0.0E+0', 0x31 => '@', ); #------------------------------------------------------------------------------ # new (for Spreadsheet::ParseExcel::FmtDefault) #------------------------------------------------------------------------------ sub new { my($sPkg, %hKey) = @_; my $oThis={ }; bless $oThis; return $oThis; } #------------------------------------------------------------------------------ # TextFmt (for Spreadsheet::ParseExcel::FmtDefault) #------------------------------------------------------------------------------ sub TextFmt { my($oThis, $sTxt, $sCode) =@_; return $sTxt if((! defined($sCode)) || ($sCode eq '_native_')); return pack('U*', unpack('n*', $sTxt)); } #------------------------------------------------------------------------------ # FmtStringDef (for Spreadsheet::ParseExcel::FmtDefault) #------------------------------------------------------------------------------ sub FmtStringDef { my($oThis, $iFmtIdx, $oBook, $rhFmt) =@_; my $sFmtStr = $oBook->{FormatStr}->{$iFmtIdx}; if(!(defined($sFmtStr)) && defined($rhFmt)) { $sFmtStr = $rhFmt->{$iFmtIdx}; } $sFmtStr = $hFmtDefault{$iFmtIdx} unless($sFmtStr); return $sFmtStr; } #------------------------------------------------------------------------------ # FmtString (for Spreadsheet::ParseExcel::FmtDefault) #------------------------------------------------------------------------------ sub FmtString { my($oThis, $oCell, $oBook) =@_; my $sFmtStr;# = $oThis->FmtStringDef( # $oBook->{Format}[$oCell->{FormatNo}]->{FmtIdx}, $oBook); unless(defined($sFmtStr)) { if ($oCell->{Type} eq 'Numeric') { if($oCell->{Format}){ $sFmtStr=$oCell->{Format}; } elsif(int($oCell->{Val}) != $oCell->{Val}) { $sFmtStr = '0.00'; } else { $sFmtStr = '0'; } } elsif($oCell->{Type} eq 'Date') { if($oCell->{Format}){ $sFmtStr=$oCell->{Format}; } elsif(int($oCell->{Val}) <= 0) { $sFmtStr = 'h:mm:ss'; } else { $sFmtStr = 'm-d-yy'; } } else { $sFmtStr = '@'; } } return $sFmtStr; } #------------------------------------------------------------------------------ # ValFmt (for Spreadsheet::ParseExcel::FmtDefault) #------------------------------------------------------------------------------ sub ValFmt { my($oThis, $oCell, $oBook) =@_; my($Dt, $iFmtIdx, $iNumeric, $Flg1904); if ($oCell->{Type} eq 'Text') { $Dt = ((defined $oCell->{Val}) && ($oCell->{Val} ne ''))? $oThis->TextFmt($oCell->{Val}, $oCell->{Code}):''; } else { $Dt = $oCell->{Val}; } $Flg1904 = $oBook->{Flg1904}; my $sFmtStr = $oThis->FmtString($oCell, $oBook); return ExcelFmt($sFmtStr, $Dt, $Flg1904, $oCell->{Type}); } #------------------------------------------------------------------------------ # ChkType (for Spreadsheet::ParseExcel::FmtDefault) #------------------------------------------------------------------------------ sub ChkType { my($oPkg, $iNumeric, $iFmtIdx) =@_; if ($iNumeric) { if((($iFmtIdx >= 0x0E) && ($iFmtIdx <= 0x16)) || (($iFmtIdx >= 0x2D) && ($iFmtIdx <= 0x2F))) { return "Date"; } else { return "Numeric"; } } else { return "Text"; } } 1;
Subject: XLSX.pm
package Spreadsheet::XLSX; use 5.008008; use strict; use warnings; our @ISA = qw(); our $VERSION = '0.03'; use Archive::Zip; use Spreadsheet::ParseExcel::FmtDefault; use Data::Dumper; ################################################################################ sub new { my ($class, $filename, $converter) = @_; my $self = {}; $self -> {zip} = Archive::Zip -> new ($filename) or die ("Cant't open $filename as a zip file\n"); my $member_shared_strings = $self -> {zip} -> memberNamed ('xl/sharedStrings.xml'); my @shared_strings = (); if ($member_shared_strings) { my $mstr = $member_shared_strings->contents; $mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/> #foreach my $t ($member_shared_strings -> contents =~ /t\>([^\<]*)\<\/t/gsm) { foreach my $t ($mstr =~ /t>(.*?)<\/t/gsm) { $t = $converter -> convert ($t) if $converter; push @shared_strings, $t; } } my $member_styles = $self -> {zip} -> memberNamed ('xl/styles.xml'); my @styles = (); my %style_info = (); if ($member_styles) { foreach my $t ($member_styles -> contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { # $t = $converter -> convert ($t) if $converter; push @styles, $t; } my $default = $1; foreach my $t1 (@styles){ $member_styles -> contents =~ /numFmtId="$t1" formatCode="([^"]*)/; my $formatCode=$1; if ($formatCode eq $default || not($formatCode)){ if ($t1 == 9 || $t1==10){ $formatCode="0.00000%";} elsif ($t1 == 14){ $formatCode="m-d-yy";} else { $formatCode=""; } } $style_info{$t1} = $formatCode; $default=$1; } } my $member_workbook = $self -> {zip} -> memberNamed ('xl/workbook.xml') or die ("xl/workbook.xml not found in this zip\n"); my $oBook = Spreadsheet::ParseExcel::Workbook->new; $oBook->{SheetCount} = 0; $oBook->{FmtClass} = Spreadsheet::ParseExcel::Fmt2007->new; $oBook->{Flg1904}=0; if ($member_workbook->contents =~ /date1904="1"/){ $oBook->{Flg1904}=1; } my @Worksheet = (); foreach ($member_workbook -> contents =~ /\<(.*?)\/?\>/g) { /^(\w+)\s+/; my ($tag, $other) = ($1, $'); my @pairs = split /\" /, $other; $tag eq 'sheet' or next; my $sheet = { MaxRow => 0, MaxCol => 0, MinRow => 1000000, MinCol => 1000000, }; foreach ($other =~ /(\S+=".*?")/gsm) { my ($k, $v) = split /=?"/; if ($k eq 'name') { $sheet -> {Name} = $v; $sheet -> {Name} = $converter -> convert ($sheet -> {Name}) if $converter; } elsif ($k eq 'r:id') { ($sheet -> {Id}) = $v =~ m{rId(\d+)}; }; } my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet); push @Worksheet, $wsheet; $oBook->{Worksheet}[$oBook->{SheetCount}] = $wsheet; $oBook->{SheetCount}+=1; } $self -> {Worksheet} = \@Worksheet; foreach my $sheet (@Worksheet) { my $member_name = "xl/worksheets/sheet$sheet->{Id}.xml"; my $member_sheet = $self -> {zip} -> memberNamed ($member_name) or die ("$member_name not found in this zip\n"); my ($row, $col); my $flag = 0; my $s = 0; my $s2 = 0; my $sty = 0; foreach ($member_sheet -> contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) { if (/^\<c r=\"([A-Z])([A-Z]?)(\d+)\"/) { $col = ord ($1) - 65; if ($2) { $col++; $col *= 26; $col += (ord ($2) - 65); } $row = $3 - 1; $s = /t=\"s\"/ ? 1 : 0; $s2 = /t=\"str\"/ ? 1 : 0; /s="([^"]*)"/; $sty = $1>0 ? $1 : 0 ; } elsif (/^<v/) { $flag = 1; } elsif (/^<\/v/) { $flag = 0; } elsif (length ($_) && $flag) { my $v = $s ? $shared_strings [$_] : $_; if ($v eq "</c>"){$v="";} my $type = "Text"; my $thisstyle = ""; if (not($s) && not($s2)){ $type="Numeric"; $thisstyle = $style_info{$styles[$sty]}; if ($thisstyle =~ /(?<!Re)d|m|y/){ $type="Date"; } } $sheet -> {MaxRow} = $row if $sheet -> {MaxRow} < $row; $sheet -> {MaxCol} = $col if $sheet -> {MaxCol} < $col; $sheet -> {MinRow} = $row if $sheet -> {MinRow} > $row; $sheet -> {MinCol} = $col if $sheet -> {MinCol} > $col; if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric"){ $v=$1/(10**$2); # this handles scientific notation for very small numbers } my $cell =Spreadsheet::ParseExcel::Cell->new( Val => $v, Format => $thisstyle, Type => $type ); $cell->{_Value} = $oBook->{FmtClass}->ValFmt($cell, $oBook); if ($type eq "Date" && $v<1){ #then this is Excel time field $cell->{Type}="Text"; $cell->{Val}=$cell->{_Value}; } $sheet -> {Cells} [$row] [$col] = $cell; } } $sheet -> {MinRow} = 0 if $sheet -> {MinRow} > $sheet -> {MaxRow}; $sheet -> {MinCol} = 0 if $sheet -> {MinCol} > $sheet -> {MaxCol}; } foreach my $stys (keys %style_info){ } bless ($self, $class); return $oBook; } 1; __END__ =head1 NAME Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files; =head1 SYNOPSIS use Text::Iconv; my $converter = Text::Iconv -> new ("utf-8", "windows-1251"); # Text::Iconv is not really required. # This can be any object with the convert method. Or nothing. use Spreadsheet::XLSX; my $excel = Spreadsheet::XLSX -> new ('test.xlsx', $converter); foreach my $sheet (@{$excel -> {Worksheet}}) { printf("Sheet: %s\n", $sheet->{Name}); $sheet -> {MaxRow} ||= $sheet -> {MinRow}; foreach my $row ($sheet -> {MinRow} .. $sheet -> {MaxRow}) { $sheet -> {MaxCol} ||= $sheet -> {MinCol}; foreach my $col ($sheet -> {MinCol} .. $sheet -> {MaxCol}) { my $cell = $sheet -> {Cells} [$row] [$col]; if ($cell) { printf("( %s , %s ) => %s\n", $row, $col, $cell -> {Val}); } } } } =head1 DESCRIPTION This module is a (quick and dirty) emulation of Spreadsheet::ParseExcel for Excel 2007 (.xlsx) file format. =head1 SEE ALSO =over 2 =item Text::CSV_XS, Text::CSV_PP http://search.cpan.org/~hmbrand/ A pure perl version is available on http://search.cpan.org/~makamaka/ =item Spreadsheet::ParseExcel http://search.cpan.org/~kwitknr/ =item Spreadsheet::ReadSXC http://search.cpan.org/~terhechte/ =item Spreadsheet::BasicRead http://search.cpan.org/~gng/ for xlscat likewise functionality (Excel only) =item Spreadsheet::ConvertAA http://search.cpan.org/~nkh/ for an alternative set of cell2cr () / cr2cell () pair =item Spreadsheet::Perl http://search.cpan.org/~nkh/ offers a Pure Perl implementation of a spreadsheet engine. Users that want this format to be supported in Spreadsheet::Read are hereby motivated to offer patches. It's not high on my todo-list. =item xls2csv http://search.cpan.org/~ken/ offers an alternative for my C<xlscat -c>, in the xls2csv tool, but this tool focusses on character encoding transparency, and requires some other modules. =item Spreadsheet::Read http://search.cpan.org/~hmbrand/ read the data from a spreadsheet (interface module) =back =head1 AUTHOR Dmitry Ovsyanko, E<lt>do@eludia.ru<gt>, http://eludia.ru/wiki/ Patches by: Steve Simms Joerg Meltzer =head1 COPYRIGHT AND LICENSE Copyright (C) 2008 by Dmitry Ovsyanko This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut