Here's a patch that implements that.
Index: t/02_main.t
===================================================================
--- t/02_main.t (revision 15684)
+++ t/02_main.t (working copy)
@@ -8,13 +8,15 @@
$^W = 1;
}
-use Test::More tests => 73;
+use Test::More tests => 83;
use File::Spec::Functions ':ALL';
use Parse::CSV;
my $readfile = catfile( 't', 'data', 'simple.csv' );
ok( -f $readfile, "$readfile exists" );
+my $readfile2 = catfile( 't', 'data', 'newlines.csv' );
+ok( -f $readfile2, "$readfile2 exists" );
@@ -57,10 +59,34 @@
is( $csv->errstr, '', '->errstr returns "" still' );
}
+SCOPE: {
+ my $csv = Parse::CSV->new(
+ file => $readfile2,
+ );
+ # Pull the first line
+ my $line = $csv->fetch;
+ is_deeply( $line, [ qw{a b c d e} ], '->fetch returns as expected' );
+ is( $csv->row, 1, '->row returns 1' );
+ is( $csv->errstr, '', '->errstr returns ""' );
+ # Pull the second line
+ $line = $csv->fetch;
+ is_deeply( $line, [ "this", "\nis\n", "also", "a", "sample with some\nembedded newlines\nin it" ], '->fetch returns as expected' );
+ is( $csv->row, 2, '->row returns 2' );
+ is( $csv->errstr, '', '->errstr returns ""' );
+
+ # Pull the third line
+ $line = $csv->fetch;
+ is_deeply( $line, [ qw{1 2 3 4.5 5} ], '->fetch returns as expected' );
+ is( $csv->row, 3, '->row returns 3' );
+ is( $csv->errstr, '', '->errstr returns ""' );
+}
+
+
+
#####################################################################
# Test fields
Index: t/data/newlines.csv
===================================================================
--- t/data/newlines.csv (revision 0)
+++ t/data/newlines.csv (revision 0)
@@ -0,0 +1,7 @@
+a,b,c,d,e
+this,"
+is
+",also,a,"sample with some
+embedded newlines
+in it"
+1,2,3,4.5,5
\ No newline at end of file
Index: lib/Parse/CSV.pm
===================================================================
--- lib/Parse/CSV.pm (revision 15684)
+++ lib/Parse/CSV.pm (working copy)
@@ -209,7 +209,8 @@
# Seperate the Text::CSV attributes
unless ( Params::Util::_HASH0($self->{csv_attr}) ) {
- $self->{csv_attr} = {};
+ $self->{csv_attr} = {binary => 1}; # Suggested by Text::CSV_XS docs to always be on
+ # XXX it would be nice to not have this list hard-coded.
foreach ( qw{quote_char eol escape_char sep_char binary always_quote} ) {
next unless exists $self->{$_};
$self->{csv_attr}->{$_} = delete $self->{$_};
@@ -230,21 +231,7 @@
# Handle automatic field names
if ( Params::Util::_STRING($self->{names}) and $self->{names} ) {
# Grab the first line
- my $line = $self->_getline;
- unless ( defined $line ) {
- Carp::croak("Failed to get header line from CSV");
- }
-
- # Parse the line into columns
- unless ( $self->{csv_xs}->parse($line) ) {
- Carp::croak(
- "Failed to parse header line from CSV: "
- . $self->{csv_xs}->error_input
- );
- }
-
- # Turn the array ref into a hash if needed
- $self->{names} = [ $self->{csv_xs}->fields ];
+ $self->{names} = $self->_getline;
}
# Check names
@@ -303,17 +290,11 @@
# The filter can skip rows,
# iterate till we get something.
- while ( defined(my $line = $self->_getline) ) {
- # Parse the line into columns
- unless ( $self->{csv_xs}->parse($line) ) {
- $self->{errstr} = "Failed to parse row $self->{row}";
- return undef;
- }
-
+ while ( my $row = $self->_getline ) {
# Turn the array ref into a hash if needed
my $rv = undef;
my $names = $self->{names};
- my @cols = $self->{csv_xs}->fields;
+ my @cols = @$row;
if ( $names ) {
$rv = {};
foreach ( 0 .. $#$names ) {
@@ -350,17 +331,10 @@
my $self = shift;
$self->{errstr} = '';
- # Fetch the next file line
- my $handle = $self->{handle};
- my $line = <$handle>;
- unless ( defined $line ) {
- $self->{errstr} = $handle->eof ? '' : $!;
- return undef;
- }
-
- # Parse the line into columns
- $self->{row}++;
- return $line;
+ my $row = $self->{csv_xs}->getline( $self->{handle} );
+ $self->{row}++ if defined $row;
+ $self->{savedrow} = $row;
+ return $row;
}
=pod
@@ -406,7 +380,9 @@
=cut
sub string {
- shift->{csv_xs}->string(@_);
+ my $self = shift;
+ $self->{csv_xs}->combine(@{$self->{savedrow}});
+ $self->{csv_xs}->string;
}
=pod
@@ -421,7 +397,9 @@
=cut
sub print {
- shift->{csv_xs}->print(@_);
+ my ($self, $cols) = @_;
+ $self->{savedrow} = $cols;
+ $self->{csv_xs}->print($cols);
}
=pod
@@ -436,7 +414,7 @@
=cut
sub fields {
- shift->{csv_xs}->fields;
+ @{shift()->{savedrow} || []};
}
=pod
Index: Changes
===================================================================
--- Changes (revision 15684)
+++ Changes (working copy)
@@ -1,5 +1,12 @@
Revision history for Perl extension Parse-CSV
+ - Now accepts embedded newlines in fields. Previously parsing
+ would fail when embedded newlines were encountered. [Ken
+ Williams]
+ - Now sets 'binary => 1' by default, as recommended by the
+ Text::CSV_XS docs. This also allows multi-line data by
+ default. [Ken Williams]
+
2.01 Sun 4 Nov 2012
- Upgrading to Module::Install 1.06
- The names method returns a null list if there are no names