diff -urN Text-CSV_XS-0.42~orig/CSV_XS.pm Text-CSV_XS-0.42/CSV_XS.pm
--- Text-CSV_XS-0.42~orig/CSV_XS.pm 2008-04-16 05:11:51.000000000 -0800
+++ Text-CSV_XS-0.42/CSV_XS.pm 2008-04-21 19:25:55.000000000 -0800
@@ -63,6 +63,7 @@
allow_loose_quotes => 0,
allow_loose_escapes => 0,
allow_whitespace => 0,
+ allow_bad_quoting => 0,
blank_is_undef => 0,
verbatim => 0,
types => undef,
@@ -110,6 +111,7 @@
allow_loose_escapes => 7,
allow_double_quoted => 8,
allow_whitespace => 9,
+ allow_bad_quoting => 24,
blank_is_undef => 10,
eol => 11, # 11 .. 18
@@ -217,6 +219,13 @@
$self->{allow_whitespace};
} # allow_whitespace
+sub allow_bad_quoting {
+    # Combined getter/setter: an argument, when given, is handed to _set_attr.
+    my ($self, @args) = @_;
+    $self->_set_attr ("allow_bad_quoting", $args[0]) if @args;
+    return $self->{allow_bad_quoting};
+}
+
sub blank_is_undef
{
my $self = shift;
@@ -724,6 +733,25 @@
allow this format, we cannot help there are some vendors that make
their applications spit out lines styled like this.
+=item allow_bad_quoting
+
+By default, C<quote_char> characters inside a quoted field are parsed
+strictly: any unescaped quote character makes the parser fail. Turning this
+option on lets the parser recover by keeping the offending quote character
+as literal data and continuing. For example, normally:
+
+ 1,"foo "bar" baz",42
+
+would result in a parse error (2023). With C<allow_bad_quoting> it will parse
+into three fields:
+
+ ("1", 'foo "bar" baz', "42")
+
+Keep in mind that this behaviour is fragile and should only be used as a last
+resort to recover from badly formed CSV. If you use this option, carefully
+verify that the results are what you intended; if at all possible, correct
+the original CSV data instead and avoid this option.
+
=item escape_char
The character used for escaping certain characters inside quoted fields.
@@ -837,6 +865,7 @@
allow_loose_quotes => 0,
allow_loose_escapes => 0,
allow_whitespace => 0,
+ allow_bad_quoting => 0,
blank_is_undef => 0,
verbatim => 0,
});
diff -urN Text-CSV_XS-0.42~orig/CSV_XS.xs Text-CSV_XS-0.42/CSV_XS.xs
--- Text-CSV_XS-0.42~orig/CSV_XS.xs 2008-04-15 22:51:24.000000000 -0800
+++ Text-CSV_XS-0.42/CSV_XS.xs 2008-04-21 19:35:15.000000000 -0800
@@ -34,6 +34,7 @@
#define CACHE_ID_allow_loose_escapes 7
#define CACHE_ID_allow_double_quoted 8
#define CACHE_ID_allow_whitespace 9
+#define CACHE_ID_allow_bad_quoting 24
#define CACHE_ID_blank_is_undef 10
#define CACHE_ID_eol 11
#define CACHE_ID_eol_len 19
@@ -83,6 +84,7 @@
byte allow_loose_escapes;
byte allow_double_quoted;
byte allow_whitespace;
+ byte allow_bad_quoting;
byte blank_is_undef;
byte verbatim;
@@ -209,6 +211,7 @@
csv->allow_loose_escapes = csv->cache[CACHE_ID_allow_loose_escapes];
csv->allow_double_quoted = csv->cache[CACHE_ID_allow_double_quoted];
csv->allow_whitespace = csv->cache[CACHE_ID_allow_whitespace ];
+ csv->allow_bad_quoting = csv->cache[CACHE_ID_allow_bad_quoting ];
csv->blank_is_undef = csv->cache[CACHE_ID_blank_is_undef ];
csv->verbatim = csv->cache[CACHE_ID_verbatim ];
#endif
@@ -286,6 +289,7 @@
csv->allow_loose_escapes = bool_opt ("allow_loose_escapes");
csv->allow_double_quoted = bool_opt ("allow_double_quoted");
csv->allow_whitespace = bool_opt ("allow_whitespace");
+ csv->allow_bad_quoting = bool_opt ("allow_bad_quoting");
csv->blank_is_undef = bool_opt ("blank_is_undef");
csv->verbatim = bool_opt ("verbatim");
#endif
@@ -303,6 +307,7 @@
csv->cache[CACHE_ID_allow_loose_escapes] = csv->allow_loose_escapes;
csv->cache[CACHE_ID_allow_double_quoted] = csv->allow_double_quoted;
csv->cache[CACHE_ID_allow_whitespace] = csv->allow_whitespace;
+ csv->cache[CACHE_ID_allow_bad_quoting] = csv->allow_bad_quoting;
csv->cache[CACHE_ID_blank_is_undef] = csv->blank_is_undef;
csv->cache[CACHE_ID_verbatim] = csv->verbatim;
#endif
@@ -875,9 +880,18 @@
/* uncovered */ goto restart;
}
}
-#endif
- ERROR_INSIDE_QUOTES (2023);
+
+ if (csv->allow_bad_quoting) {
+ CSV_PUT_SV(sv, c);
+ CSV_PUT_SV(sv, c2);
+ }
+ else {
+ ERROR_INSIDE_QUOTES (2023);
+ }
}
+#else
+ ERROR_INSIDE_QUOTES (2023);
+#endif
}
else
/* !waitingForField, !InsideQuotes */
Binary files Text-CSV_XS-0.42~orig/.CSV_XS.xs.swp and Text-CSV_XS-0.42/.CSV_XS.xs.swp differ
diff -urN Text-CSV_XS-0.42~orig/t/12_acc.t Text-CSV_XS-0.42/t/12_acc.t
--- Text-CSV_XS-0.42~orig/t/12_acc.t 2008-02-25 23:24:40.000000000 -0900
+++ Text-CSV_XS-0.42/t/12_acc.t 2008-04-21 19:51:40.000000000 -0800
@@ -3,7 +3,7 @@
use strict;
$^W = 1; # use warnings core since 5.6
-use Test::More tests => 44;
+use Test::More tests => 46;
BEGIN {
use_ok "Text::CSV_XS";
@@ -23,6 +23,7 @@
is ($csv->allow_loose_quotes, 0, "allow_loose_quotes");
is ($csv->allow_loose_escapes, 0, "allow_loose_escapes");
is ($csv->allow_whitespace, 0, "allow_whitespace");
+is ($csv->allow_bad_quoting, 0, "allow_bad_quoting");
is ($csv->blank_is_undef, 0, "blank_is_undef");
is ($csv->verbatim, 0, "verbatim");
@@ -40,6 +41,7 @@
is ($csv->allow_loose_quotes (1), 1, "allow_loose_quotes (1)");
is ($csv->allow_loose_escapes (1), 1, "allow_loose_escapes (1)");
is ($csv->allow_whitespace (1), 1, "allow_whitespace (1)");
+is ($csv->allow_bad_quoting (1), 1, "allow_bad_quoting (1)");
is ($csv->blank_is_undef (1), 1, "blank_is_undef (1)");
is ($csv->verbatim (1), 1, "verbatim (1)");
is ($csv->escape_char ("\\"), "\\", "escape_char (\\)");
diff -urN Text-CSV_XS-0.42~orig/t/65_allow.t Text-CSV_XS-0.42/t/65_allow.t
--- Text-CSV_XS-0.42~orig/t/65_allow.t 2008-04-05 14:02:10.000000000 -0800
+++ Text-CSV_XS-0.42/t/65_allow.t 2008-04-21 19:49:25.000000000 -0800
@@ -4,7 +4,7 @@
$^W = 1;
#use Test::More "no_plan";
- use Test::More tests => 803;
+ use Test::More tests => 828;
BEGIN {
use_ok "Text::CSV_XS", ();
@@ -153,6 +153,30 @@
}
}
+ok (1, "Allow bad quoting");
+# Unescaped quotes in quoted fields: [ id, strict result, line, lenient fields ]
+{   my @bad = (
+        [ 1, 1, qq{foo,bar,"quux",quux}, [qw( foo bar quux quux )] ],
+        [ 2, 0, qq{rj,bs,"r"jb"s",rjbs}, [ "rj", "bs", q{r"jb"s}, "rjbs" ] ],
+        [ 3, 0, qq{"some "spaced" quote data",2,3,4},
+                [ q{some "spaced" quote data}, 2, 3, 4 ] ],
+        [ 4, 0, qq{"a lone " quote",foo,bar}, [ q{a lone " quote}, "foo", "bar" ] ],
+        [ 5, 1, qq{and an,entirely,quoted,"field"},
+                [ "and an", qw( entirely quoted field ) ] ],
+        [ 6, 1, qq{and then,"one with ""quoted"" quotes",okay,?},
+                [ "and then", q{one with "quoted" quotes}, "okay", "?" ] ],
+        );
+    for (@bad) {
+        my ($tst, $valid, $line, $expect) = @$_;
+        $csv = Text::CSV_XS->new ();
+        ok ($csv, "$tst - new (allow_bad_quoting => 0)");
+        is ($csv->parse ($line), $valid, "$tst - strict parse ()");
+        $csv->allow_bad_quoting (1);
+        ok ($csv->parse ($line), "$tst - parse () with allow_bad_quoting");
+        is_deeply ([ $csv->fields ], $expect, "$tst - fields");
+        }
+    }
+
ok (1, "blank_is_undef");
foreach my $conf (
[ 0, 0, 0, 1, "", " ", '""', 2, "", "", "" ],