Subject: | Improve unit matching; capture unit type and unit number |
I've modified the unit-matching pattern so that it also matches the
spelled-out unit words ('apartment' as well as 'apt', 'department' as well
as 'dept'), and so that it captures the unit type and unit number (as
'unit' and 'unitnum' respectively).
I've attached a diff of the module, and a diff of the modified unit test.
Subject: | US.pm.patch |
--- US.pm 2005-05-17 09:12:46.000000000 -0500
+++ US.pm 2009-04-01 12:43:41.000000000 -0500
@@ -697,7 +705,6 @@
dircode => join("|", keys %Direction_Code),
zip => qr/\d{5}(?:-\d{4})?/,
corner => qr/(?:\band\b|\bat\b|&|\@)/i,
- unit => qr/(?:(?:su?i?te|p\W*[om]\W*b(?:ox)?|dept|apt|ro*m|fl|apt|unit|box)\W+|#\W*)[\w-]+/i,
);
{
@@ -724,6 +731,20 @@
)
/ix;
+ $Addr_Match{unit} = qr/  # secondary unit: a unit-type introducer followed by the unit identifier (case-insensitive, extended syntax)
+ (?:                      # introducer: either a unit-type word plus separator, or a bare hash sign
+ (su?i?te                 # ste, suite (also sute, site)
+ |p\W*[om]\W*b(?:ox)?     # PO Box, POB, PMB; non-word characters allowed between the letters
+ |(?:ap|dep)(?:ar)?t(?:me?nt)?  # apt, apartment, dept, department (and partial spellings such as aptmnt)
+ |ro*m                    # rm, room
+ |flo*r?                  # fl, flr, floor
+ |unit
+ |box) (?{ $_{unit} = $^N })    # store the unit-type word just captured under the 'unit' key
+ \W+|\#\W*                # separator required after the type word; the hash-sign alternative stores no 'unit' value
+ )
+ ( [\w-]+) (?{ $_{unitnum}= $^N })  # store the identifier (word characters and hyphens) under the 'unitnum' key
+ /ix;
+
$Addr_Match{place} = qr/
(?:
([^\d,]+?)\W+ (?{ $_{city} = $^N })
Subject: | t-01_parser.t.patch |
--- ../Geo-StreetAddress-US-0.99 2/t/01_parser.t 2005-05-15 16:40:25.000000000 -0500
+++ ./t/01_parser.t 2009-04-01 13:12:18.000000000 -0500
@@ -1,5 +1,5 @@
use blib;
-use Test::More tests => 38;
+use Test::More tests => 40;
use strict;
use warnings;
@@ -64,6 +64,8 @@
'zip' => undef,
'suffix' => undef,
'type' => 'Hwy',
+ 'unit' => 'Suite',
+ 'unitnum' => '500',
'prefix' => 'N'
},
"1005 N Gravenstein Hwy Suite 500 Sebastopol, CA" => {
@@ -74,6 +76,32 @@
'zip' => undef,
'suffix' => undef,
'type' => 'Hwy',
+ 'unit' => 'Suite',
+ 'unitnum' => '500',
+ 'prefix' => 'N'
+ },
+ "1005 N Gravenstein Hwy Apt 500 Sebastopol, CA" => {
+ 'number' => '1005',
+ 'street' => 'Gravenstein',
+ 'state' => 'CA',
+ 'city' => 'Sebastopol',
+ 'zip' => undef,
+ 'suffix' => undef,
+ 'type' => 'Hwy',
+ 'unit' => 'Apt',
+ 'unitnum' => '500',
+ 'prefix' => 'N'
+ },
+ "1005 N Gravenstein Hwy Apartment 500 Sebastopol, CA" => {
+ 'number' => '1005',
+ 'street' => 'Gravenstein',
+ 'state' => 'CA',
+ 'city' => 'Sebastopol',
+ 'zip' => undef,
+ 'suffix' => undef,
+ 'type' => 'Hwy',
+ 'unit' => 'Apartment',
+ 'unitnum' => '500',
'prefix' => 'N'
},
"1005 N Gravenstein Highway, Sebastopol, CA, 95472" => {