Subject: | Some patches |
Here are some patches that modernize the WWW::Search::Monster module, plus
one that adds a new feature.
See the info-www-search list for more details.
Subject: | approximate_hit_count_patch |
--- lib/WWW/Search/Monster.pm 2006-02-08 21:17:54.000000000 -0500
+++ lib/WWW/Search/Monster.pm.v3 2006-02-08 21:04:01.000000000 -0500
@@ -332,6 +332,7 @@
$content =~ s/ / /ig;
$content =~ m/Jobs (\d+) to (\d+) of (\d+)/;
my $nrows = $2 - $1 + 1;
+ $self->approximate_hit_count($3);
# Determine _next_url
my ($nexturl) =
Subject: | monster-doc-patch |
--- lib/WWW/Search/Monster.pm 2006-01-31 22:14:41.000000000 -0500
+++ lib/WWW/Search/Monster.pm.new 2006-01-31 22:12:36.000000000 -0500
@@ -76,91 +76,159 @@
=over 2
-=item * 1 Accounting/Auditing
+=item * 1 Accounting/Auditing
-=item * 2 Administrative and Support Services
+=item * 2 Administrative and Support Services
-=item * 8 Advertising/Marketing/Public Relations
+=item * 8 Advertising/Marketing/Public Relations
-=item * 540 Agriculture, Forestry, & Fishing
+=item * 5620 Aerospace/Aviation/Defense
-=item * 541 Architectural Services
+=item * 540 Agriculture, Forestry, & Fishing
-=item * 12 Arts, Entertainment, and Media
+=item * 9004 Airlines
-=item * 576 Banking
+=item * 541 Architectural Services
-=item * 46 Biotechnology and Pharmaceutical
+=item * 12 Arts, Entertainment, and Media
-=item * 542 Community, Social Services, and Nonprofit
+=item * 576 Banking
-=item * 543 Computers, Hardware
+=item * 46 Biotechnology and Pharmaceutical
-=item * 6 Computers, Software
+=item * 3979 Building and Grounds Maintenance
-=item * 544 Construction, Mining and Trades
+=item * 8125 Business Opportunity/Investment Required
-=item * 546 Consulting Services
+=item * 8126 Career Fairs
-=item * 545 Customer Service and Call Center
+=item * 9005 Computer Services
-=item * 3 Education, Training, and Library
+=item * 543 Computers, Hardware
-=item * 547 Employment Placement Agencies
+=item * 6 Computers, Software
-=item * 4 Engineering
+=item * 544 Construction, Mining and Trades
-=item * 548 Finance/Economics
+=item * 546 Consulting Services
-=item * 549 Financial Services
+=item * 5622 Consumer Products
-=item * 550 Government and Policy
+=item * 545 Customer Service and Call Center
-=item * 551 Healthcare, Other
+=item * 3 Education, Training, and Library
-=item * 9 Healthcare, Practitioner and Technician
+=item * 7305 Electronics
-=item * 552 Hospitality/Tourism
+=item * 547 Employment Placement Agencies
-=item * 5 Human Resources
+=item * 5624 Energy/Utilities
-=item * 660 Information Technology
+=item * 4 Engineering
-=item * 553 Installation, Maintenance, and Repair
+=item * 9002 Environmental Services
-=item * 45 Insurance
+=item * 3561 Executive Management
-=item * 554 Internet/E-Commerce
+=item * 548 Finance/Economics
-=item * 555 Law Enforcement, and Security
+=item * 549 Financial Services
-=item * 7 Legal
+=item * 550 Government and Policy
-=item * 47 Manufacturing and Production
+=item * 7306 Healthcare - Business Office & Finance
-=item * 556 Military
+=item * 2947 Healthcare - CNAs/Aides/MAs/Home Health
-=item * 11 Other
+=item * 3972 Healthcare - Laboratory/Pathology Services
-=item * 557 Personal Care and Service
+=item * 2963 Healthcare - LPNs & LVNs
-=item * 558 Real Estate
+=item * 2990 Healthcare - Medical & Dental Practitioners
-=item * 13 Restaurant and Food Service
+=item * 3007 Healthcare - Medical Records, Health IT & Informatics
-=item * 44 Retail/Wholesale
+=item * 9014 Healthcare - Optical
-=item * 10 Sales
+=item * 551 Healthcare, Other
-=item * 559 Science
+=item * 3973 Healthcare - Pharmacy
-=item * 560 Sports and Recreation
+=item * 3974 Healthcare - Radiology/Imaging
-=item * 561 Telecommunications
+=item * 3975 Healthcare - RNs & Nurse Management
-=item * 562 Transportation and Warehousing
+=item * 3976 Healthcare - Social Services/Mental Health
-=item
+=item * 3977 Healthcare - Support Services
+
+=item * 3978 Healthcare - Therapy/Rehab Services
+
+=item * 552 Hospitality/Tourism
+
+=item * 5 Human Resources/Recruiting
+
+=item * 660 Information Technology
+
+=item * 553 Installation, Maintenance, and Repair
+
+=item * 45 Insurance
+
+=item * 554 Internet/E-Commerce
+
+=item * 555 Law Enforcement, and Security
+
+=item * 7 Legal
+
+=item * 47 Manufacturing and Production
+
+=item * 556 Military
+
+=item * 542 Nonprofit
+
+=item * 9010 Operations Management
+
+=item * 11 Other
+
+=item * 557 Personal Care and Service
+
+=item * 9007 Product Management
+
+=item * 9008 Project/Program Management
+
+=item * 5623 Publishing/Printing
+
+=item * 7307 Purchasing
+
+=item * 558 Real Estate
+
+=item * 13 Restaurant and Food Service
+
+=item * 44 Retail/Wholesale
+
+=item * 10 Sales
+
+=item * 9009 Sales - Account Management
+
+=item * 9011 Sales - Telemarketing
+
+=item * 5957 Sales - Work at Home/Commission Only
+
+=item * 559 Science
+
+=item * 560 Sports and Recreation/Fitness
+
+=item * 5625 Supply Chain/Logistics
+
+=item * 561 Telecommunications
+
+=item * 9013 Textiles
+
+=item * 562 Transportation and Warehousing
+
+=item * 9003 Veterinary Services
+
+=item * 9006 Waste Management Services
=back
Subject: | monster-fix-patch |
--- lib/WWW/Search/Monster.pm.orig 2001-05-02 14:24:02.000000000 -0400
+++ lib/WWW/Search/Monster.pm 2006-01-31 22:14:41.000000000 -0500
@@ -264,56 +264,42 @@
$content =~ s/ / /ig;
$content =~ m/Jobs (\d+) to (\d+) of (\d+)/;
my $nrows = $2 - $1 + 1;
- if($content =~ m/Next page >>/) {
- my $options;
- my $nexturl;
- PROCESS_FORM: while(1) {
- $tag = $p->get_tag("form");
- $nexturl = $self->{'search_base_url'} . '/'.
- $tag->[1]{'action'} . '?';
- while(1) {
- $token = $p->get_token();
- my $type = $token->[0];
- $tag = $token->[1];
- next PROCESS_FORM if($type eq 'E' && $tag eq 'form');
- next if($tag ne 'input');
- my $value = $token->[2]{'value'};
- last PROCESS_FORM if ($value =~ m/Next page \>\>/);
- next PROCESS_FORM if ($value =~ m/\<\< Previous page/);
- my $name = $token->[2]{'name'};
- my $escaped = WWW::Search::escape_query($value);
- $nexturl .= "$name=$escaped" . '&' ;
- }
- }
- print STDERR "Next url is $nexturl\n" if($debug);
- $self->{'_next_url'} = $nexturl;
- } else {
- print STDERR "No next button\n" if($debug);
- }
+ # Determine _next_url
+ my ($nexturl) =
+ ($content =~ /<a[^>]*href="([^"]*)[^>]*>Next page >></ );
+ $self->{'_next_url'} = $self->{search_base_url} . $nexturl;
+
my($hits_found) = 0;
my($hit) = ();
$p = new HTML::TokeParser(\$content);
+
+ #skim the content until we reach the header row of the main table
while(1) {
$tag = $p->get_tag("td");
my $data = $p->get_trimmed_text("/td");
last if($data eq 'Location' ||
$data eq 'Company' ||
- $data eq 'Modified');
+ $data eq 'Modified'); # 'Modified' is not used anymore (Jan06)
}
+
for(my $i = 0; $i< $nrows; $i++) {
- $tag = $p->get_tag("tr");
+ $tag = $p->get_tag("tr"); #Jump to beginning of next row
+
$tag = $p->get_tag("td");
- $tag = $p->get_tag("td"); # fix skew problem WR
my $date = $p->get_trimmed_text("/td");
- $tag = $p->get_tag("td");
- my $location = $p->get_trimmed_text("/td");
+
$tag = $p->get_tag("a");
my $url = $self->{'search_base_url'} . $tag->[1]{href};
my $title = $p->get_trimmed_text("/a");
+
$tag = $p->get_tag("td");
my $company = $p->get_trimmed_text("/td");
+
+ $tag = $p->get_tag("a");
+ my $location = $p->get_trimmed_text("/a");
+
$hit = new WWW::SearchResult;
$hit->url($url);
$hit->company($company);
Subject: | endless_loop_patch |
--- lib/WWW/Search/Monster.pm 2006-02-08 21:04:01.000000000 -0500
+++ lib/WWW/Search/Monster.pm.v4 2006-02-08 21:21:43.000000000 -0500
@@ -345,8 +345,8 @@
$p = new HTML::TokeParser(\$content);
#skim the content until we reach the header row of the main table
- while(1) {
- $tag = $p->get_tag("td");
+ while($p->get_tag("td"))
+ {
my $data = $p->get_trimmed_text("/td");
last if($data eq 'Location' ||
$data eq 'Company' ||