Skip Menu |

This queue is for tickets about the WWW-Search-Jobs CPAN distribution.

Report information
The Basics
Id: 17549
Status: resolved
Estimated: 30 min
Priority: 0/
Queue: WWW-Search-Jobs

People
Owner: MTHURN [...] cpan.org
Requestors:
Cc:
AdminCc:

Bug Information
Severity: Normal
Broken in: 2.01
Fixed in: (no value)



Subject: Some patches
Here are some patches that modernize the WWW::Search::Monster module, plus one that adds a new feature. See the info-www-search mailing list for more details.
Subject: approximate_hit_count_patch
--- lib/WWW/Search/Monster.pm 2006-02-08 21:17:54.000000000 -0500 +++ lib/WWW/Search/Monster.pm.v3 2006-02-08 21:04:01.000000000 -0500 @@ -332,6 +332,7 @@ $content =~ s/ / /ig; $content =~ m/Jobs (\d+) to (\d+) of (\d+)/; my $nrows = $2 - $1 + 1; + $self->approximate_hit_count($3); # Determine _next_url my ($nexturl) =
Subject: monster-doc-patch
--- lib/WWW/Search/Monster.pm 2006-01-31 22:14:41.000000000 -0500 +++ lib/WWW/Search/Monster.pm.new 2006-01-31 22:12:36.000000000 -0500 @@ -76,91 +76,159 @@ =over 2 -=item * 1 Accounting/Auditing +=item * 1 Accounting/Auditing -=item * 2 Administrative and Support Services +=item * 2 Administrative and Support Services -=item * 8 Advertising/Marketing/Public Relations +=item * 8 Advertising/Marketing/Public Relations -=item * 540 Agriculture, Forestry, & Fishing +=item * 5620 Aerospace/Aviation/Defense -=item * 541 Architectural Services +=item * 540 Agriculture, Forestry, & Fishing -=item * 12 Arts, Entertainment, and Media +=item * 9004 Airlines -=item * 576 Banking +=item * 541 Architectural Services -=item * 46 Biotechnology and Pharmaceutical +=item * 12 Arts, Entertainment, and Media -=item * 542 Community, Social Services, and Nonprofit +=item * 576 Banking -=item * 543 Computers, Hardware +=item * 46 Biotechnology and Pharmaceutical -=item * 6 Computers, Software +=item * 3979 Building and Grounds Maintenance -=item * 544 Construction, Mining and Trades +=item * 8125 Business Opportunity/Investment Required -=item * 546 Consulting Services +=item * 8126 Career Fairs -=item * 545 Customer Service and Call Center +=item * 9005 Computer Services -=item * 3 Education, Training, and Library +=item * 543 Computers, Hardware -=item * 547 Employment Placement Agencies +=item * 6 Computers, Software -=item * 4 Engineering +=item * 544 Construction, Mining and Trades -=item * 548 Finance/Economics +=item * 546 Consulting Services -=item * 549 Financial Services +=item * 5622 Consumer Products -=item * 550 Government and Policy +=item * 545 Customer Service and Call Center -=item * 551 Healthcare, Other +=item * 3 Education, Training, and Library -=item * 9 Healthcare, Practitioner and Technician +=item * 7305 Electronics -=item * 552 Hospitality/Tourism +=item * 547 Employment Placement Agencies -=item * 5 Human Resources +=item * 5624 Energy/Utilities -=item * 660 
Information Technology +=item * 4 Engineering -=item * 553 Installation, Maintenance, and Repair +=item * 9002 Environmental Services -=item * 45 Insurance +=item * 3561 Executive Management -=item * 554 Internet/E-Commerce +=item * 548 Finance/Economics -=item * 555 Law Enforcement, and Security +=item * 549 Financial Services -=item * 7 Legal +=item * 550 Government and Policy -=item * 47 Manufacturing and Production +=item * 7306 Healthcare - Business Office & Finance -=item * 556 Military +=item * 2947 Healthcare - CNAs/Aides/MAs/Home Health -=item * 11 Other +=item * 3972 Healthcare - Laboratory/Pathology Services -=item * 557 Personal Care and Service +=item * 2963 Healthcare - LPNs & LVNs -=item * 558 Real Estate +=item * 2990 Healthcare - Medical & Dental Practitioners -=item * 13 Restaurant and Food Service +=item * 3007 Healthcare - Medical Records, Health IT & Informatics -=item * 44 Retail/Wholesale +=item * 9014 Healthcare - Optical -=item * 10 Sales +=item * 551 Healthcare, Other -=item * 559 Science +=item * 3973 Healthcare - Pharmacy -=item * 560 Sports and Recreation +=item * 3974 Healthcare - Radiology/Imaging -=item * 561 Telecommunications +=item * 3975 Healthcare - RNs & Nurse Management -=item * 562 Transportation and Warehousing +=item * 3976 Healthcare - Social Services/Mental Health -=item +=item * 3977 Healthcare - Support Services + +=item * 3978 Healthcare - Therapy/Rehab Services + +=item * 552 Hospitality/Tourism + +=item * 5 Human Resources/Recruiting + +=item * 660 Information Technology + +=item * 553 Installation, Maintenance, and Repair + +=item * 45 Insurance + +=item * 554 Internet/E-Commerce + +=item * 555 Law Enforcement, and Security + +=item * 7 Legal + +=item * 47 Manufacturing and Production + +=item * 556 Military + +=item * 542 Nonprofit + +=item * 9010 Operations Management + +=item * 11 Other + +=item * 557 Personal Care and Service + +=item * 9007 Product Management + +=item * 9008 Project/Program Management + +=item 
* 5623 Publishing/Printing + +=item * 7307 Purchasing + +=item * 558 Real Estate + +=item * 13 Restaurant and Food Service + +=item * 44 Retail/Wholesale + +=item * 10 Sales + +=item * 9009 Sales - Account Management + +=item * 9011 Sales - Telemarketing + +=item * 5957 Sales - Work at Home/Commission Only + +=item * 559 Science + +=item * 560 Sports and Recreation/Fitness + +=item * 5625 Supply Chain/Logistics + +=item * 561 Telecommunications + +=item * 9013 Textiles + +=item * 562 Transportation and Warehousing + +=item * 9003 Veterinary Services + +=item * 9006 Waste Management Services =back
Subject: monster-fix-patch
--- lib/WWW/Search/Monster.pm.orig 2001-05-02 14:24:02.000000000 -0400 +++ lib/WWW/Search/Monster.pm 2006-01-31 22:14:41.000000000 -0500 @@ -264,56 +264,42 @@ $content =~ s/ / /ig; $content =~ m/Jobs (\d+) to (\d+) of (\d+)/; my $nrows = $2 - $1 + 1; - if($content =~ m/Next page &gt;&gt;/) { - my $options; - my $nexturl; - PROCESS_FORM: while(1) { - $tag = $p->get_tag("form"); - $nexturl = $self->{'search_base_url'} . '/'. - $tag->[1]{'action'} . '?'; - while(1) { - $token = $p->get_token(); - my $type = $token->[0]; - $tag = $token->[1]; - next PROCESS_FORM if($type eq 'E' && $tag eq 'form'); - next if($tag ne 'input'); - my $value = $token->[2]{'value'}; - last PROCESS_FORM if ($value =~ m/Next page \>\>/); - next PROCESS_FORM if ($value =~ m/\<\< Previous page/); - my $name = $token->[2]{'name'}; - my $escaped = WWW::Search::escape_query($value); - $nexturl .= "$name=$escaped" . '&' ; - } - } - print STDERR "Next url is $nexturl\n" if($debug); - $self->{'_next_url'} = $nexturl; - } else { - print STDERR "No next button\n" if($debug); - } + # Determine _next_url + my ($nexturl) = + ($content =~ /<a[^>]*href="([^"]*)[^>]*>Next page &gt;&gt;</ ); + $self->{'_next_url'} = $self->{search_base_url} . $nexturl; + my($hits_found) = 0; my($hit) = (); $p = new HTML::TokeParser(\$content); + + #skim the content until we reach the header row of the main table while(1) { $tag = $p->get_tag("td"); my $data = $p->get_trimmed_text("/td"); last if($data eq 'Location' || $data eq 'Company' || - $data eq 'Modified'); + $data eq 'Modified'); # 'Modified' is not used anymore (Jan06) } + for(my $i = 0; $i< $nrows; $i++) { - $tag = $p->get_tag("tr"); + $tag = $p->get_tag("tr"); #Jump to beginning of next row + $tag = $p->get_tag("td"); - $tag = $p->get_tag("td"); # fix skew problem WR my $date = $p->get_trimmed_text("/td"); - $tag = $p->get_tag("td"); - my $location = $p->get_trimmed_text("/td"); + $tag = $p->get_tag("a"); my $url = $self->{'search_base_url'} . 
$tag->[1]{href}; my $title = $p->get_trimmed_text("/a"); + $tag = $p->get_tag("td"); my $company = $p->get_trimmed_text("/td"); + + $tag = $p->get_tag("a"); + my $location = $p->get_trimmed_text("/a"); + $hit = new WWW::SearchResult; $hit->url($url); $hit->company($company);
Subject: endless_loop_patch
--- lib/WWW/Search/Monster.pm 2006-02-08 21:04:01.000000000 -0500 +++ lib/WWW/Search/Monster.pm.v4 2006-02-08 21:21:43.000000000 -0500 @@ -345,8 +345,8 @@ $p = new HTML::TokeParser(\$content); #skim the content until we reach the header row of the main table - while(1) { - $tag = $p->get_tag("td"); + while($p->get_tag("td")) + { my $data = $p->get_trimmed_text("/td"); last if($data eq 'Location' || $data eq 'Company' ||
Patches applied and new distro 2.021 released to CPAN. Thank you, Brian! -- Martin 'Kingpin' Thurn