Subject: | A better mech-dump with --user and --pass, plus proxy (environment) |
Date: | Mon, 09 Jul 2007 15:28:06 -0700 |
To: | bug-WWW-Mechanize [...] rt.cpan.org |
From: | kernel [...] pkts.ca |
Here's an updated mech-dump that handles --user and --pass from the
command line, and proxy settings from the environment.
It's better because it handles all of the 'TODO' items on the man page.
There's a patch that adds '--credentials' in the bug tracking system,
but I didn't look there until after I'd finished.
This version tries without the username/password, and if authentication
is required, it tries again with them.
Enjoy!
#!/usr/bin/perl -w
# Shell-compatibility preamble.  If this file is handed to a shell rather
# than to perl, the shell runs the eval line below and re-executes the
# script under perl with the original arguments.  Perl itself parses the
# two lines as one statement, "eval '...' if 0;", and so never runs it.
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
if 0; # not running under some shell
=head1 NAME

mech-dump - Dumps information about a web page

=cut
use warnings;
use strict;
use WWW::Mechanize;
use Getopt::Long;
use Pod::Usage;
# Command-line state.
#
# @actions collects the dump routines to run, in the order their options
# appeared on the command line; repeating an option repeats its dump.
my @actions;
my $absolute;    # --absolute: print URLs in absolute form
my $user;        # --user: username offered if the server asks for one
my $pass;        # --pass: password offered if the server asks for one

# Parse the options.  An unknown option prints the usage text and exits
# with status 2; --help prints it and exits with status 1.
GetOptions(
    'user=s'   => \$user,
    'pass=s'   => \$pass,
    'forms'    => sub { push @actions, \&dump_forms },
    'links'    => sub { push @actions, \&dump_links },
    'images'   => sub { push @actions, \&dump_images },
    'all'      => sub { push @actions, \&dump_forms, \&dump_links, \&dump_images },
    'absolute' => \$absolute,
    'help'     => sub { pod2usage(1) },
) or pod2usage(2);
=head1 SYNOPSIS

mech-dump [options] [file|url]

Options:

    --forms     Dump table of forms (default action)
    --links     Dump table of links
    --images    Dump table of images
    --all       Dump all three of the above, in that order

    --user      Set the username used if the page requires authentication
    --pass      Set the password used if the page requires authentication
    --absolute  Show URLs as absolute, even if relative in the page

    --help      Show this message

The order of the options specified is relevant.  Repeated options
get repeated dumps.

=cut
# ----------------------------------------------------------------------
# Main program: resolve the target, fetch it (retrying once with the
# supplied credentials if the server demands authentication), verify the
# result is HTML, then run every requested dump in command-line order.
# ----------------------------------------------------------------------
my $uri = shift or die "Must specify a URL or file to check\n";

# An argument that names something on disk is treated as a local file
# and converted to an absolute file: URL.
if ( -e $uri ) {
    require URI::file;
    $uri = URI::file->new_abs( $uri )->as_string;
}

# With no action options given, fall back to dumping the forms.
@actions = (\&dump_forms) unless @actions;

# autocheck is switched off explicitly.  Current WWW::Mechanize releases
# turn it on by default, which would make get() die on the first failed
# request and so prevent the authentication retry below from running.
my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 );

# Pick up proxy settings from the environment.
$mech->env_proxy();

my $response = $mech->get( $uri );
if ( !$response->is_success ) {
    if ( defined( $response->www_authenticate ) ) {
        # The server issued an authentication challenge: retry once with
        # the credentials from the command line, if both were supplied.
        if ( !defined $user or !defined $pass ) {
            die("Page requires username and password, but none specified.\n");
        }
        $mech->credentials( $user, $pass );
        $response = $mech->get( $uri );
        $response->is_success
            or die "Can't fetch $uri with username and password\n",
                   $response->status_line, "\n";
    }
    else {
        # Any other failure (not found, server error, connection refused,
        # ...) must stop here rather than fall through and dump whatever
        # error page came back.
        die "Can't fetch $uri\n", $response->status_line, "\n";
    }
}

$mech->is_html
    or die qq{$uri returns type "}, $mech->ct, qq{", not "text/html"\n};

for my $action ( @actions ) {
    $action->( $mech );
}
# dump_links( $mech )
#
# Prints the URL of every link reported by $mech->links, one per line.
# The absolute form of each URL is printed when the file-level $absolute
# flag is true, otherwise the URL as returned by the link's url method.
# Returns nothing.
sub dump_links {
    my ($mech) = @_;

    foreach my $anchor ( $mech->links ) {
        my $target;
        if ($absolute) {
            $target = $anchor->url_abs;
        }
        else {
            $target = $anchor->url;
        }
        print "$target\n";
    }

    return;
}
# dump_images( $mech )
#
# Prints the URL of every image reported by $mech->images, one per line.
# The absolute form of each URL is printed when the file-level $absolute
# flag is true, otherwise the URL as returned by the image's url method.
# Returns nothing.
sub dump_images {
    my $mech = shift;

    for my $image ( $mech->images ) {
        my $url = $absolute ? $image->url_abs : $image->url;

        # An image element with no src attribute has no URL.  Print an
        # empty line for it instead of interpolating undef, which would
        # raise an "uninitialized value" warning under -w.
        $url = '' if !defined $url;

        print "$url\n";
    }

    return;
}
# dump_forms( $mech )
#
# Prints the textual dump of every form reported by $mech->forms, each
# followed by a newline so consecutive forms are visually separated.
# Returns nothing.
sub dump_forms {
    my ($mech) = @_;

    foreach my $current_form ( $mech->forms() ) {
        # The dump method is deliberately called in list context, as an
        # argument to print, exactly like a bare "print $form->dump".
        print $current_form->dump, "\n";
    }

    return;
}
=head1 TODO

=over 4

=item * Option for C<--proxy>.  Proxy settings are currently taken from
the environment only; C<--user> and C<--pass> are already implemented.

=back

=cut