Documentation patch for the bin/scraper utility, with a summary of commands and
an example session :)
Great tool, thanks.
Subject: | scrape.patch |
diff --git a/bin/scraper b/bin/scraper
index 5f7921b..71eb0f6 100755
--- a/bin/scraper
+++ b/bin/scraper
@@ -31,7 +31,7 @@ my $print = sub {
my(@stack, $source);
my $stuff = process_args($ARGV[0])
- or die "Usage: scraper [URI-or-filename]\n";
+ or die "Usage: scraper [URI-or-filename-or-HTML-via-STDIN]\n";
my $term = Term::ReadLine->new("Web::Scraper");
my $scraper = scraper { run_loop($_[0], $term) };
@@ -50,7 +50,7 @@ sub process_args {
return URI->new($uri);
} elsif ($uri && -e $uri) {
$source = [ 'file', $uri ];
- open my $fh, "<", $uri or die "$uri: $!";
+ open my $fh, "<", $uri or die "$uri: $!";
return join "", <$fh>;
}
@@ -105,3 +105,93 @@ my \$result = \$scraper->scrape($var);
CODE
}
+
+__END__
+
+=head1 NAME
+
+scraper - Interactive shell for Web::Scraper sessions.
+
+=head2 SYNOPSIS
+
+scraper [URI, filename or HTML from STDIN]
+
+Interactive shell for Web::Scraper sessions
+
+=head2 COMMANDS
+
+=over
+
+=item C<d>
+
+Present output as a Perl data structure formatted with L<Data::Dumper>.
+
+=item C<y>
+
+Present output as C<YAML> data.
+
+=item C<s>
+
+Present output as HTML (via L<HTML::TreeBuilder>).
+
+=item C<c>
+
+Generate L<Web::Scraper> code for last command
+
+=item C<c all>
+
+Generate L<Web::Scraper> code for whole session
+
+=item C<q>
+
+Quit
+
+=back
+
+=head2 EXAMPLE SESSION
+
+ $ scraper http://example.com
+ scraper> process "body", "b", 'RAW'
+ scraper> y
+ ---
+ b: '<p>You have reached this web page by typing "example.com", "example.net", or "example.org" into your web browser.</p><p>These domain names are reserved for use in documentation and are not available for registration. See <a href="http://www.rfc-editor.org/rfc/rfc2606.txt">RFC 2606</a>, Section 3.</p>'
+ scraper> d
+ $VAR1 = {
+ 'b' => '<p>You have reached this web page by typing "example.com", "example.net", or "example.org" into your web browser.</p><p>These domain names are reserved for use in documentation and are not available for registration. See <a href="http://www.rfc-editor.org/rfc/rfc2606.txt">RFC 2606</a>, Section 3.</p>'
+ };
+ scraper> s
+ <html>
+ <head>
+ <title>Example Web Page</title>
+ </head>
+ <body>
+ <p>You have reached this web page by typing "example.com", "example.net", or "example.org" into your web browser.</p>
+ <p>These domain names are reserved for use in documentation and are not available for registration. See <a href="http://www.rfc-editor.org/rfc/rfc2606.txt">RFC 2606</a>, Section 3.</p>
+ </body>
+ </html>
+ scraper> c
+ #!/Users/kd/perl/bin/perl
+ use strict;
+ use Web::Scraper;
+ use URI;
+
+ my $uri = URI->new("http://example.com");
+ my $scraper = scraper {
+ process "body", "b", 'RAW';
+ };
+ my $result = $scraper->scrape($uri);
+ scraper> c all
+ #!/Users/kd/perl/bin/perl
+ use strict;
+ use Web::Scraper;
+ use URI;
+
+ my $uri = URI->new("http://example.com");
+ my $scraper = scraper {
+ process "body", "b", 'RAW';
+ };
+ my $result = $scraper->scrape($uri);
+ scraper> q
+ $
+
+=cut