Subject: | htmlstrip command line tool |
Date: | Thu, 21 Jul 2016 12:55:45 +0300 |
To: | bug-html-strip [...] rt.cpan.org |
From: | Gabor Szabo <gabor [...] szabgab.com> |
It would be really nice if installing the module also installed a command-line
script that could be used to strip HTML.
Included is a first version you could ship.
Gabor Szabo
--------
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
use HTML::Strip;
# Parse command-line options. --help prints the usage text and exits; a
# failed option parse (GetOptions returning false) does the same.
GetOptions('help' => \&usage) or usage();

if (@ARGV) {
    # Every remaining argument is treated as the path of an HTML file.
    foreach my $file (@ARGV) {
        my $fh;
        if (!open($fh, '<', $file)) {
            # Include $! so the user can see why the open failed, then
            # carry on with the remaining files.
            warn "Could not open '$file': $!";
            next;
        }

        # Slurp the whole file; $/ is restored when the do-block ends.
        my $content = do { local $/ = undef; <$fh> };
        close $fh;

        # Check definedness and length instead of truth: a file whose whole
        # content is "0" is false in boolean context but must still be
        # processed. Empty files and failed reads are skipped.
        if (defined $content && length $content) {
            strip($content);
        }
    }
}
else {
    # No file arguments: take the document from standard input.
    my $content = join '', <STDIN>;
    strip($content);
}
# Remove the markup from an HTML string and print what is left to the
# currently selected output handle (STDOUT unless changed).
#
#   $html - the raw HTML text to process
sub strip {
    my $html = shift;

    my $stripper = HTML::Strip->new();
    my $text     = $stripper->parse($html);

    # Signal the end of the document to the parser (see HTML::Strip's eof).
    $stripper->eof;

    print $text;
}
# Print a short usage summary and terminate the program with exit status 0.
# Any arguments passed in are ignored.
sub usage {
    my $help_text = <<"END_USAGE";
Usage:
$0 filename
cat file | $0
END_USAGE

    print $help_text;
    exit 0;
}