Subject: | extra functions for Text::WikiCreole |
I've written a few extra functions for the module, which for the
moment I've put into separate modules that I've called
Text::WikiCreole::Table and Text::WikiCreole::Extra. I'd like to discuss
the best way to publish the functions to CPAN, but in the meantime I
thought I'd upload them here in case they are of any use.
Subject: | Table.pm |
package Text::WikiCreole::Table;
use strict;
use warnings;
=head1 NAME
Text::WikiCreole::Table - extract Creole tables to CSV files and vice-versa
=cut
our $VERSION = '1.00';
# 2011-11-23 djh 1.00 Created
#=======================================================================
=head1 DESCRIPTION
This module has methods for converting tables expressed in wiki Creole
syntax, to equivalent tables expressed in CSV syntax.
It uses Text::CSV for CSV manipulation.
=cut
use Text::CSV;
#=======================================================================
=head1 METHODS
=head2 creole_page_to_csv
$csv_list = Text::WikiCreole::Table->creole_page_to_csv($creole_text);
Reads Creole text supplied as an argument and returns a reference to a
list of the tables found in the text, formatted as CSV.
Each CSV table in the list is a single string.
If the text contains no tables, undef is returned instead of a reference.
=cut
# Extract every Creole table found in a page of wikitext and convert each
# one to CSV.
#
# Arguments:
#   $self - class name (or object) the method was invoked on; not used.
#   $text - the wikitext of the page, as a single string.
#
# Returns a reference to a list of CSV strings, one per table, in document
# order, or undef when no tables are found (including when $text is
# undefined).
sub creole_page_to_csv
{
    my ($self, $text) = @_;

    my @lines = defined $text ? split(/\n/, $text) : ();
    my @csv;      # one CSV string per completed table
    my @table;    # lines of the table currently being collected

    for my $line (@lines)
    {
        if ($line =~ /^\s*\|/)
        {
            # Any line whose first non-blank character is a pipe is a row.
            push @table, $line;
        }
        elsif (@table)
        {
            # First non-table line after a run of rows ends that table.
            push @csv, creole_table_to_csv(\@table);
            @table = ();
        }
    }

    # A table that runs to the very last line of the page has no following
    # non-table line to terminate it, so it must be flushed here.
    push @csv, creole_table_to_csv(\@table) if @table;

    return @csv ? \@csv : undef;
}
#=======================================================================
=head2 creole_table_to_csv
Convert a table expressed in Creole wiki markup to CSV format.
$csv_text = creole_table_to_csv(\@table);
The argument is a reference to an array of lines of Creole wikitext,
which must represent a table. The function returns a text string which
contains the same table in CSV format.
=cut
# Convert one Creole table to CSV text.
#
# Arguments:
#   $creole_lines - reference to an array of wikitext lines, each of which
#                   is one table row (cells introduced by '|' or '|=').
#
# Returns a string holding one CSV record per input line, each terminated
# by "\n".  Header cells ('|=') are emitted like ordinary cells.  Cell
# contents are passed through as written, including surrounding spaces.
#
# Dies if the CSV writer cannot be created or a row cannot be encoded.
#
# NOTE: every '|' is treated as a cell separator, so a pipe inside a link
# or image (e.g. [[target|label]]) splits the cell.
sub creole_table_to_csv
{
    my ($creole_lines) = @_;

    my $csv = Text::CSV->new({ binary => 1 })
        or die "Cannot use CSV: " . Text::CSV->error_diag();

    my $csv_text = '';
    for my $line (@$creole_lines)
    {
        # The closing pipe of a row is optional in Creole.  Work on a copy
        # that always ends in exactly one pipe so that the last cell is
        # delimited like every other cell instead of being dropped.
        (my $row = $line) =~ s/\s+$//;
        $row .= '|' unless $row =~ /\|$/;

        # Each cell is the text between one pipe (optionally '|=' for a
        # header cell) and the next pipe.
        my @columns = $row =~ /\s*\|=?(.*?)(?=\|)/g;

        # Report the failure recorded on this writer object, not the
        # class-level diagnostic left over from construction.
        $csv->combine(@columns)
            or die "Cannot change '$line' to CSV: " . $csv->error_diag();
        $csv_text .= $csv->string . "\n";
    }
    return $csv_text;
}
#=======================================================================
1;
# vim:et sts=4 sw=4 tw=0:
Subject: | Extra.pm |
package Text::WikiCreole::Extra;
use strict;
use warnings;
=head1 NAME
Text::WikiCreole::Extra - Additional functionality for Creole pages
=cut
our $VERSION = '1.01';
# 2011-11-23 djh 1.00 Created
# 2011-12-01 djh 1.01 Added Exporter
#=======================================================================
=head1 SYNOPSIS
require Text::WikiCreole::Extra; # or use instead of require
...
$toc = Text::WikiCreole::Extra::creole_toc($content, $options);
or
use Text::WikiCreole::Extra qw(creole_toc);
...
$toc = creole_toc($content, $options);
=head1 DESCRIPTION
Text::WikiCreole::Extra implements some additional functions for use
when processing Creole 1.0 wiki markup with L<Text::WikiCreole>.
The functions can generate a table of contents for a wiki page,
or scan the image files that appear on a page.
For compatibility with the style of L<Text::WikiCreole> these are
functions rather than methods. It is possible to export each of the
function names, but by contrast with L<Text::WikiCreole> the names are
not exported by default.
=cut
# Only @EXPORT_OK is populated, so nothing is exported by default; each
# function must be requested by name in the caller's import list.
use Exporter qw(import);
our @EXPORT_OK = qw(creole_toc creole_toc_scan toc_format
creole_get_image_references);
#=======================================================================
=head1 FUNCTIONS
=head2 creole_toc
Create an HTML table of contents for a Creole page.
$html = creole_toc($content, $options);
Takes as arguments a C<$content> string representing a Creole page and a
hash reference of C<$options> to format the table of contents.
This function is a simple wrapper around creole_toc_scan() and toc_format().
See their documentation for further details of the arguments.
=cut
# Build the HTML table of contents for a Creole page in one call:
# scan $content for headings, then format them according to $options.
sub creole_toc
{
    my ($content, $options) = @_;

    my $headings = creole_toc_scan($content);
    return toc_format($headings, $options);
}
#=======================================================================
=head2 creole_toc_scan
Produce an array summarising the section headings in a Creole page.
$toc = creole_toc_scan($creole_text_string);
The returned value is a reference to a list of pairs.
Each pair is a reference to a two-element list;
the first element in the list is the I<level> of a heading,
and the second element is the text of that heading.
The heading I<level> is a small number, representing the number of
C<=> characters in front of the heading text.
The headings are listed in document order.
=cut
# Collect the section headings of a Creole page.
#
# Arguments:
#   $content - the wikitext of the page, as a single string.
#
# Returns a reference to a list of [level, text] pairs in document order,
# where level is the number of leading '=' characters.  The list is empty
# when there are no headings or $content is undefined.
#
# Closing '=' characters are optional, so "== Title" and "== Title ==" are
# both headings.  When closing characters are present but their count
# differs from the opening count a warning is issued; the opening count
# still determines the level.
sub creole_toc_scan
{
    my ($content) = @_;
    my @toc;
    return \@toc unless defined $content;

    for my $line (split /\n/, $content)
    {
        my ($left, $text, $right) =
            $line =~ /^\s*(=+)\s*(\S.*?)\s*(=*)\s*$/;
        next unless defined $left;

        # A line consisting only of '=' characters has no heading text.
        next if $text =~ /^=+$/;

        warn "Mismatched equals at '$line' left='$left' right='$right'\n"
            if length $right and $left ne $right;

        push @toc, [length $left, $text];
    }
    return \@toc;
}
#=======================================================================
=head2 toc_format
Format an HTML table of contents, given a list of section headings.
$html = toc_format($toc, $options);
This function takes a reference to a list of section headings, as
produced by creole_toc_scan(), and produces a formatted HTML representation
of the list. The output is influenced by various options, which are the
elements of the hash referenced as an argument:
=over 4
=item levels
The level-number of the least important heading to include in the table
of contents. The default is 3. (i.e. I<=== A third-level heading ===>)
=item title
The text to appear as the title of the table of contents.
The default is I<'Table of Contents'>.
=back
The output is not an HTML table, but is a sequence of C<div> elements.
The title is emitted inside the enclosing C<tocbox> element:
<div class="tocbox"><div id="toctitle">your title here</div>
<div class="tocline"><a href="#tag">heading text</a></div>
</div>
The appearance can be changed by writing CSS for the various classes of
C<div> elements. Redefine this function if larger changes are needed.
The HTML fragment identifiers (e.g. C<#tag>) should be inserted as
anchors in the text of the actual headings. This can be done by
adjusting the C<$Text::WikiCreole::chunks{"h1"}->{filter}> callback
for the relevant heading levels. For example:
sub _creole_h_filter {
# arg is text of header element
&Text::WikiCreole::strip_head_eq;
(my $tag = $_[0]) =~ tr/ /_/;
my $top = '<a href="#top" class="top">^</a> ';
$_[0] = "<a name=\"$tag\"></a>$top$_[0]";
}
for (1..6) {
$Text::WikiCreole::chunks{"h$_"}->{filter} = \&_creole_h_filter;
}
=cut
# Format a list of headings as an HTML table of contents.
#
# Arguments:
#   $toc     - reference to a list of [level, text] pairs, as produced by
#              creole_toc_scan().
#   $options - hash reference; recognised keys:
#                levels - deepest heading level to include (default 3)
#                title  - title of the table of contents
#                         (default 'Table of Contents'); inserted as
#                         given, so it may contain markup.
#
# Returns the HTML as a single string: a "tocbox" div containing the
# "toctitle" div followed by one "tocline" div per included heading.
#
# Heading text originates from page content, so the characters & < > "
# are replaced by entities both in the visible text and in the href
# fragment.  The browser decodes the attribute back to the raw text, so
# the fragment still matches an anchor named after the raw heading with
# spaces changed to underscores.
sub toc_format
{
    my ($toc, $options) = @_;
    my $maxlevels = $options->{levels} || 3;
    my $title     = $options->{title}  || 'Table of Contents';

    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );

    my $html = '<div class="tocbox">'
             . '<div id="toctitle">' . $title . "</div>\n";

    for my $heading (@$toc)
    {
        my ($level, $text) = @$heading;
        next if $level > $maxlevels;

        (my $safe_text = $text) =~ s/([&<>"])/$entity_for{$1}/g;
        (my $tag = $safe_text) =~ s/ /_/g;
        my $indent = ' ' x ($level * 2);

        $html .= '<div class="tocline">' . $indent
               . '<a href="#' . $tag . '">' . $safe_text . '</a>'
               . "</div>\n";
    }
    $html .= "</div>\n\n";

    return $html;
}
#=======================================================================
=head2 creole_get_image_references
Scan a Creole page for references to images.
$images = creole_get_image_references($text);
This function returns a reference to a list of the names of image files
that appear on the page. The list contains the filenames in the order
that they occur on the page and will contain the same name multiple
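times if the image occurs more than once on the page.
If the page contains no image references, nothing is returned (an empty
list, or undef in scalar context) rather than a reference to an empty list.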
The argument is the content from a Creole wiki page.
=cut
# List the image files referenced by {{...}} markup on a Creole page.
#
# Arguments:
#   $text - the wikitext of the page, as a single string.
#
# Returns a reference to a list of image names in page order (duplicates
# kept), with any "|alternative text" part and surrounding whitespace
# removed.  Returns nothing (empty list / undef in scalar context) when no
# image reference is found or $text is undefined.
sub creole_get_image_references
{
    my ($text) = @_;
    return unless defined $text;

    # An image is introduced by exactly two opening braces.  The
    # look-behind and look-ahead reject the three-brace nowiki markup
    # {{{...}}}, whose inner braces would otherwise look like an image.
    # The trailing \s* keeps whitespace before the closing braces out of
    # the captured name.
    my @images = $text =~ /(?<!\{)\{\{(?!\{)\s*([^\s{}][^\n]*?)\s*\}\}/g;
    return unless @images;

    for my $image (@images)
    {
        # Drop the optional "|alternative text" part.
        $image =~ s/\s*\|.*//;
    }
    return \@images;
}
#=======================================================================
1;
__END__
=head1 COPYRIGHT & LICENSE
Copyright 2011 Dave Howorth C<< <djh@cpan.org> >>
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself or under the terms of the GPL.
# vim:et sts=4 sw=4 tw=0: