Subject: | beautifying whitespace [patch] |
Hi,
the TeXLive project is using XML::DOM, and one member (Fabrice Popineau,
who's currently not very active) has patched DOM.pm to give output
that's more usable in strings - it either removes whitespace, or adds
consistent indentation.
I must admit that I do not completely understand the purpose or use
cases. I have just come across it because I wanted to reuse texlive's
xml files and Perl scripts, and had to use the patched DOM.pm or change
the scripts to get it running.
It might be that the clean solution would be to put the "whitespace
beautifying" code into a separate module, but just in case you're
interested I am sending you the patch. It's clear that it can't be applied
without changes (hardcoded variables which should be settable from the
calling script), but it's a start.
I'd be glad if you could tell me whether you are interested, or not at
all. The code is in the public domain (I can send you an email from the
author in which he asserts this, if you wish).
Thank you in advance,
Frank
P.S. and it's based on an old version, but that's trivial to find.
Subject: | DOM.pm.diff |
--- /usr/share/perl5/XML/DOM.pm 2003-07-29 00:46:43.000000000 +0200
+++ src/Packages/texlive/texlive/LocalTPM/Tools/XML/DOM.pm 2006-03-23 13:46:15.000000000 +0100
@@ -34,6 +34,7 @@
use vars qw( $VERSION @ISA @EXPORT
$IgnoreReadOnly $SafeMode $TagStyle
%DefaultEntities %DecodeDefaultEntity
+ $beautifying $current_indent $string_indent $current_print_level @need_indent
);
use Carp;
use XML::RegExp;
@@ -41,7 +42,7 @@
BEGIN
{
require XML::Parser;
- $VERSION = '1.43';
+ $VERSION = '1.42';
my $needVersion = '2.28';
die "need at least XML::Parser version $needVersion (current=${XML::Parser::VERSION})"
@@ -113,6 +114,12 @@
"&" => "&"
);
+$beautifying = 1;
+$current_indent = 0;
+$string_indent = " ";
+$current_print_level = 0;
+@need_indent = ();
+
#
# If you don't want DOM warnings to use 'warn', override this method like this:
#
@@ -2710,6 +2717,12 @@
my $name = $self->[_TagName];
+ if ($XML::DOM::beautifying) {
+ for (my $i = 0; $i < $XML::DOM::current_indent; $i++) {
+ $FILE->print ($XML::DOM::string_indent);
+ }
+ }
+
$FILE->print ("<$name");
if (defined $self->[_A])
@@ -2728,12 +2741,38 @@
my @kids = @{$self->[_C]};
if (@kids > 0)
{
- $FILE->print (">");
+ $FILE->print (">");
+ $XML::DOM::current_print_level++;
+ if ($XML::DOM::beautifying)
+ {
+ if ($#kids > 0 || ! $kids[0]->isTextNode)
+ {
+ $FILE->print ("\n");
+ }
+ $XML::DOM::current_indent++;
+ }
for my $kid (@kids)
{
$kid->print ($FILE);
}
+ if ($XML::DOM::beautifying)
+ {
+ $XML::DOM::current_indent--;
+ if ($#kids > 0|| ($#kids == 0 && ! $kids[0]->isTextNode) || $XML::DOM::need_indent[$XML::DOM::current_print_level])
+ {
+ for (my $i = 0; $i < $XML::DOM::current_indent; $i++)
+ {
+ $FILE->print ($XML::DOM::string_indent);
+ }
+ }
+ $XML::DOM::need_indent[$XML::DOM::current_print_level] = 0;
+ }
$FILE->print ("</$name>");
+ $XML::DOM::current_print_level--;
+ if ($XML::DOM::beautifying)
+ {
+ $FILE->print ("\n");
+ }
}
else
{
@@ -2750,6 +2789,10 @@
{
$FILE->print (" />");
}
+ if ($XML::DOM::beautifying)
+ {
+ $FILE->print ("\n");
+ }
}
}
@@ -3174,7 +3217,24 @@
sub print
{
my ($self, $FILE) = @_;
- $FILE->print (XML::DOM::encodeText ($self->getData, '<&>"'));
+ my ($s) = XML::DOM::encodeText ($self->getData, '<&>"');
+ if ($XML::DOM::beautifying)
+ {
+ $s =~ s@^[\s\n]*(.*)[\s\n]*$@$1@so;
+ $s =~ s@\n\s*@\n@gm;
+ if (length($s) + $XML::DOM::current_print_level > 48)
+ {
+ $XML::DOM::need_indent[$XML::DOM::current_print_level] = 1;
+ $s = "\n$s\n";
+ $s =~ s@\n\n$@\n@;
+ }
+ else
+ {
+ $XML::DOM::need_indent[$XML::DOM::current_print_level] = 0;
+ $s =~ s@\n$@@;
+ }
+ }
+ $FILE->print ($s);
}
sub isTextNode
@@ -4198,7 +4258,7 @@
{
my ($class, %args) = @_;
- $args{Style} = 'XML::Parser::Dom';
+ $args{Style} = 'Dom';
$class->SUPER::new (%args);
}
@@ -4264,30 +4324,30 @@
# request, which we could convert to a stream with a fork()...
my $result;
- eval
- {
- use LWP::UserAgent;
-
- my $ua = $self->{LWP_UserAgent};
- unless (defined $ua)
- {
- unless (defined $LWP_USER_AGENT)
- {
- $LWP_USER_AGENT = LWP::UserAgent->new;
-
- # Load proxy settings from environment variables, i.e.:
- # http_proxy, ftp_proxy, no_proxy etc. (see LWP::UserAgent(3))
- # You need these to go thru firewalls.
- $LWP_USER_AGENT->env_proxy;
- }
- $ua = $LWP_USER_AGENT;
- }
- my $req = new HTTP::Request 'GET', $url;
- my $response = $ua->request ($req);
+# eval
+# {
+# use LWP::UserAgent;
- # Parse the result of the HTTP request
- $result = $self->parse ($response->content, @_);
- };
+# my $ua = $self->{LWP_UserAgent};
+# unless (defined $ua)
+# {
+# unless (defined $LWP_USER_AGENT)
+# {
+# $LWP_USER_AGENT = LWP::UserAgent->new;
+
+# # Load proxy settings from environment variables, i.e.:
+# # http_proxy, ftp_proxy, no_proxy etc. (see LWP::UserAgent(3))
+# # You need these to go thru firewalls.
+# $LWP_USER_AGENT->env_proxy;
+# }
+# $ua = $LWP_USER_AGENT;
+# }
+# my $req = new HTTP::Request 'GET', $url;
+# my $response = $ua->request ($req);
+
+# # Parse the result of the HTTP request
+# $result = $self->parse ($response->content, @_);
+# };
if ($@)
{
die "Couldn't parsefile [$url] with LWP: $@";
@@ -4900,7 +4960,7 @@
=item $VERSION
The variable $XML::DOM::VERSION contains the version number of this
-implementation, e.g. "1.43".
+implementation, e.g. "1.42".
=back