Skip Menu |

Preferred bug tracker

Please visit the preferred bug tracker to report your issue.

This queue is for tickets about the HTML-Tidy CPAN distribution.

Report information
The Basics
Id: 7254
Status: rejected
Priority: 0/
Queue: HTML-Tidy

People
Owner: Nobody in particular
Requestors: aaronp [...] classmates.com
Cc:
AdminCc:

Bug Information
Severity: (no value)
Broken in: (no value)
Fixed in: (no value)



Date: Wed, 4 Aug 2004 11:29:32 -0700
From: Aaron Patterson <aaronp [...] classmates.com>
To: bug-html-tidy [...] rt.cpan.org
Subject: tidy->clean bug with patch
Here's a snippet of code to repro the bug; it produces an 'Illegal instruction' error (a proposed patch is below): #!/usr/bin/perl -w use strict; use HTML::Tidy; my $html = do { local $/; <DATA> }; my $tidy = new HTML::Tidy; $tidy->clean($html); my @mess = map { $_ ? $_->as_string() : undef } $tidy->messages(); __DATA__ <form action="http://www.alternation.net/cobra/index.pl"> <td><input name="random" type="image" value="random creature" src="http://www.creaturesinmyhead.com/images/random.gif"></td> </form> __END__ Here's a patch I made to get rid of the illegal instruction and to provide the messages returned from the 'clean()' method: diff -cNr --exclude=Makefile old_html/HTML-Tidy-1.04/lib/HTML/Tidy.pm new_html/HTML-Tidy-1.04/lib/HTML/Tidy.pm *** old_html/HTML-Tidy-1.04/lib/HTML/Tidy.pm 2004-05-12 13:18:54.000000000 -0700 --- new_html/HTML-Tidy-1.04/lib/HTML/Tidy.pm 2004-07-27 12:13:08.101379320 -0700 *************** *** 167,201 **** my $self = shift; my $filename = shift; - my $parse_errors; my $html = join( "", @_ ); my $errorblock = _tidy_messages( $html ); return unless defined $errorblock; - my @lines = split( /\012/, $errorblock ); - for my $line ( @lines ) { - chomp $line; - - my $message; - if ( $line =~ /^line (\d+) column (\d+) - (Warning|Error): (.+)$/ ) { - my $type = ($3 eq "Warning") ? TIDY_WARNING : TIDY_ERROR; - $message = HTML::Tidy::Message->new( $filename, $type, $1, $2, $4 ); - - } elsif ( $line =~ /^\d+ warnings?, \d+ errors? were found!/ ) { - # Summary line we don't want - - } elsif ( $line eq "No warnings or errors were found." ) { - # Summary line we don't want - - } else { - warn "Unknown error type: $line"; - ++$parse_errors; - } - push( @{$self->{messages}}, $message ) if $self->_is_keeper( $message ); - } ! return !$parse_errors; } =head2 clean( $str [, $str...] ) --- 167,222 ---- my $self = shift; my $filename = shift; my $html = join( "", @_ ); my $errorblock = _tidy_messages( $html ); return unless defined $errorblock; ! 
return !$self->_parse_errors($filename, $errorblock); ! } ! ! sub _parse_errors { ! my $self = shift; ! my $filename = shift; ! my $errs = shift; ! ! my $parse_errors; ! ! my @lines = split( /\012/, $errs ); ! for my $line ( @lines ) { ! chomp $line; ! ! my $message; ! if ( $line =~ /^line (\d+) column (\d+) - (Warning|Error): (.+)$/ ) { ! my $type = ($3 eq "Warning") ? TIDY_WARNING : TIDY_ERROR; ! $message = HTML::Tidy::Message->new( $filename, $type, $1, $2, $4 ); ! ! } elsif ( $line =~ /^\d+ warnings?, \d+ errors? were found!/ ) { ! # Summary line we don't want ! ! } elsif ( $line eq "No warnings or errors were found." ) { ! # Summary line we don't want ! ! } elsif ( $line eq "This document has errors that must be fixed before" ){ ! # Summary line we don't want ! ! } elsif ( $line eq "using HTML Tidy to generate a tidied up version." ){ ! # Summary line we don't want ! ! } elsif ( $line =~ m/^Info:/ ) { ! # Info line we don't want ! ! } elsif ( $line =~ m/^\s*$/ ) { ! # Blank line we don't want ! ! } else { ! warn "Unknown error type: $line"; ! ++$parse_errors; ! } ! push( @{$self->{messages}}, $message ) if $self->_is_keeper( $message ); ! } ! return $parse_errors; } =head2 clean( $str [, $str...] ) *************** *** 209,215 **** sub clean { my $self = shift; ! return _tidy_clean(join( "", @_ )); } --- 230,239 ---- sub clean { my $self = shift; ! my ($cleaned, $errbuf) = _tidy_clean(join( "", @_ )); ! ! $self->_parse_errors('', $errbuf); ! return $cleaned; } diff -cNr --exclude=Makefile old_html/HTML-Tidy-1.04/Tidy.xs new_html/HTML-Tidy-1.04/Tidy.xs *** old_html/HTML-Tidy-1.04/Tidy.xs 2004-04-01 21:36:19.000000000 -0800 --- new_html/HTML-Tidy-1.04/Tidy.xs 2004-07-27 13:47:07.415073328 -0700 *************** *** 38,76 **** RETVAL ! SV * _tidy_clean(input) INPUT: char *input ! CODE: TidyBuffer errbuf = {0}; - TidyDoc tdoc = tidyCreate(); // Initialize "document" TidyBuffer output = {0}; ! int rc; ! 
rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics if ( rc >= 0 ) rc = tidyParseString( tdoc, input ); // Parse the input if ( rc >= 0 ) rc = tidyCleanAndRepair(tdoc); if ( rc >= 0) rc = tidySaveBuffer( tdoc, &output ); ! if ( rc > 1 ) // If error, force output. ! rc = tidyOptSetBool( tdoc, TidyForceOutput, yes ) ? rc : -1; if ( rc >= 0 ) { char *str; str = (char *)output.bp; if ( str ) ! RETVAL = newSVpvn( str, strlen(str) ); ! tidyBufFree( &output ); } else { XSRETURN_UNDEF; } tidyBufFree( &errbuf ); tidyRelease( tdoc ); - OUTPUT: - RETVAL - --- 38,80 ---- RETVAL ! void _tidy_clean(input) INPUT: char *input ! PPCODE: TidyBuffer errbuf = {0}; TidyBuffer output = {0}; ! TidyDoc tdoc = tidyCreate(); // Initialize "document" ! int rc = -1; ! rc = tidyOptSetInt( tdoc, TidyWrapLen, 0 ); ! if(rc >= 0) ! rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics if ( rc >= 0 ) rc = tidyParseString( tdoc, input ); // Parse the input if ( rc >= 0 ) rc = tidyCleanAndRepair(tdoc); + if ( rc > 1 ) + rc = tidyOptSetBool( tdoc, TidyForceOutput, yes ) ? rc : -1; if ( rc >= 0) rc = tidySaveBuffer( tdoc, &output ); ! if ( rc >= 0) ! rc = tidyRunDiagnostics( tdoc); if ( rc >= 0 ) { char *str; str = (char *)output.bp; if ( str ) ! XPUSHs(sv_2mortal(newSVpvn(str, strlen(str)))); ! ! if(errbuf.bp) ! XPUSHs(sv_2mortal(newSVpvn(errbuf.bp, strlen(errbuf.bp)))); } else { XSRETURN_UNDEF; } + tidyBufFree( &output ); tidyBufFree( &errbuf ); tidyRelease( tdoc );