cleanhtm.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:25 2010 from cleanhtm.pl 2006/09/09 1.9 KB.

#!/Perl
# cleanhtm.pl - 2006.09.09 - geoff mclane (geoffmclane.com)
# AIM: To clean certain items from a HTML document ...
#
# Usage: cleanhtm.pl [output_file] [input_file]
#   Arguments are consumed from the END of the command line: the last
#   argument is the input file and the one before it (if any) is the output
#   file. Either may be omitted, in which case the defaults below are used.
#
# Helper subs (open_log, prt, mydie, close_log, htmlexpand, tag2newline,
# comments2newline, trimblanks) are not defined in this file; presumably they
# come from logfile.pl and htmltools.pl - confirm against those files.
use strict;
use warnings;
require 'logfile.pl' or die "ERROR: Can NOT load logfile.pl ...\n";
require 'htmltools.pl' or die "ERROR: Can NOT load htmltools.pl ...\n";

# log file stuff - the log name is derived from the script name in $0
my $outfile = 'temp'.$0.'.txt';
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

# user variables - defaults used when no command line arguments are given
my $def_input = '..\javascript\messageontop.htm';
my $def_output = 'tempout.htm';

# program variables
my $in_file = $def_input;
my $out_file = $def_output;
# NOTE: pop (not shift) - the LAST argument is the input file.
$in_file = pop @ARGV if (@ARGV);
$out_file = pop @ARGV if (@ARGV);
prt( "Got input from [$in_file], output to [$out_file] ...\n" );

if ( ! -f $in_file) {
   mydie("OOPS: Can NOT locate [$in_file] ...\n");
}

# Three-argument open with a lexical handle, so a file name that starts with
# '>', '|' or whitespace can never be misread as an open mode.
open my $in_fh, '<', $in_file
   or mydie("OOPS: Can NOT open [$in_file] ...\n");
my @lines = <$in_fh>;   # slurp it all in (each line keeps its newline)
close $in_fh;

my $cnt = scalar @lines;
prt("Processing $cnt lines from [$in_file] ...\n");

# The lines still carry their own line endings, so join with an empty
# separator; joining with "\n" would double every line break and inflate the
# character count reported below.
my $txt = join('', @lines);
my $ccnt = length($txt);
prt("Or $ccnt characters from [$in_file] ...\n");

my $ntxt = htmlexpand($txt);

# Apply tag2newline for each of these tags, logging the text length before
# each pass.
for my $tag (qw(font input form)) {
   prt( "len=" . length($ntxt) . " - Add $tag tag to new line ...\n");
   $ntxt = tag2newline($ntxt, $tag);
}

prt( "len=" . length($ntxt) . " - Add comments to new line ...\n");
$ntxt = comments2newline($ntxt);

prt( "len=" . length($ntxt) . " - left before trimblanks ...\n");
###$ntxt = trimblanklines($ntxt);
$ntxt = trimblanks($ntxt);
$ccnt = length($ntxt);
prt("Now $ccnt characters ...\n");

open my $out_fh, '>', $out_file
   or mydie("YEEK! Can NOT create [$out_file] ...\n");
print {$out_fh} $ntxt;
# Buffered write errors (e.g. disk full) only surface at close, so check it.
close $out_fh
   or mydie("YEEK! Can NOT close [$out_file] ... $!\n");
prt("Written $ccnt characters to [$out_file]...\n");

# NOTE(review): meaning of the second argument (1) is defined by logfile.pl - confirm.
close_log($outfile,1);
exit(0);
# eof - cleanhtm.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional