fav-04b.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:31 2010 from fav-04b.pl 2006/07/15 4.9 KB.

#!/Perl
# AIM: Load broken HTML report from front page ...
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file
# --- Log file setup ---------------------------------------------------
# open_log() and prt() are not defined in this file; presumably they are
# supplied by logfile.pl (confirm there).
my ($LF); # not referenced anywhere in the visible part of this script
my $outfile = 'temp'.$0.'.txt';
open_log($outfile);
prt( "$0 ... Hello, World...\n" );

# --- Load the HTML report ---------------------------------------------
my $b_infile = 'c:\HOMEPAGE\Broken02.htm';
# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle does not leak into the package
# namespace.  $! is added so a failure reports the operating-system reason.
open( my $b_infh, '<', $b_infile )
   or die "Unable to open $b_infile ... ($!)\n";
my @b_lines = <$b_infh>; # slurp it all in ...
close $b_infh;

# Column headings expected in the first table row, in order; the parser
# below aborts when a first-row cell does not match its heading.
my @b_row1 = ('Status', 'Hyperlink', 'In Page', 'Page Title', 'Modified By');
my $b_icnt = scalar @b_row1; # number of expected columns
my $b_lc = scalar @b_lines;  # number of input lines read
prt( "Got $b_lc lines to process ...\n" );
# --- Parser state -------------------------------------------------------
my $b_line = '';
my ($b_len, $b_i, $b_ch);
my $b_intag = 0;   # true while between '<' and '>'
my $b_tag = '';    # characters collected for the tag being read
my @b_arr = ();    # the current tag split on whitespace
my $b_intab = 0;   # true between <table> and </table>
my $b_intr = 0;    # true between <tr> and </tr>
my $b_intd = 0;    # true between <td> and </td>
my $b_tdcnt = 0;   # 1-based index of the current cell within its row
my $b_part = '';   # lower-cased name of the tag just closed
my $b_text = '';   # text collected for the current cell
my $b_row = 0;     # number of <tr> tags seen so far
# --- Results, filled here and by the code that follows -------------------
my @b_all = ();     # one [status, hyperlink, page, title, modified-by] per data row
my @b_brokenf = (); # unique broken links found on the favorites page
my @b_brokeno = (); # unique broken links found on other pages
my @b_ok = ();
my $b_status = '';
my $b_hyper = '';
my $b_page = '';
my $b_title = '';
my $b_modby = '';
my $b_bcnt = 0;
my $b_bcntf = 0;
my $b_bcntfd = 0;
my $b_bcnto = 0;
my $b_ocnt = 0;
my $b_ucnt = 0;

# Scan the file one character at a time.  Tags drive a small state machine
# (table / row / cell); text is collected only while inside a cell.  Each
# data row with five cells is stored in @b_all; the first row must carry
# the headings listed in @b_row1.
foreach $b_line (@b_lines) {
   chomp $b_line;
   $b_len = length($b_line);
   for ($b_i = 0; $b_i < $b_len; $b_i++) {
      $b_ch = substr($b_line, $b_i, 1);
      if ($b_intag) {
         if ($b_ch eq '>') {
            $b_intag = 0;
            # The tag name is the first whitespace-delimited word.  HTML
            # element names are case-insensitive, so fold to lower case
            # before comparing; an empty tag ("<>") yields an empty name.
            @b_arr = split(/\s+/, $b_tag);
            $b_part = @b_arr ? lc($b_arr[0]) : '';
            if ($b_intab) {
               if ($b_part eq '/table') {
                  prt( "Got table close ...\n" );
                  $b_intab = 0;
               } elsif ($b_part eq 'tr') {
                  $b_row++;
                  prt( "Got <TR> ... row $b_row ...\n" );
                  $b_intr = 1;
                  $b_tdcnt = 0;
               } elsif ($b_part eq '/tr') {
                  prt( "Got </TR> ...\n" );
                  $b_intr = 0;
               } elsif ($b_intr) {
                  if ($b_part eq 'td') {
                     $b_intd = 1;
                     $b_tdcnt++;
                     prt( "Got <TD> $b_tdcnt...\n" );
                  } elsif ($b_part eq '/td') {
                     prt( "Got </TD> $b_tdcnt...text=$b_text\n" );
                     if ($b_row == 1) {
                        # Heading row: every cell must match the expected
                        # heading, otherwise this is not the right report.
                        if ($b_tdcnt && ($b_tdcnt <= $b_icnt)) {
                           if ($b_text ne $b_row1[$b_tdcnt-1]) {
                              prt( "Oops row=$b_row tdcnt=$b_tdcnt [".$b_text."] ne [".$b_row1[$b_tdcnt-1]."]!\n");
                              # mydie() is not defined in this file;
                              # presumably it comes from logfile.pl.
                              mydie( "ERROR: Does not appear the correct HTML file ...\n" );
                           }
                        }
                     } else {
                        # greater than row 1: file the cell under its column;
                        # the fifth cell completes the record.
                        if ($b_tdcnt == 1) {
                           $b_status = $b_text;
                        } elsif ($b_tdcnt == 2) {
                           $b_hyper = $b_text;
                        } elsif ($b_tdcnt == 3) {
                           $b_page = $b_text;
                        } elsif ($b_tdcnt == 4) {
                           $b_title = $b_text;
                        } elsif ($b_tdcnt == 5) {
                           $b_modby = $b_text;
                           push(@b_all, [$b_status, $b_hyper, $b_page, $b_title, $b_modby]);
                           prt( "push(\@b_all, [$b_status, $b_hyper, $b_page, $b_title, $b_modby]) ..\n" );
                        }
                     }
                     $b_intd = 0;
                     $b_text = '';
                  }
               }
            } else {
               if ($b_part eq 'table') {
                  prt( "Got table begin ...\n" );
                  $b_intab = 1;
               }
            }
         } else {
            $b_tag .= $b_ch;
         }
         next;
      } elsif ($b_ch eq '<') {
         $b_intag = 1;
         $b_tag = '';
         next;
      }
      if ($b_intab && $b_intr && $b_intd) {
         $b_text .= $b_ch;
      }
   } # for line length
   if ($b_intag) {
      # The tag continues on the next line.  chomp removed the line break,
      # so put a space in its place; otherwise the tag name would run
      # straight into the first attribute on the following line.
      $b_tag .= ' ';
   }
}
# --- Classify the collected rows ------------------------------------------
# Each @b_all entry is [status, hyperlink, page, title, modified-by].
# 'Broken' rows are split into links found on the favorites page and links
# found elsewhere, each list kept free of duplicates; 'OK' rows and rows
# with any other status are only counted.
$b_icnt = scalar @b_all;
prt( "Got $b_icnt table entries ...\n" );
# Duplicate broken links on non-favorites pages.  This counter was
# incremented without ever being declared or reported.
my $b_bcntod = 0;
foreach my $b_entry (@b_all) {
   ($b_status, $b_hyper, $b_page, $b_title, $b_modby) = @{$b_entry};
   prt( "Extracted [$b_status, $b_hyper, $b_page, $b_title, $b_modby] ...\n" );
   if ($b_status eq 'Broken') {
      $b_bcnt++;
      if (($b_page eq 'favorites.htm') || ($b_title =~ /List of Favorites/)) {
         $b_bcntf++;
         if ( in_brokenf($b_hyper) ) {
            $b_bcntfd++;
         } else {
            push(@b_brokenf, $b_hyper);
         }
      } else {
         $b_bcnto++;
         if ( in_brokeno($b_hyper) ) {
            $b_bcntod++;
         } else {
            push(@b_brokeno, $b_hyper);
         }
      }
   } elsif ($b_status eq 'OK') {
      $b_ocnt++;
   } else {
      $b_ucnt++;
   }
}
# dupes = favorites-page duplicates + other-page duplicates.
prt( "Got bcnt=$b_bcnt ($b_bcntf + $b_bcnto = ".($b_bcntf + $b_bcnto).") dupes=$b_bcntfd + $b_bcntod ocnt=$b_ocnt other=$b_ucnt ...\n" );
prt( "Total ".($b_bcnt + $b_ocnt)." of $b_icnt ...\n" );
# --- Emit the two lists as Perl array declarations ----------------------
# The log receives text of the form
#    my @fav_broken = (
#     'link1',
#     'link2');
# Backslashes and single quotes inside a link are escaped so that the
# emitted single-quoted literal stays valid Perl.
foreach my $b_set ( [ 'fav_broken', \@b_brokenf ], [ 'oth_broken', \@b_brokeno ] ) {
   my ($b_name, $b_list) = @{$b_set};
   prt( "my \@$b_name = (\n" );
   my $b_done = 0; # entries written so far; a separator precedes all but the first
   foreach my $b_link (@{$b_list}) {
      if ($b_done) {
         prt( ",\n" );
      }
      (my $b_quoted = $b_link) =~ s/([\\'])/\\$1/g;
      prt( " '$b_quoted'" );
      $b_done++;
   }
   prt( ");\n" );
}
# close_log() is not defined in this file; presumably from logfile.pl.
close_log($outfile,1);
exit(0);
##################################
### subs only
# in_brokenf( $link )
# Returns 1 when $link is string-equal to an entry already stored in
# @b_brokenf, otherwise 0.
sub in_brokenf {
   my $wanted = shift;
   my $matches = grep { $_ eq $wanted } @b_brokenf;
   return $matches ? 1 : 0;
}
# in_brokeno( $link )
# Returns 1 when $link is string-equal to an entry already stored in
# @b_brokeno, otherwise 0.
sub in_brokeno {
   my $wanted = shift;
   my $matches = grep { $_ eq $wanted } @b_brokeno;
   return $matches ? 1 : 0;
}
# eof - fav-04b.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional