stripcols.pl to HTML.

index -|- end

Generated: Mon Aug 16 14:14:39 2010 from stripcols.pl 2010/06/29 20.5 KB.

#!/perl -w
# NAME: stripcols.pl
# AIM: Strip the left number of columns off a file
# 29/06/2010 - Functionality check
# 03/11/2009 - Add more options 
# 2009/09/23  - geoff mclane - http://geoffair.net/mperl/
use strict;
use warnings;
use Cwd;
use File::Basename;
use Time::gmtime;
unshift(@INC, "C:\\GTools\\perl");
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
    my @tmpsp = split(/\\/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $perl_base = "C:\\GTools\\perl"; # perl directory
my $outfile = $perl_base."\\temp.$pgmname.txt";
open_log($outfile);

# FEATURES
my $load_log = 0;
my $dbg_load = 0; # add debug lines top and bottom, and repeat last line
my $rule_line = '12345678901234567890123456789012345678901234567890123456789012345678901234567890';
my $dbg_run = 0; # no arguments - run on defaults
my $html_out = 0; # output HTML
my $dbg2log = 0;  # add the output to the LOG file as well
my $skip_common = 0;    # avoid writing duplicate lines to output
my $dbg_sc01 = 0;
# PROGRAM VARIABLES
my $in_file = "temp.txt";
#my $in_file = "C:\\DTEMP\\temp2.txt";
my $out_file = $perl_base."\\tempoutf.txt";
my $columns = 0;
my $newtxt = '';
my @col_ranges = ();
my $beyond_col = 0;
my @warnings = ();
my @html_cols = ();
my $html_rtalign = 0;
my $top_skip = 0;
my $bot_skip = 0;
my $got_out = 0;    # if given out, and is text, set to load it...

sub prtw($) {
   my ($msg) = @_;
   prt($msg);
   $msg =~ s/\n$//;
   push(@warnings,$msg);
}

sub show_warnings() {
   if (@warnings) {
      prt( "Got ".scalar @warnings." WARNINGS...\n" );
      foreach my $msg (@warnings) {
         prt( "$msg\n" );
      }
   } else {
      prt( "No warnings issued.\n" );
   }
}

sub pgm_exit($$) {
   my ($val,$msg) = @_;
   show_warnings();
   if (length($msg)) {
      $msg .= "\n" if ( !($msg =~ /\n$/) );
      prt($msg);
   }
   close_log($outfile,$load_log);
   exit($val);
}

sub prt_help() {
    prt( "$pgmname - Version 0.0.1.\n" );
    prt( "$pgmname in_file columns [out_file] [-ll]\n" );
    prt( " in_file   - file to strip columns from.\n" );
    prt( " columns   - number of columns to strip from head of line.\n" );
    prt( " out_file  - Optional: file to write stripped output. (default=$out_file)\n" );
    prt( " -b <VAL>  - Strip line beyond this column VAL.\n" );
    prt( " -c <VAL>  - Columns to strip from beginning of lines, 0 to whatever.\n" );
    prt( " -cr <RNG> - Give column deletion range, BGN-END. Strip from BGN+1 to END.\n" );
    prt( " -i <FILE> - Alternate way to give INPUT file name.\n" );
    prt( " -o <FILE> - Alternate way to give OUTPUT file name.\n" );
    prt( " -htm      - Output into HTML table.\n" );
    prt( " -hc <VAL> - Split the table into columns at VAL. May use multiple columns.\n" );
    prt( " -rt <VAL> - Skip VAL rows at the top of the file.\n" );
    prt( " -rb <VAL> - Skip VAL rows at the bottom of the file.\n" );
    prt( " -ll       - Optional: load log at end in notepad. (default=No)\n" );
    prt( " -sc       - Skip duplicate lines in text output.\n");
    prt( "Note the output file is unconditionally written, but it is renamed to .old or .bak first!\n" );
}

sub get_YYYYMMDD($$) {
    my ($t,$sep) = @_;
    # sec, min, hour, mday, mon, year, wday, yday, and isdst.
    my $tm = gmtime($t);
    my $m = sprintf( "%04d/%02d/%02d", $tm->year() + 1900, $tm->mon() + 1, $tm->mday());
    $m =~ s/\//$sep/g if ($sep ne '/');
    return $m;
}

sub chomp_rule_line($$$) {
   my ($rrl,$bgn,$end) = @_;
   my $line = ${$rrl};
   my $len = length($line);
   my @arr = split('',$line);
   for (my $i = $bgn; $i < $end; $i++) {
      last if ($i >= $len);
      $arr[$i] = '_';
   }
   $line = join('',@arr);
   ${$rrl} = $line;
}

sub chomp_rule_line_end($$) {
   my ($rrl,$bc) = @_;
   my $line = ${$rrl};
   my $len = length($line);
   my @arr = split('',$line);
   for (my $i = $bc; $i < $len; $i++) {
      $arr[$i] = '_';
   }
   $line = join('',@arr);
   ${$rrl} = $line;
}

sub chomp_rule_line_bgn($$) {
   my ($rrl,$bc) = @_;
   my $line = ${$rrl};
   my $len = length($line);
   my @arr = split('',$line);
   for (my $i = 0; $i < $bc; $i++) {
      last if ($i >= $len);
      $arr[$i] = '_';
   }
   $line = join('',@arr);
   ${$rrl} = $line;
}


sub process_file($$$) {
    my ($fil,$cols,$rtxt) = @_;
    my ($ra, @lines, @nlns, $line, $skip, $len, $nl, $res);
    my $rc = scalar @col_ranges;
    my ($i,$bgn,$end,$off,$rl);
    my $tot_in = 0;
    my $tot_out = 0;
    my $mret = 0;
    if ( open( INF, "<$fil" ) ) {
        @lines = <INF>;
        close INF;
        prt( "Processing ".scalar @lines." lines, from [$fil]\n" );
        @nlns = ();
        push(@nlns,$rule_line) if ($dbg_load);
        $rl = $rule_line;
        my $done_rl = 0;
        my $last_line = '';
        foreach $line (@lines) {
            chomp $line;
            $len = length($line);
            $tot_in += $len + 2;
            $last_line = $line if (length(trim_all($line)));
            if ($beyond_col) {
               if ($beyond_col < $len) {
                  $line = substr($line,0,$beyond_col);
                  chomp_rule_line_end(\$rl,$beyond_col) if (!$done_rl);
               }
            }
            $len = length($line);
            if ($rc) {
               $nl = $line;
               $off = 0;
               for ($i = 0; $i < $rc; $i++) {
                  $bgn = $col_ranges[$i][0] - 1;
                  $end = $col_ranges[$i][1] - 1;
                  if ($end > $len) {
                      $end = $len;
                  }
                  if ($end <= $len) {
                     chomp_rule_line(\$rl,$bgn,$end) if (!$done_rl);
                     $nl = substr($line,0,$bgn-$off).substr($line,$end-$off);
                     $line = $nl;
                     $off += $end - $bgn;
                  } else {
                     prtw("WARNING: Range bgn=$bgn to end $end FAILED\n") if (!$done_rl);
                  }
               }
            }
            $len = length($line);
            $skip = ($len >= $cols) ? $cols : $len;
            $line = substr($line,$skip);
            chomp_rule_line_bgn(\$rl,$cols) if (!$done_rl);
            $len = length($line);
            $tot_out += $len + 2;
            push(@nlns,$line);
            $done_rl++;
        }

        if ($dbg_load) {
           push(@nlns,$last_line);
           push(@nlns,$rl);
           $last_line = "B=$cols";
           if ($rc) {
              $last_line .= ", $rc ranges ";
              for ($i = 0; $i < $rc; $i++) {
                  $bgn = $col_ranges[$i][0];
                  $end = $col_ranges[$i][1];
                  $last_line .= ' ' while (length($last_line) < ($bgn - 1));
                  $last_line .= "$bgn-$end ";
              }
           }
           if ($beyond_col) {
              $last_line .= ' ' while (length($last_line) < ($beyond_col-1));
              $last_line .= ", and beyond $beyond_col.";
           }
           push(@nlns,$last_line);
        }
        #$res = join("\n",@nlns);
        #$res .= "\n";
        #${$rtxt} = $res;
        ${$rtxt} = \@nlns;
        prt( "Done total $tot_in characters, and output $tot_out...\n");
        $mret = 1;
    } else {
        prtw("ERROR: Unable to open file [$fil]\n");
    }
    return $mret;
}

sub get_begin_table() {
   #my $bt = '   <table align="center" cellpadding="0" cellspacing="0" summary="List of lines">';
   my $bt = '   <table align="center" border="1" cellpadding="1" cellspacing="1" summary="List of lines">';
   $bt .= "\n";
   return $bt;
}

sub begin_html($) {
   my ($infile) = @_;
   my $txt = <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
 <head>
  <title>
   $infile column stripped
  </title>
  <style type="text/css">
  <!--
.smallx { font-size: x-small }
.small { font-size: small }
.medium { font-size: mediam }
-->
  </style>
 </head>
 <body>
  <div class="smallx">
EOF
   return $txt
}

sub is_my_file_type($) {
   my ($fil) = shift;
   return 1 if ($fil =~ /\.m2ts$/);
   return 0;
}

sub get_file_type_txt($) {
   my ($txt) = shift;
   $txt = "<a href=\"wedding/$txt\">$txt</a>";
   return $txt;
}

sub write_output($$$) {
   my ($in,$out,$rtarr) = @_;
   my ($ntxt,$len,$i);
   my ($line,$bgn,$txt,$end,$tdtxt);
   my ($j,$off,$pbgn,$td);
   my $lncnt = scalar @{$rtarr};
   my $hlcnt = scalar @html_cols;
   my $lnout = 0;
   my %hash = ();
   if ($lncnt && ($lncnt > $top_skip)) {
      if ($html_out) {
         prt("[dbg_sc01] Output the lines in a HTML shell, $lncnt lines, to file [$out], top_skip=$top_skip bot_skip=$bot_skip\n") if ($dbg_sc01);
         $ntxt = '';
         $tdtxt = ($html_rtalign ? "<td align=\"right\">" : "<td>");
         for ($i = $top_skip; $i < $lncnt; $i++) {
            last if (($lncnt - $i) < $bot_skip);
            $line = ${$rtarr}[$i];
            $len = length($line);
            $ntxt .= "<tr>\n";
            $lnout++;
            if ($hlcnt) {
               $off = 0;
               $bgn = 0;
               for ($j = 0; $j < $hlcnt; $j++) {
                  $td = $tdtxt;
                  $pbgn = $bgn;
                  $bgn = $html_cols[$j];
                  if ($bgn < $len) {
                     $txt = substr($line,$off,$bgn-$off);
                     $end = substr($line,$bgn);
                  } else {
                     $txt = $line;
                     $end = '&nbsp;';
                  }
                  $txt = trim_all($txt);
                  $off += $bgn - $pbgn;
                  if ($txt =~ /^(\d|,)+$/) {
                     $td = "<td align=\"right\">";
                  } elsif ($txt =~ /^(\d|,)+\w{1}B$/) {
                     $td = "<td align=\"right\">";
                  } elsif ( is_my_file_type($txt) ) {
                     $txt = "<a href=\"wedding/$txt\">$txt</a>";
                  }
                  $ntxt .= "$td\n";
                  $ntxt .= "$txt\n";
                  $ntxt .= "</td>\n";
               }
               $txt = trim_all($end);
               if (length($txt) == 0) {
                  $txt = '&nbsp;';
               } elsif ($txt =~ /^(\d|,)+$/) {
                  $td = "<td align=\"right\">";
               } elsif ($txt =~ /^(\d|,)+\w{1}B$/) {
                  $td = "<td align=\"right\">";
               } elsif ( is_my_file_type($txt) ) {
                  $txt = get_file_type_txt($txt);  # like "<a href=\"wedding/$txt\">$txt</a>";
               }
               $ntxt .= "$td\n";
               $ntxt .= "$txt\n";
            } else {
               $line = trim_all($line);
               $ntxt .= "$line\n";
            }
            $ntxt .= "</td>\n";
            $ntxt .= "</tr>\n";
         }
         if ( length($ntxt) ) {
            $ntxt = begin_html($in).get_begin_table().$ntxt;
            $ntxt .= "</table>\n";
            $ntxt .= "</div>\n";
            $ntxt .= "<!-- generated by $pgmname on ".get_YYYYMMDD(time(),'/')." -->\n";
            $ntxt .= "</body>\n";
            $ntxt .= "</html>\n";
         }
      } else {
         for ($i = $top_skip; $i < $lncnt; $i++) {
            last if (($lncnt - $i) < $bot_skip);
            $line = ${$rtarr}[$i];
            $len = length($line);
            if ($skip_common) {
                if ($len && !(defined $hash{$line})) {
                    $hash{$line} = 1;
                    $ntxt .= "$line\n";
                    $lnout++;
                }
            } else {
                $ntxt .= "$line\n";
                $lnout++;
            }
         }
         $ntxt .= "\n" if ( length($ntxt) );
      }
      $len = length($ntxt);
      if ($len) {
         rename2oldbak($out);
         write2file($ntxt,$out);
         prt( "Written $out, with $lnout ($lncnt) lines, $len chars...\n" );
         if ($dbg2log) {
            prt("12345678901234567890123456789012345678901234567890123456789012345678901234567890\n");
            prt($ntxt);
            prt("12345678901234567890123456789012345678901234567890123456789012345678901234567890\n");
         }
         system($out) if ($got_out);
      } else {
         prt("ERROR: Got no NEW text!\n");
      }
   } else {
      if ($lncnt) {
         prt("WARNING: Got no out lines! $top_skip > $lncnt!\n");
      } else {
         prt("ERROR: Got no NEW lines!\n");
      }
   }
}

# ================================================
# MAIN
parse_args(@ARGV);
if ( process_file($in_file, $columns, \$newtxt) ) {
   write_output($in_file,$out_file,$newtxt);
   pgm_exit(0,"Normal exit");
} else {
   pgm_exit(1,"Error exit");
}
# ================================================

################################
sub parameter_verification {
   my ($rc,$i,$bgn,$end,$j,$bgn2,$end2,$i2,$j2);
   $rc = scalar @col_ranges;
   if ($rc) {
      for ($i = 0; $i < $rc; $i++) {
         $i2 = $i + 1;
         $bgn = $col_ranges[$i][0] - 1;
         $end = $col_ranges[$i][1] - 1;
         if ($bgn <= $columns) {
             mydie("ERROR: BAD Range $i2 $bgn - $end <= $columns!\n");
         }
         if ($beyond_col && ($end >= $beyond_col)) {
             mydie("ERROR: BAD Range $i2 $bgn - $end >= $beyond_col beyond column!\n");
         }
         if ($bgn < $end) {
            for ($j = ($i + 1); $j < $rc; $j++) {
               $j2 = $j + 1;
               $bgn2 = $col_ranges[$j][0] - 1;
               $end2 = $col_ranges[$j][1] - 1;
               if ($bgn2 > $end) {
                  # that's ok
               } else {
                  mydie("ERROR: BAD Range $i2 [ $bgn - $end ] conflicts with $j2 [ $bgn2 - $end2 ]!\n");
               }
            }
         } else {
            mydie("ERROR: Range values out of order $bgn GT or EQU $end\n");
         }
      }
   }
}

sub need_arg {
   my ($a,@b) = @_;
   mydie("ERROR: Argument [$a] needs a following argument!\n")
      if (! @b);
}

sub parse_args {
    my (@av) = @_;
    my $cnt = scalar @av;
    prt( "Parsing $cnt arguments...\n" );
    $cnt = 0;
    my (@arr,$flag);
    $flag = 0;
    my $abort = 0;
    while (@av) {
        my $arg = $av[0];
        if ($arg =~ /^-/) {
            if ($arg eq '-ll') {
                $load_log = 1;
                prt( "Set to load log at end\n" );
            } elsif (($arg eq '-?') || ($arg eq '-h') || ($arg eq '--help') ||
                ($arg eq '/?') || ($arg eq '/h') || ($arg eq '/help')) {
                prt_help();
                exit(0);
            } elsif ($arg eq '-c') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               if ($arg =~ /^\d+$/) {
                   $columns = $arg;
                   prt("Set columns to strip to [$columns]\n");
                   $flag |= 2;
               } else {
                   prt("ERROR: -c must be followed by a number 0 to ...! Got [$arg]\n");
                   $abort++;
               }
            } elsif ($arg eq '-cr') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               @arr = split('-',$arg);
               if ((scalar @arr == 2)&&($arr[0] =~ /^\d+$/)&&($arr[1] =~ /^\d+$/)) {
                  if ($arr[0] < $arr[1]) {
                     push(@col_ranges, [ $arr[0], $arr[1] ]);
                     prt("Set strip range ".scalar @col_ranges." to $arr[0] -to- $arr[1]\n");
                  } else {
                     prt("ERROR: -cr range must be in order, and 1st less than 2nd, like 10-20! Got [$arg]\n");
                     $abort++;
                  }
               } else {
                  prt("ERROR: -cr must be followed by range, in form, like '10-20'! Got [$arg]\n");
                  $abort++;
               }
            } elsif ($arg eq '-i') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               if (-f $arg) {
                  $in_file = $arg;
                  prt("Set in file to [$in_file]\n");
                  $flag |= 1;
               } else {
                  prt("ERROR: -i must be followed by valid file name! Got [$arg]\n");
                  $abort++;
               }
            } elsif ($arg eq '-o') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               $out_file = $arg;
               prt("Set out file to [$out_file]\n");
               $flag |= 4;
            } elsif ($arg eq '-b') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               if ($arg =~ /^\d+$/) {
                  $beyond_col = $arg;
                  prt("Set beyond column $beyond_col...\n");
               } else {
                  prt("ERROR: -b must be followed by a number, like -b 75!\n");
                  $abort++;
               }
            } elsif ($arg eq '-d') {
               $dbg_load = 1;
               prt("Set dbg_load - adds debug line to top and bottom.\n");
            } elsif ($arg =~ /^-htm/) {
               $html_out = 1;
               prt("Set HTML output\n");
            } elsif ($arg eq '-hr') {
               $html_rtalign = 1;
               prt("Set right align on columns.\n");
            } elsif ($arg eq '-hc') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               if ($arg =~ /^\d+$/) {
                  push(@html_cols,$arg);
                  prt("Set HTML column ".scalar @html_cols." to $arg.\n");
               } else {
                  prt("ERROR: -hc must be followed by a column number, like -hc 75!\n");
                  $abort++;
               }
            } elsif ($arg eq '-sc') {
                $skip_common = 1;
                prt("Set to skip repeated lines.\n");
            } elsif ($arg eq '-rt') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               if ($arg =~ /^\d+$/) {
                  $top_skip  = $arg;
                  prt("Skip $top_skip rows at TOP.\n");
               } else {
                  prt("ERROR: -rt must be followed by a row count, like -rc 2!\n");
                  $abort++;
               }
            } elsif ($arg eq '-rb') {
               need_arg(@av);
               shift @av;
               $arg = $av[0];
               if ($arg =~ /^\d+$/) {
                  $bot_skip  = $arg;
                  prt("Skip $bot_skip rows at BOTTOM.\n");
               } else {
                  prt("ERROR: -rt must be followed by a row count, like -rc 2!\n");
                  $abort++;
               }
            } else {
                prt("ERROR: Unkknown argument [$arg]\n");
                $abort++;
            }
        } else {
            $cnt++;
            # if ($cnt == 1) {
            if ( !($flag & 1) ) {
                $in_file = $arg;
                prt("Set in file to [$in_file]\n");
                $flag |= 1;
            } elsif ( !($flag & 2) ) {
               #} elsif ($cnt == 2) {
               $columns = $arg;
               prt("Set columns to strip to [$columns]\n");
               $flag |= 2;
            } elsif ( !($flag & 4) ) {
               # } elsif ($cnt == 3) {
               $out_file = $arg;
               prt("Set out file to [$out_file]\n");
               $flag |= 4;
               $got_out = 1 if ($out_file =~ /\.txt$/i);
            } else {
                prt("ERROR: Unkknown argument [$arg]\n");
                $abort++;
            }
        }
        shift @av;
    }

   mydie("Aborting...\n") if ($abort);

   parameter_verification();

   if ($dbg_run) {
      $html_out = 1;
#      push(@html_cols,6);
#      push(@html_cols,18);
#      push(@html_cols,38);
#      push(@html_cols,44);
#      #$dbg2log = 1;
      $load_log = 1;
      $out_file = 'temphtml.htm';
#      $dbg_load = 0; # add debug lines top and bottom, and repeat last line
      $in_file = 'temp.txt';
      $columns = 3;
      #push(@col_ranges, [ $arr[0], $arr[1] ]);
      $beyond_col = 62;
      push(@html_cols,34);
      push(@html_cols,50);
   } else {
       if ( !(($flag & 1)&&($flag & 2)) ) {
           prt_help();
           exit(1);
       } elsif ( !($flag & 4) ) {
           prt("Using default out file of [$out_file]\n");
       }
    }
    prt("In file [$in_file], columns=$columns, out to [$out_file]\n") if ($dbg_run || $dbg_load);
}

# eof - stripcols.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional