genindex3.pl to HTML.

Generated: Mon Aug 16 14:14:21 2010 from genindex3.pl 2010/08/04 10.1 KB.
#!/Perl
# AIM: genindex3.pl
# to read a FOLDER, find ALL the HTML files there, and build a LIST
# see genindex2.pl if you want -
# to read a FOLDER, find the INDEX HTML file there, and build a LIST of LINK from it
#
# From that list of ALL HTM files - output name and title
# FIX20060626 - added option to build list into an Array, giving lnk,file date,title ...
# of form 
#    var ma = new Array(
#      new item( "index.htm", "Index", "Link to main index" ),
#      new item( "fgfs-026.htm", "2006-06-06", "FlightGear 0.9.10 with MSVC8"),
# to get the file data,  using
# ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
#       $atime,$mtime,$ctime,$blksize,$blocks)
#           = stat($filename);
# or
# my $sa = stat($filename);
# my $tm = scalar localtime $sa->mtime;

use File::stat;

print "$0 ... Hello, World...\n";
# File names treated as "index" pages (consulted by is_index).
# this should come from the command line, or an INPUT FILE
my @indexs = ("index.htm", "index.html", "index.php");
# File names to leave out of the list; filled by parse_arguments() from the
# optional exclude file.
my @in_excl = ();
# Folder used when no folder is given on the command line.
my $def_folder = 'C:\HOMEPAGE\GA\fg\srczips';
#my $def_folder = 'C:\HOMEPAGE\GA\java';
##my $def_folder = 'C:\HOMEPAGE\Max5\test';
##my $def_folder = 'C:\HOMEPAGE\P26\travel';
##my $def_folder = 'c:\HOMEPAGE\P26\fg';
##my $def_folder = 'c:\HOMEPAGE\P26\html';
##my $def_folder = 'c:\HOMEPAGE\public_html\unix';
my $in_dir; # = shift || die "ERROR: Must give input folder ...\n";
# Build the log name from the script's base name only: $0 may carry a
# directory or drive prefix, which would turn 'temp.<path>.txt' into an
# unusable file name.
(my $script_name = $0) =~ s{^.*[\\/]}{};
my $out_file = 'temp.'.$script_name.'.txt';
my $htmfile = 'tempindex.htm';
my $OH;
my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @newarr = (); # contents of files found, as mdata|$file|$dtt|$title
# Use 'or' rather than '||': with '||' the die attached to the file name
# string (always true), so a failed open went unnoticed.
open($OH, '>', $out_file) or die "ERROR: Can NOT create output file ... aborting ...\n";
my $write_log = 1; # when true, log_close()/close_log() close (and show) the log

# Opening part of the generated page: doctype, head with inline style, the
# page heading, a 'home' link and the start of the index table including its
# header row (Link / Title / Date). write_html() prints this first and then
# appends one <tr> per entry.
my $html_bgn = <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 
"http://www.w3.org/TR/html4/loose.dtd">

<html>
<head>
<meta http-equiv="Content-Language" content="en-gb">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<meta name="generator" content="genindex3.pl">
<title>New Index</title>
<style type="text/css">
<!-- /* some style 2006.10.20 */
body {
   margin:0cm 1cm;
   background-image:url('../clds3.jpg');
}
h1{
   background:#efefef;
   border-style:solid;
   border-color:#d9e2e2;
   border-width:1px;
   padding-top:2px;
   padding-bottom:2px;
   padding-left:2px;
   padding-right:2px;
   font-size:200%;
   text-align:center;
}
h2 { font-size: 12pt; font-weight: bold; background-color: #CCCCFF }
.ctr { text-align:center; }
-->
</style>
</head>

<body>
<h1>New Index</h1>
<p class="ctr"><a href="../home2.htm">home</a></p>
<div align="center">
  <center>
  <table border="2" cellpadding="2" bordercolor="#0000FF" id="Num1" summary="Index of links">
    <tr>
      <th>Link</th>
      <th>Title</th>
      <th>Date</th>
    </tr>

EOF

# Closing part of the generated page: ends the table and its wrappers, repeats
# the 'home' link and closes the document. write_html() prints this last.
my $html_end = <<EOF;
  </table>
  </center>
</div>
<p class="ctr"><a href="../home2.htm">home</a></p>
</body>
</html>
EOF

# Resolve the input folder (and the optional exclude list) from the command line.
parse_arguments(@ARGV);

prt ("Processing directory $in_dir ...\n");

# Read every directory entry in one go; a lexical handle replaces the
# bareword DIR so the handle cannot clash with any other global of that name.
opendir( my $dir_handle, $in_dir ) or die "ERROR: Can NOT open $in_dir ... aborting ...\n";
my @files = readdir($dir_handle);
closedir $dir_handle;

prt ("Found ".scalar(@files)." items in the directory ...\n");
my $file;          # shared loop variable, also used by the later passes
my $filcnt = 0;    # number of files turned into entries
my @titles = ();   # "title|file" strings
my @links = ();
my @htmlist = ();  # names of the HTML-like files found in $in_dir
# first pass - find the file that will give us the ORDER
my $got_ind = 0;
my $ind_file = '';
my $line = '';
my @loc_files = ();
foreach $file (@files) {
   # skip the self / parent pseudo entries
   next if (($file eq '.')||($file eq '..'));
   # sub-directories are ignored, only plain entries are considered
   next if ( -d $in_dir . '/' . $file );
   push(@htmlist, $file) if (is_my_file($file));
}
$got_ind = scalar @htmlist;
if (! $got_ind) {
   # These messages previously ended in a literal "/n" instead of a newline.
   prt( "ERROR: Unable to locate any HTML files ...\n" );
   die "aborting ...\n";
}
prt( "Found $got_ind HTML files in folder $in_dir ... processing ...\n" );

# building up a TITLE array
# For every HTML file found, record "title|file" in @titles and
# "mtime|file|YYYY/MM/DD|title" in @newarr.
foreach $file (@htmlist) {
   my $ff = $in_dir.'/'.$file;
   # Probe that the file can be opened for reading before doing any work on it.
   my $probe;
   if (! open($probe, '<', $ff)) {
      prt( "WARNING: Could not open [$ff]!\n" );
      next;
   }
   close($probe);
   # stat() here is the File::stat object interface; it yields nothing on
   # failure, so guard before calling ->mtime on the result.
   my $sb = stat($ff);
   if (! $sb) {
      prt( "WARNING: Could not stat [$ff]!\n" );
      next;
   }
   my $tit = get_title($ff);
   # fall back to the file name when the document has no usable title
   if (length($tit) == 0) {
      $tit = $file;
   }
   push(@titles, "$tit|$file");
   ### prt( "\$mypages[$filcnt] = \"$file|$tit\";\n");
   ### FIX20060626 - add other type of output ...
   # Take day / month / year straight from localtime() in list context
   # instead of splitting its string form, whose field positions shift when
   # the day of the month is a single digit.
   my $mtime = $sb->mtime;
   my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];
   $year += 1900;                # localtime() counts years from 1900
   my $dtt = sprintf("%04d/%02d/%02d", $year, $mon + 1, $mday); # $mon is 0-based
   my $ent = $mtime."|$file|$dtt|$tit";
   prt( "Got '$year/$mths[$mon]/$mday' ... $dtt ... $ent ...\n" );
   push(@newarr, $ent);
   $filcnt++;
}

$got_ind = scalar @newarr;
if (! $got_ind) {
   # These messages previously ended in a literal "/n" instead of a newline.
   prt( "ERROR: Failed to generate HTML list ...\n" );
   die "aborting ...\n";
}
prt( "Got array of $got_ind entries ... processing ...\n" );

my $fc = 0; # number of items written so far (controls the separating comma)
### now I want to OUTPUT in the REVERSE DATE ORDER
##################################################
prt("    var ma = new Array(\n");
#      new item( "index.htm", "Index", "Link to main index" ),
#      new item( "fgfs-026.htm", "2006-06-06", "FlightGear 0.9.10 with MSVC8"),
# Entries start with the epoch mtime, so compare that field numerically: a
# plain string sort mis-orders time stamps with a different number of digits.
# Ties fall back to the reverse string order.
my @by_date = sort {
   (split(/\|/, $b, 2))[0] <=> (split(/\|/, $a, 2))[0]
      or $b cmp $a
} @newarr;
foreach $file (@by_date) {
   # Limit the split to 4 fields so a '|' inside the title stays in the title.
   my @arr = split( /\|/, $file, 4 );
   if (scalar @arr == 4) {
      if ($fc) {
         prt( ",\n" );
      }
      my ($lnk, $dat, $tit) = @arr[1 .. 3];
      # Escape backslashes and double quotes so each value remains a valid
      # double-quoted JavaScript string literal.
      foreach my $fld ($lnk, $dat, $tit) {
         $fld =~ s/(["\\])/\\$1/g;
      }
      prt("        new item( \"$lnk\", \"$dat\", \"$tit\" )");
      # Count only items actually written, so a rejected entry can not cause
      # a stray leading comma.
      $fc++;
   } else {
      prt("WARNING: Split error on $file ???\n");
   }
}
prt("\n   );\n");
### now OUTPUT as a HTML table, in reverse date order
#####################################################
write_html( $htmfile );


close_log();
exit(0);
### end of program ###

# Write the collected entries as an HTML table, newest first, then pass the
# file name to system().
#   $fil - path of the HTML file to create
# Reads the file-level $html_bgn, $html_end and @newarr.
# Dies when the file can not be created.
sub write_html {
   my ($fil) = shift;
   # The failure path used to call mydie(), which is not defined in this
   # script; die directly with the same message instead.
   open(my $wof, '>', $fil) or die "ERROR: Unable to open $fil! $!\n";
   print $wof $html_bgn;
   # Entries start with the epoch mtime; compare it numerically so time
   # stamps with a different number of digits still order correctly.
   my @by_date = sort {
      (split(/\|/, $b, 2))[0] <=> (split(/\|/, $a, 2))[0]
         or $b cmp $a
   } @newarr;
   foreach my $entry (@by_date) {
      # Limit the split to 4 fields so a '|' inside the title stays in the title.
      my @arr = split( /\|/, $entry, 4 );
      if (scalar @arr == 4) {
         print $wof "    <tr>\n";
         print $wof "      <td><a href=\"$arr[1]\">$arr[1]</a></td>\n";
         print $wof "      <td>$arr[3]</td>\n";
         print $wof "      <td>$arr[2]</td>\n";
         print $wof "    </tr>\n";
      } else {
         prt("WARNING: Split error on $entry ???\n");
      }
   }
   print $wof $html_end;
   # Buffered write errors only surface at close, so report them.
   close($wof) or prt("WARNING: Problem closing $fil! $!\n");
   # NOTE(review): runs the file name as a shell command - presumably relies
   # on the Windows file association to open the page; confirm on other systems.
   system( $fil );
}

## month to number
# Look up a month abbreviation in @mths and return its 1-based position
# (Jan => 1 ... Dec => 12). Logs a warning and returns 0 when the name is
# not in the list.
sub mth_to_num {
   my ($mth) = shift;
   for my $idx (0 .. $#mths) {
      # list positions are 0-based, month numbers 1-based
      return $idx + 1 if ($mths[$idx] eq $mth);
   }
   prt( "WARNING: Returning 0!!!\n" );
   return 0;
}

# Return the text of the first <title>...</title> element of an HTML file.
#   $f - path of the file to read
# Returns the title with every run of whitespace (including line breaks)
# collapsed to one space and the ends trimmed, or '' when the file has no
# complete title element.
# Dies when the file can not be opened.
sub get_title {
   my ($f) = @_;
   open(my $fh, '<', $f) or die "Can not OPEN $f! ... aborting ...\n";
   # slurp the whole file as one string so a title spanning lines is one match
   my $html = do { local $/; <$fh> };
   close($fh);
   return '' unless defined $html;
   my $titln = '';
   # Case-insensitive, may span lines (/s), tolerates attributes on the
   # opening tag, and takes the first element only. An empty element gives ''
   # and an unclosed <title> no longer swallows the rest of the file.
   if ($html =~ m{<title\b[^>]*>(.*?)</title\s*>}is) {
      $titln = $1;
      $titln =~ s/\s+/ /g;   # line breaks, tabs, CR => single space
      $titln =~ s/^ //;
      $titln =~ s/ $//;
   }
   return $titln;
}

# Decide whether a directory entry belongs in the list.
#   $f - file name (no path)
# Returns 1 when the name ends in .htm, .html, .shtml or .php (any case) and
# is not matched, case-insensitively, by an entry of @in_excl; otherwise 0.
sub is_my_file {
   my ($f) = @_;
   # extension test first: anything else is never one of ours
   return 0 unless ($f =~ /\.(?:htm|html|shtml|php)$/i);
   my $wanted = uc($f);
   foreach my $skip (@in_excl) {
      ### prt("Comparing ".uc($skip)." with ".$wanted." ...\n");
      return 0 if (uc($skip) eq $wanted);
   }
   return 1;
}

# Tell whether a file name is one of the index names listed in @indexs.
#   $f - file name (no path)
# The comparison ignores case. Returns 1 on a match, 0 otherwise.
sub is_index {
   my ($f) = @_;
   my $wanted = uc($f);
   return (grep { uc($_) eq $wanted } @indexs) ? 1 : 0;
}

# Return a copy of the given string with leading space characters removed.
# Only the space character is stripped; tabs and other whitespace are kept.
sub eat_sp {
   my ($l) = @_;
   $l =~ s/\A +//;
   return $l;
}

# Print a message to the currently selected output handle and also to the
# log file handle $OH.
#   $m - the text to write (only the first argument is used)
sub prt {
   my $m = $_[0];
   print $m;
   print {$OH} $m;
}

# Close the log file handle $OH, but only when logging is switched on
# ($write_log is true).
sub log_close {
   close( $OH ) if ($write_log);
}

# Finish logging: announce the shutdown, close the log file and pass its
# name to system(). Does nothing when $write_log is false.
sub close_log {
   if ($write_log) {
      # The message used to interpolate the undeclared $outfile (always
      # empty); it now shows the real log file name held in $out_file.
      prt( "Closing LOG file, and passing to 'system($out_file)'\nMay need to CLOSE notepad to continue ...\n" );
      log_close();
      # NOTE(review): runs the log file name as a shell command - presumably
      # relies on the Windows file association (notepad, per the message above).
      system( $out_file );
   }
}


# Interpret the command line.
#   argument 1 - input folder; must exist. When no arguments are given the
#                default folder $def_folder is used, with a warning.
#   argument 2 - optional exclude file holding one file name per line; the
#                names are loaded into @in_excl.
# Sets the file-level $in_dir and @in_excl.
# Dies when the folder or the exclude file is missing or unreadable, or when
# more than two arguments are given.
sub parse_arguments {
   my (@av) = @_;
   my $ac = 0;
   if (! @av) {
      push(@av, $def_folder);
      ###die "ERROR: Must give input folder ... aborting ...\n";
      prt( "WARNING: Should give an input folder ...\n" );
      prt( "Using default [$def_folder] ...\n" );
   }
   while (@av) {
      my $arg = shift @av; # use up argument
      $ac++; # bump argument count
      if ($ac == 1) {
         $in_dir = $arg;
         if (! -d $in_dir) {
            die "ERROR: Can not locate folder $in_dir ... aborting ...\n";
         }
      } elsif ($ac == 2) {
         if (! -f $arg) {
            die "ERROR: Can not locate exclude file $arg ... aborting ...\n";
         }
         # Three-argument open with a lexical handle: the name comes from the
         # command line and must never be parsed for an open mode.
         open(my $fh, '<', $arg) or die "Can not OPEN $arg! ... aborting ...\n";
         @in_excl = ();
         while (my $name = <$fh>) {
            # Strip LF and CR: a stray CR from a CRLF file would stop the
            # name from ever matching a directory entry.
            $name =~ s/[\r\n]+\z//;
            next if ($name eq ''); # a blank line can not name a file
            push(@in_excl, $name);
            prt ("Excluding [$name] ...\n");
         }
         close($fh);
      } else {
         die "ERROR: Too many arguments given ... aborting ...\n";
      }
   }
}

# eof - genindex3.pl
index -|- top