genindex2.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:39 2010 from genindex2.pl 2006/06/26 8.1 KB.

#!/Perl
# AIM: genindex2.pl
# to read a FOLDER, find the INDEX HTML file there, and build a LIST of LINKS from it
# That list gives the HTM file name and title
# FIX20060626 - added option to build list into an Array, giving lnk,file date,title ...
# of form 
#    var ma = new Array(
#      new item( "index.htm", "Index", "Link to main index" ),
#      new item( "fgfs-026.htm", "2006-06-06", "FlightGear 0.9.10 with MSVC8"),
# to get the file data,  using
# ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
#       $atime,$mtime,$ctime,$blksize,$blocks)
#           = stat($filename);
# or
# my $sa = stat($filename);
# my $tm = scalar localtime $sa->mtime;
# UGH - But this is ALL extracted from index.htm ... not exactly what I want now ...
use File::stat;
print "$0 ... Hello, World...\n";
# this should come from the command line, or an INPUT FILE
# File names (compared case-insensitively by is_index) that mark the index page.
my @indexs = ("index.htm", "index.html", "index.php");
# File names to skip; filled by parse_arguments from the optional exclude file.
my @in_excl = ();
my $def_folder = 'c:\HOMEPAGE\P26\fg';
my $in_dir; # = shift || die "ERROR: Must give input folder ...\n";
# The log file is named after the script. Any directory part of $0 is removed
# first, otherwise starting the script through a path (e.g. ./genindex2.pl or
# C:\bin\genindex2.pl) produces a file name that can not be created.
(my $script_name = $0) =~ s{^.*[\\/]}{};
my $out_file = 'temp'.$script_name.'.txt';
my $OH;
my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @newarr = (); # contents of files found, as mdata|$file|$dtt|$title
# FIX: this was 'open $OH, ">$out_file" || die ...'. The '||' bound to the
# (always true) file name string, so a failed open was never reported and
# every later write to the log was silently lost. Use 3-arg open with 'or'.
open($OH, '>', $out_file) or die "ERROR: Can NOT create output file ... aborting ...\n";
my $write_log = 1;
# Sets $in_dir (and optionally @in_excl) from the command line.
parse_arguments(@ARGV);
prt ("Processing directory $in_dir ...\n");
opendir(my $dh, $in_dir) or die "ERROR: Can NOT open $in_dir ... aborting ...\n";
my @files = readdir($dh);
closedir($dh);
prt ("Found ".scalar @files." items in the directory ...\n");
# Working variables shared by the passes that follow.
my $file;
my $filcnt = 0;
my @titles = ();
my @links = ();
# first pass - find the file that will give us the ORDER
my $got_ind = 0;
my $ind_file = '';
my $line = '';
my @loc_files = ();
# Scan the directory listing (in readdir order) for the first plain file that
# is both an accepted page type (is_my_file) and one of the known index names
# (is_index). Its full path is kept in $ind_file.
foreach $file (@files) {
   next if ($file eq '.') || ($file eq '..');
   my $ff = $in_dir . '/' . $file;
   next if -d $ff;   # sub-directories are ignored
   if (is_my_file($file) && is_index($file)) {
      $got_ind = 1;
      $ind_file = $ff;
      last;
   }
}
if (! $got_ind) {
   # FIX: both messages ended in a literal "/n" instead of a newline "\n".
   prt( "ERROR: Unable to locate INDEX file ...\n" );
   die "aborting ...\n";
}
# Read the index page and collect the target of every href attribute that is
# not an absolute http(s) URL into @loc_files, in the order they appear.
open(my $ind_fh, '<', $ind_file) or die "Can not OPEN $ind_file! ... aborting ...\n";
my @ind_lines = <$ind_fh>; # slurp whole file, to an array of lines
close($ind_fh);
prt( "Found ".scalar @ind_lines." lines in $ind_file ...\n" );
# FIX: the previous hand-rolled scan (regex test + index() + substr()) had
# three defects:
#  - when ' href' sat at column 0 of the (remaining) line, index() returned 0,
#    the '$pos > 0' guard skipped the body and the while loop never ended;
#  - when the closing quote/space was missing, index() returned -1 and the
#    last character of the link was chopped off;
#  - an unquoted value ran on past the closing '>' of the tag.
# A single global match avoids all of them and always advances through the line.
foreach $line (@ind_lines) {
   chomp $line;
   # href must start the line or follow white space, and must be followed by
   # '='. The value is "double quoted", 'single quoted' or bare; a missing
   # closing quote means the value runs to the end of the line.
   while ( $line =~ /(?:^|\s)href\s*=\s*(?:"([^"]*)"?|'([^']*)'?|([^\s>]*))/ig ) {
      # exactly one of the three captures is defined for each match
      my ($hr) = grep { defined } ($1, $2, $3);
      if ($hr =~ /^http/i) {
         prt( "Discarded [$hr] in $line ... \n" );
      } else {
         ###prt( "Found [$hr] in $line ... \n" );
         push(@loc_files,$hr);
      }
   }
}
prt( "Found ".scalar @loc_files." local files in $ind_file ...\n" );
# Go through the files (in sorted name order) and get the TITLE for each,
# building up:
#   @titles - "title|file" per page
#   @newarr - "mtime|file|yyyy/mm/dd|title" per page
foreach $file (sort @files) {
   next if ($file eq '.') || ($file eq '..');
   my $ff = $in_dir . '/' . $file;
   next if -d $ff;                 # ignore directories
   next if !is_my_file($file);     # ignore other file types / excluded files
   # File::stat object; undef when the file can not be examined
   my $sb = stat($ff);
   if (!$sb) {
      # FIX: a failed stat used to crash later on '$sb->mtime'
      prt( "WARNING: Can not stat $ff ... skipped\n" );
      next;
   }
   my $tit = get_title($ff);
   if (length($tit) == 0) {
      $tit = $file;                # no title in the page - fall back to the name
   }
   push(@titles, "$tit|$file");
   ### FIX20060626 - add other type of output ...
   my $mtime = $sb->mtime;
   my $tm = scalar localtime $mtime; # time of form 'Sat Mar 12 03:11:55 2005'
   # FIX: split on white space runs, not on single blanks. localtime pads a
   # one digit day with a space ('Sat Mar  2 ...'), which produced six fields
   # and silently dropped every file last modified on day 1-9 of a month.
   my @arr = split( ' ', $tm );
   if (scalar @arr == 5) {
      my $mn = mth_to_num( $arr[1] );            # 1..12, or 0 if unknown
      my $mnn = $mn ? sprintf('%02d', $mn) : '??';
      my $dn = sprintf('%02d', $arr[2]);
      my $dtt = $arr[4].'/'.$mnn.'/'.$dn;
      my $ent = $mtime."|$file|$dtt|$tit";
      ###prt( "Got '$arr[4]/$arr[1]/$arr[2]' ... $dtt ... $ent ...\n" );
      push(@newarr, $ent);
   }
   $filcnt++;
}
prt( "Done list in FILE order ... now title order ...\n" );
### now I want to OUTPUT in the LINKS FOUND ORDER
#################################################
# the 'index' file should come FIRST
# Emit one "$mypages[n] = ..." line per page, ordered by the "title|file"
# string; $fc ends up holding the number of lines written.
my @by_title = sort @titles;
my $fc = 0;
while ($fc < scalar @by_title) {
   my $page = $by_title[$fc];
   prt( "\$mypages[$fc] = \"$page\";\n" );
   $fc++;
}
##close $OH;
# Close the log, hand it to the system, and finish.
close_log();
exit(0);
### end of program ###
## month to number
# Convert a three letter month name, as listed in @mths, to its number.
#   $mth    - month abbreviation, compared exactly (case sensitive)
#   returns - 1 for the first entry of @mths up to 12 for the last,
#             or 0 (after logging a warning) when the name is not listed
sub mth_to_num {
   my ($mth) = @_;
   for my $idx (0 .. $#mths) {
      return $idx + 1 if $mths[$idx] eq $mth;
   }
   prt( "WARNING: Returning 0!!!\n" );
   return 0;
}
# Extract the text of the first <title> element of an HTML file.
#   $f      - path of the file to read
#   returns - the title text with surrounding white space removed; a title
#             spread over several lines is joined with single blanks. An
#             empty string is returned when the file has no title or the
#             title is empty. If a line contains '<title' but no proper
#             opening tag can be recognised, the collected raw text is
#             returned unchanged.
#   dies    - when the file can not be opened
# Fixes over the previous version:
#   - '<title></title>' returned '</title>...' because of a '$pos > 0' test
#   - '<title lang="en">' (tag with attributes) was not stripped at all
#   - a later '<title' (e.g. inside inline SVG) replaced the document title
#   - used the global file handle $IF and the global $pos
sub get_title {
   my ($f) = @_;
   open(my $fh, '<', $f) or die "Can not OPEN $f! ... aborting ...\n";
   my @lines = <$fh>; # slurp whole file, to an array of lines
   close($fh);
   my $titln = '';
   my $intit = 0;   # true while inside a title that has not been closed yet
   foreach my $ln (@lines) {
      chomp $ln;
      if ($intit) {
         $titln .= ' '.$ln;
      } elsif ( $ln =~ /<title/i ) {
         $titln = $ln;
         $intit = 1;
      } else {
         next;
      }
      # the first complete title is the one we want - stop reading there
      last if $ln =~ /<\/title>/i;
   }
   # keep only what lies between the opening tag and the closing tag (or the
   # end of the collected text when the closing tag is missing)
   if ($titln =~ /<title\b[^>]*>(.*?)(?:<\/title\s*>|$)/is) {
      $titln = $1;
   }
   # trim blanks, tabs and any stray CR left over from CRLF line ends
   $titln =~ s/^\s+//;
   $titln =~ s/\s+$//;
   return $titln;
}
# Decide whether a directory entry is a page this script should process.
#   $f      - file name (no directory part)
#   returns - 1 when the name ends in .htm, .html, .shtml or .php (any case)
#             and is not listed in @in_excl (compared case-insensitively),
#             otherwise 0
sub is_my_file {
   my ($f) = @_;
   my $is_page = ($f =~ /(.*)\.htm$/i)
              || ($f =~ /(.*)\.html$/i)
              || ($f =~ /(.*)\.shtml$/i)
              || ($f =~ /(.*)\.php$/i);
   return 0 if !$is_page;
   my $name_uc = uc($f);
   foreach my $skip (@in_excl) {
      ### prt("Comparing ".uc($skip)." with ".$name_uc." ...\n");
      return 0 if uc($skip) eq $name_uc;
   }
   return 1;
}
# Tell whether a file name is one of the index page names in @indexs.
#   $f      - file name (no directory part)
#   returns - 1 when the name equals an entry of @indexs ignoring case,
#             otherwise 0
sub is_index {
   my ($f) = @_;
   my $name_uc = uc($f);
   my $hits = grep { uc($_) eq $name_uc } @indexs;
   return $hits ? 1 : 0;
}
# Remove leading blanks from a string.
#   $text   - the string to trim
#   returns - the string without its leading space characters; tabs and
#             other white space are left alone
sub eat_sp {
   my ($text) = @_;
   $text =~ s/^ +//;
   return $text;
}
# Write a message to the currently selected output handle and to the
# log file handle $OH.
#   $m - the text to write (no newline is added)
sub prt {
   my $m = shift;
   print $m;
   print {$OH} $m;
}
# Close the log file handle $OH, but only when logging is enabled
# ($write_log is true).
sub log_close {
   close( $OH ) if $write_log;
}
# Finish logging: when $write_log is set, announce what is about to happen,
# close the log file and pass its name to system() so the operating system
# runs/opens it (the message suggests this is meant to open it in notepad).
sub close_log {
   if ($write_log) {
      # FIX: the message interpolated the non-existent $outfile (always
      # empty) instead of $out_file, so the file name was never shown.
      prt( "Closing LOG file, and passing to 'system($out_file)'\nMay need to CLOSE notepad to continue ...\n" );
      log_close();
      system( $out_file );
   }
}
# Process the command line.
#   @av - the arguments, normally @ARGV:
#           1st: folder to scan, stored in $in_dir; $def_folder is used
#                (with a warning) when no argument is given at all
#           2nd: optional file listing one file name per line to exclude,
#                loaded into @in_excl
#   dies - when the folder or the exclude file does not exist, when the
#          exclude file can not be opened, or when more than two arguments
#          are given
sub parse_arguments {
   my (@av) = @_;
   if (! @av) {
      push(@av, $def_folder);
      ###die "ERROR: Must give input folder ... aborting ...\n";
      prt( "WARNING: Should give an input folder ...\n" );
      prt( "Using default [$def_folder] ...\n" );
   }
   my $ac = 0;
   while (@av) {
      my $arg = shift @av; # use up argument
      $ac++; # bump argument count
      if ($ac == 1) {
         $in_dir = $arg;
         if (! -d $in_dir) {
            die "ERROR: Can not locate folder $in_dir ... aborting ...\n";
         }
      } elsif ($ac == 2) {
         if (! -f $arg) {
            die "ERROR: Can not locate exclude file $arg ... aborting ...\n";
         }
         # FIX: 3-arg open with a lexical handle. The 2-arg form let mode
         # characters in the user supplied name change how the file is
         # opened, and the handle was the global $IF shared with other subs.
         open(my $excl_fh, '<', $arg) or die "Can not OPEN $arg! ... aborting ...\n";
         @in_excl = <$excl_fh>; # slurp whole file, to an array of lines
         close($excl_fh);
         foreach my $excl (@in_excl) {
            # drop the line end, including a stray CR or trailing blanks,
            # which would otherwise stop the name from ever matching
            $excl =~ s/\s+$//;
            prt ("Excluding [$excl] ...\n");
         }
      } else {
         die "ERROR: Too many arguments given ... aborting ...\n";
      }
   }
}
# eof - genindex2.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional