Generated: Mon Aug 16 14:14:21 2010 from genindex3.pl 2010/08/04 10.1 KB.
#!/Perl
# AIM: genindex3.pl
# Read a FOLDER, find ALL the HTML files there, and build a LIST giving each
# file's name, modification date and <title> text.
# (See genindex2.pl to instead read the folder's INDEX HTML file and build
#  the list from the LINKs found in it.)
#
# Usage: genindex3.pl [folder [exclude-file]]
#   folder       - directory to scan; $def_folder is used when omitted
#   exclude-file - optional text file naming files to skip, one per line
#
# Output, both ordered newest file first:
#   * everything passed to prt() goes to STDOUT and to the log file
#     'temp.<script name>.txt', including a JavaScript array of the form
#        var ma = new Array(
#        new item( "fgfs-026.htm", "2006/06/06", "FlightGear 0.9.10 with MSVC8" ),
#        ...
#        );
#   * 'tempindex.htm' - the same list as an HTML table.
# Both output files are handed to system() when finished, so that the
# platform opens them.
#
# FIX20060626 - added option to build list into an Array, giving
#               lnk, file date, title ...
# The file date comes from File::stat:
#   my $sa = stat($filename);
#   my $tm = scalar localtime $sa->mtime;
use strict;
use warnings;
use File::stat;

print "$0 ... Hello, World...\n";

# this should come from the command line, or an INPUT FILE
my @indexs  = ("index.htm", "index.html", "index.php");
my @in_excl = ();    # file names to skip, filled from the exclude file
my $def_folder = 'C:\HOMEPAGE\GA\fg\srczips';
#my $def_folder = 'C:\HOMEPAGE\GA\java';
##my $def_folder = 'C:\HOMEPAGE\Max5\test';
##my $def_folder = 'C:\HOMEPAGE\P26\travel';
##my $def_folder = 'c:\HOMEPAGE\P26\fg';
##my $def_folder = 'c:\HOMEPAGE\P26\html';
##my $def_folder = 'c:\HOMEPAGE\public_html\unix';
my $in_dir;          # set by parse_arguments()
my $out_file  = 'temp.'.$0.'.txt';
my $htmfile   = 'tempindex.htm';
my $write_log = 1;
my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @newarr = ();     # one hash ref per file found: { mtime, file, date, title }

# 'or', not '||': '||' would bind to the file name and hide a failed open.
open my $OH, '>', $out_file
    or die "ERROR: Can NOT create output file $out_file ($!) ... aborting ...\n";

my $html_bgn = <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Language" content="en-gb">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<meta name="generator" content="genindex3.pl">
<title>New Index</title>
<style type="text/css">
<!--
/* some style 2006.10.20 */
body { margin:0cm 1cm; background-image:url('../clds3.jpg'); }
h1{ background:#efefef; border-style:solid; border-color:#d9e2e2; border-width:1px; padding-top:2px; padding-bottom:2px; padding-left:2px; padding-right:2px; font-size:200%; text-align:center; }
h2 { font-size: 12pt; font-weight: bold; background-color: #CCCCFF }
.ctr { text-align:center; }
-->
</style>
</head>
<body>
<h1>New Index</h1>
<p class="ctr"><a href="../home2.htm">home</a></p>
<div align="center">
<center>
<table border="2" cellpadding="2" bordercolor="#0000FF" id="Num1" summary="Index of links">
 <tr>
  <th>Link</th>
  <th>Title</th>
  <th>Date</th>
 </tr>
EOF

my $html_end = <<'EOF';
</table>
</center>
</div>
<p class="ctr"><a href="../home2.htm">home</a></p>
</body>
</html>
EOF

parse_arguments(@ARGV);

prt("Processing directory $in_dir ...\n");
opendir(my $dh, $in_dir)
    or die "ERROR: Can NOT open $in_dir ... aborting ...\n";
my @files = readdir($dh);
closedir $dh;
prt("Found ".scalar(@files)." items in the directory ...\n");

# first pass - keep the plain files that look like HTML and are not excluded
my @htmlist = grep { !-d "$in_dir/$_" && is_my_file($_) }
              grep { $_ ne '.' && $_ ne '..' } @files;

if (!@htmlist) {
    prt("ERROR: Unable to locate any HTML files ...\n");
    die "aborting ...\n";
}
prt("Found ".scalar(@htmlist)." HTML files in folder $in_dir ... processing ...\n");

# second pass - collect modification date and title of each file
for my $file (@htmlist) {
    my $path = $in_dir.'/'.$file;
    my $sb = (-r $path) ? stat($path) : undef;
    if (!$sb) {
        prt("WARNING: Could not open [$path]!\n");
        next;
    }
    my $title = get_title($path);
    $title = $file if !length $title;    # untitled page: show its file name

    # Build YYYY/MM/DD from the numeric localtime fields; the printable form
    # ('Sat Mar  5 03:11:55 2005') pads the day with a space and is awkward
    # to split reliably.
    my $mtime = $sb->mtime;
    my @lt    = localtime $mtime;
    my $date  = sprintf('%04d/%02d/%02d', $lt[5] + 1900, $lt[4] + 1, $lt[3]);

    prt("Got '".scalar(localtime $mtime)."' ... $date ... $mtime|$file|$date|$title ...\n");
    push @newarr, { mtime => $mtime, file => $file, date => $date, title => $title };
}

if (!@newarr) {
    prt("ERROR: Failed to generate HTML list ...\n");
    die "aborting ...\n";
}
prt("Got array of ".scalar(@newarr)." entries ... processing ...\n");

### now I want to OUTPUT in the REVERSE DATE ORDER
##################################################
prt(" var ma = new Array(\n");
prt(join(",\n",
         map { sprintf(' new item( "%s", "%s", "%s" )',
                       js_escape($_->{file}), $_->{date}, js_escape($_->{title})) }
         sorted_entries()));
prt("\n );\n");

### now OUTPUT as a HTML table, in reverse date order
#####################################################
write_html($htmfile);
close_log();
exit(0);
### end of program ###

## Return the collected entries newest first.  The modification times are
## compared numerically (a string sort mis-orders epoch values that differ
## in digit count); equal times fall back to reverse file-name order.
sub sorted_entries {
    return sort { $b->{mtime} <=> $a->{mtime} or $b->{file} cmp $a->{file} } @newarr;
}

## Escape backslashes and double quotes so the text can sit inside a
## double-quoted JavaScript string literal.
sub js_escape {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return $text;
}

## Write the collected entries to file $fil as an HTML table (link, title,
## date), newest first, then hand the file to system() so that the platform
## opens it.  Dies if the file can not be created or written.
## NOTE: titles are copied as found in the source pages, i.e. they are
## expected to be HTML text already.
sub write_html {
    my ($fil) = @_;
    open my $wof, '>', $fil or die "ERROR: Unable to open $fil! $!\n";
    print {$wof} $html_bgn;
    for my $ent (sorted_entries()) {
        print {$wof} " <tr>\n",
                     "  <td><a href=\"$ent->{file}\">$ent->{file}</a></td>\n",
                     "  <td>$ent->{title}</td>\n",
                     "  <td>$ent->{date}</td>\n",
                     " </tr>\n";
    }
    print {$wof} $html_end;
    close $wof or die "ERROR: Unable to write $fil! $!\n";
    system($fil);    # best effort: result deliberately not checked
}

## month to number: 'Jan' => 1 ... 'Dec' => 12; logs a warning and returns 0
## for anything else.  Utility only - not called by the main flow.
sub mth_to_num {
    my ($mth) = @_;
    for my $i (0 .. $#mths) {
        return $i + 1 if $mths[$i] eq $mth;
    }
    prt("WARNING: Returning 0!!!\n");
    return 0;
}

## Return the text of the first <title>...</title> element of file $f, with
## runs of white space (including line breaks) collapsed to one space and
## the ends trimmed.  Returns '' when there is no complete title element.
## Dies if the file can not be opened.
sub get_title {
    my ($f) = @_;
    open my $fh, '<', $f or die "Can not OPEN $f! ... aborting ...\n";
    my $html = do { local $/; <$fh> };    # slurp whole file
    close $fh;
    return '' if !defined $html;

    # [^>]* accepts attributes on the tag; /s lets the title span lines.
    my $title = '';
    if ($html =~ m{<title\b[^>]*>(.*?)</title\s*>}is) {
        $title = $1;
    }
    $title =~ s/\s+/ /g;
    $title =~ s/^ //;
    $title =~ s/ $//;
    return $title;
}

## True (1) if $f has an extension of .htm, .html, .shtml or .php (any case)
## and is not named, ignoring case, in the exclude list; else 0.
sub is_my_file {
    my ($f) = @_;
    return 0 if $f !~ /\.(?:htm|html|shtml|php)\z/i;
    my $want = lc $f;
    for my $excl (@in_excl) {
        return 0 if lc($excl) eq $want;
    }
    return 1;
}

## True (1) if $f is, ignoring case, one of the index file names in @indexs.
sub is_index {
    my ($f) = @_;
    my $want = lc $f;
    for my $name (@indexs) {
        return 1 if lc($name) eq $want;
    }
    return 0;
}

## Return $txt with any leading spaces removed.
sub eat_sp {
    my ($txt) = @_;
    $txt =~ s/^ +//;
    return $txt;
}

## Print message $m to STDOUT and to the log file.
sub prt {
    my ($m) = @_;
    print $m;
    print {$OH} $m;
}

## Close the log file handle, if logging is enabled.
sub log_close {
    if ($write_log) {
        close($OH) or warn "WARNING: Problem closing $out_file: $!\n";
    }
}

## Announce, close and then open the log file via system(), if logging is
## enabled.
sub close_log {
    if ($write_log) {
        prt("Closing LOG file, and passing to 'system($out_file)'\nMay need to CLOSE notepad to continue ...\n");
        log_close();
        system($out_file);    # best effort: result deliberately not checked
    }
}

## Process the command line: [folder [exclude-file]].
## Sets $in_dir, and loads @in_excl from the exclude file when one is given.
## With no arguments $def_folder is used, with a warning.  Dies on a missing
## folder, a missing or unreadable exclude file, or more than two arguments.
sub parse_arguments {
    my (@av) = @_;
    if (!@av) {
        push(@av, $def_folder);
        prt("WARNING: Should give an input folder ...\n");
        prt("Using default [$def_folder] ...\n");
    }
    my ($dir, $excl_file, @extra) = @av;

    $in_dir = $dir;
    if (!-d $in_dir) {
        die "ERROR: Can not locate folder $in_dir ... aborting ...\n";
    }

    if (defined $excl_file) {
        if (!-f $excl_file) {
            die "ERROR: Can not locate exclude file $excl_file ... aborting ...\n";
        }
        open my $fh, '<', $excl_file
            or die "Can not OPEN $excl_file! ... aborting ...\n";
        while (my $name = <$fh>) {
            $name =~ s/\s+\z//;           # drop line end (LF or CRLF) and trailing blanks
            next if !length $name;
            push @in_excl, $name;
            prt("Excluding [$name] ...\n");
        }
        close $fh;
    }

    if (@extra) {
        die "ERROR: Too many arguments given ... aborting ...\n";
    }
}

# eof - genindex3.pl