Generated: Tue Feb 2 17:54:39 2010 from genindex2.pl 2006/06/26 8.1 KB.
#!/usr/bin/perl
# AIM: genindex2.pl
# Read a FOLDER, find the INDEX HTML file there, and build a LIST of LINKs from it.
# That list gives the HTM file name and title.
# FIX20060626 - added option to build list into an Array, giving lnk,file date,title ...
# of form
#   var ma = new Array(
#     new item( "index.htm", "Index", "Link to main index" ),
#     new item( "fgfs-026.htm", "2006-06-06", "FlightGear 0.9.10 with MSVC8"),
# To get the file data, use
#   ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
#    $atime,$mtime,$ctime,$blksize,$blocks) = stat($filename);
# or (with File::stat)
#   my $sa = stat($filename);
#   my $tm = scalar localtime $sa->mtime;
# UGH - But this is ALL extracted from index.htm ... not exactly what I want now ...
#
# Usage: genindex2.pl [folder [exclude-file]]
#   folder       - directory to scan (defaults to $def_folder, with a warning)
#   exclude-file - optional text file, one file name per line, to be ignored
use strict;
use warnings;
use File::Basename qw(basename);
use File::stat;

print "$0 ... Hello, World...\n";

# this should come from the command line, or an INPUT FILE
my @indexs     = ("index.htm", "index.html", "index.php");
my @in_excl    = ();                    # file names to ignore (from exclude-file)
my $def_folder = 'c:\HOMEPAGE\P26\fg';
my $in_dir;                             # set by parse_arguments()
# Only the script's base name is used, so running it via a path still
# yields a valid log file name in the current directory.
my $out_file   = 'temp' . basename($0) . '.txt';
my $OH;                                 # log file handle, written by prt()
my @mths       = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @newarr     = ();    # contents of files found, as mtime|$file|$dtt|$title
my $write_log  = 1;

# 'or' (not '||') so that the die is attached to open(), not to the file name
open($OH, '>', $out_file)
    or die "ERROR: Can NOT create output file ... aborting ...\n";

parse_arguments(@ARGV);

prt("Processing directory $in_dir ...\n");
opendir(my $dh, $in_dir)
    or die "ERROR: Can NOT open $in_dir ... aborting ...\n";
my @files = readdir($dh);
closedir($dh);
prt("Found " . scalar(@files) . " items in the directory ...\n");

my $filcnt    = 0;
my @titles    = ();     # "title|file" for each page found
my @links     = ();
my @loc_files = ();     # local (non-http) links found in the index file

# first pass - find the file that will give us the ORDER
my $got_ind  = 0;
my $ind_file = '';
foreach my $file (@files) {
    next if $file eq '.' || $file eq '..';
    my $ff = $in_dir . '/' . $file;
    next if -d $ff;     # ignore directories
    if (is_my_file($file) && is_index($file)) {
        $got_ind  = 1;
        $ind_file = $ff;
        last;
    }
}

if (!$got_ind) {
    prt("ERROR: Unable to locate INDEX file ...\n");
    die "aborting ...\n";
}

open(my $ifh, '<', $ind_file)
    or die "Can not OPEN $ind_file! ... aborting ...\n";
my @ind_lines = <$ifh>;     # slurp whole file, to an array of lines
close($ifh);
prt("Found " . scalar(@ind_lines) . " lines in $ind_file ...\n");

foreach my $line (@ind_lines) {
    chomp $line;
    push(@loc_files, find_local_links($line));
}
prt("Found " . scalar(@loc_files) . " local files in $ind_file ...\n");

# go through the files, and get the TITLE for each
# building up a TITLE array
foreach my $file (sort @files) {
    next if $file eq '.' || $file eq '..';
    my $ff = $in_dir . '/' . $file;
    next if -d $ff;                 # ignore directories
    next unless is_my_file($file);  # ignore non-page files

    my $tit = get_title($ff);
    $tit = $file if length($tit) == 0;
    push(@titles, "$tit|$file");

    ### FIX20060626 - add other type of output ...
    my $sb = stat($ff);     # File::stat object, or undef on failure
    if ($sb) {
        my $dtt = mtime_to_date($sb->mtime);
        if (length($dtt)) {
            push(@newarr, $sb->mtime . "|$file|$dtt|$tit");
        }
    }
    else {
        prt("WARNING: Can not stat $ff ...\n");
    }
    $filcnt++;
}

prt("Done list in FILE order ... now title order ...\n");
my $fc = 0;
### now I want to OUTPUT in the LINKS FOUND ORDER
#################################################
# the 'index' file should come FIRST
foreach my $entry (sort @titles) {
    prt("\$mypages[$fc] = \"$entry\";\n");
    $fc++;
}

close_log();
exit(0);
### end of program ###

# Extract the link targets from one line of HTML.
# Scans for each ' href' (case-insensitive), skips an optional '=' and
# surrounding spaces, and reads the target up to the matching quote (or up
# to the next space when the value is unquoted).
# Targets starting with 'http' are logged as discarded; all others are
# returned, in order of appearance.
sub find_local_links {
    my ($line) = @_;
    my @found = ();
    while (1) {
        my $pos = index(uc($line), ' HREF');
        last if $pos < 0;       # also terminates when the match is at column 0

        my $lp = eat_sp(substr($line, $pos + 5));
        $lp = substr($lp, 1) if substr($lp, 0, 1) eq '=';
        $lp = eat_sp($lp);

        my $c = substr($lp, 0, 1);
        if ($c eq '"' || $c eq "'") {
            $lp = substr($lp, 1);
        }
        else {
            $c = ' ';           # hmm no delimiter - ok, stop at next space
        }

        # No closing delimiter: the target is the rest of the line.
        my $end  = index($lp, $c);
        my $hr   = ($end < 0) ? $lp : substr($lp, 0, $end);
        my $rest = ($end < 0) ? ''  : substr($lp, $end + 1);

        if ($hr =~ /^http/i) {
            prt("Discarded [$hr] in $line ... \n");
        }
        else {
            push(@found, $hr);
        }
        $line = $rest;          # continue after this link
    }
    return @found;
}

# Convert an epoch time to a 'YYYY/MM/DD' string, via the scalar localtime
# form 'Sat Mar 12 03:11:55 2005'. The month is '??' if its name is not
# recognised. Returns '' if the time string does not have the expected
# five fields.
sub mtime_to_date {
    my ($mtime) = @_;
    # split ' ' (awk mode): localtime pads days 1-9 with an extra space,
    # which a plain / / split would turn into an empty sixth field.
    my @arr = split(' ', scalar localtime($mtime));
    return '' unless scalar(@arr) == 5;
    my $mn  = mth_to_num($arr[1]);
    my $mnn = $mn ? sprintf('%02d', $mn) : '??';
    return sprintf('%s/%s/%02d', $arr[4], $mnn, $arr[2]);
}

## month to number
# Returns 1..12 for an English three-letter month name ('Jan' .. 'Dec'),
# or 0 (after logging a warning) if the name is not recognised.
sub mth_to_num {
    my ($mth) = @_;
    my $cnt = 0;
    foreach my $m (@mths) {
        $cnt++;
        return $cnt if $m eq $mth;
    }
    prt("WARNING: Returning 0!!!\n");
    return 0;
}

# Return the text of the first <title>...</title> element of file $f, with
# runs of whitespace (including line breaks) collapsed to single spaces and
# leading/trailing whitespace removed. Returns '' if the file has no complete
# title element. Dies if the file can not be opened.
sub get_title {
    my ($f) = @_;
    open(my $fh, '<', $f)
        or die "Can not OPEN $f! ... aborting ...\n";
    my $html = do { local $/; <$fh> };  # slurp whole file as one string
    close($fh);
    return '' unless defined $html;

    # [^>]* allows attributes on the opening tag; /s lets the title span lines
    return '' unless $html =~ m{<title\b[^>]*>(.*?)</title\s*>}is;
    my $title = $1;
    $title =~ s/\s+/ /g;
    $title =~ s/^ //;
    $title =~ s/ $//;
    return $title;
}

# Return 1 if $f has a page extension (.htm .html .shtml .php, any case)
# and is not named in the exclude list (compared case-insensitively);
# otherwise return 0.
sub is_my_file {
    my ($f) = @_;
    return 0 unless $f =~ /\.(?:htm|html|shtml|php)$/i;
    foreach my $excluded (@in_excl) {
        return 0 if uc($excluded) eq uc($f);
    }
    return 1;
}

# Return 1 if $f is one of the known index file names (case-insensitive),
# else 0.
sub is_index {
    my ($f) = @_;
    foreach my $name (@indexs) {
        return 1 if uc($name) eq uc($f);
    }
    return 0;
}

# Return $l with leading spaces removed.
sub eat_sp {
    my ($l) = @_;
    $l =~ s/^ +//;
    return $l;
}

# Print message $m to STDOUT and to the log file.
sub prt {
    my ($m) = @_;
    print $m;
    print {$OH} $m;
    return;
}

# Close the log file, if logging is enabled.
sub log_close {
    if ($write_log) {
        close($OH);
    }
    return;
}

# Announce, close, and then 'run' the log file so that the system opens it
# in its associated viewer (notepad on the author's Windows setup).
sub close_log {
    if ($write_log) {
        prt("Closing LOG file, and passing to 'system($out_file)'\nMay need to CLOSE notepad to continue ...\n");
        log_close();
        system($out_file);
    }
    return;
}

# Process the command line:
#   1st argument - input folder (must exist); sets $in_dir
#   2nd argument - exclude file (must exist); its lines fill @in_excl
# With no arguments the default folder is used, with a warning.
# Dies on a missing folder/file or on more than two arguments.
sub parse_arguments {
    my (@av) = @_;
    my $ac = 0;
    if (!@av) {
        push(@av, $def_folder);
        prt("WARNING: Should give an input folder ...\n");
        prt("Using default [$def_folder] ...\n");
    }
    foreach my $arg (@av) {
        $ac++;  # bump argument count
        if ($ac == 1) {
            $in_dir = $arg;
            if (!-d $in_dir) {
                die "ERROR: Can not locate folder $in_dir ... aborting ...\n";
            }
        }
        elsif ($ac == 2) {
            if (!-f $arg) {
                die "ERROR: Can not locate exclude file $arg ... aborting ...\n";
            }
            open(my $xfh, '<', $arg)
                or die "Can not OPEN $arg! ... aborting ...\n";
            @in_excl = <$xfh>;  # slurp whole file, to an array of lines
            close($xfh);
            foreach my $name (@in_excl) {
                $name =~ s/[\r\n]+\z//;     # strip LF or CRLF line ending
                prt("Excluding [$name] ...\n");
            }
        }
        else {
            die "ERROR: Too many arguments given ... aborting ...\n";
        }
    }
    return;
}

# eof - genindex2.pl