htmimglist.pl to HTML.

index -|- end

Generated: Tue Jun 8 17:26:59 2010 from htmimglist.pl 2010/03/19 2.2 KB.

#!/perl -w
# NAME: htmimglist.pl
# AIM: given a HTML file, list all the 'image' links in the file.
# 19/03/2010 - also report existence of file...
# 06/05/2009 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename; # split path into ($nm,$dr)=fileparse($ff); or ($nm,$dir,$ext)=fileparse($fil,qr/\.[^.]*/);
unshift(@INC, 'C:/GTools/perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "Unable to load htmltools.pl ...\n";
# log file stuff
my ($LF);   # NOTE(review): not referenced anywhere else in the visible script
# Reduce $0 to the bare script name so it can be embedded in the log file
# name. Everything up to and including the last '/' or '\' is removed, so
# this works whether the script was started with a drive-letter path, a
# forward-slash path or a relative path containing directories.
my $pgmname = $0;
$pgmname =~ s{\A.*[\\/]}{}s;
my $perl_base = 'C:/GTools/perl';
# The log is written next to the tools as temp.<script name>.txt
my $outfile = $perl_base."\\temp.$pgmname.txt";
open_log($outfile);

# Default HTML file to scan; replaced by the first command-line argument
# when one is given (see parse_args).
my $in_file = 'C:/HOMEPAGE/GA/fg/gshhs-02.htm';
# Passed as the second argument to close_log() at the end of the run.
# NOTE(review): presumably non-zero asks close_log to open the log for
# viewing - confirm in logfile.pl.
my $load_log = 0;

# debug
my $dbg3 = 1;  # when true, parse_file reports the file name and line count it is processing

#############################

# parse_file($fil)
# List every image link found in the HTML file $fil and report, for each
# link, whether a plain file of that name exists relative to the directory
# that holds $fil.
#   $fil - path of the HTML file to scan.
# All output goes through prt(): one line per image, the link left-justified
# and padded to the width of the longest link, followed by "ok" or
# "NOT FOUND". If $fil cannot be opened an ERROR line is written instead.
# prt() and ret_imgs_array() are not defined in this file - presumably they
# come from the required logfile.pl / htmltools.pl.
sub parse_file($) {
   my ($fil) = shift;
   my (undef,$dr) = fileparse($fil);  # only the directory part is needed
   # Three-argument open with a lexical handle: the file name is taken
   # literally, so leading/trailing whitespace or characters such as '>'
   # and '|' in a name given on the command line cannot change the mode.
   my $inf;
   if (!open($inf, '<', $fil)) {
      prt( "ERROR: failed to open $fil...\n" );
      return;
   }
   my @lines = <$inf>;
   close $inf;
   my $lncnt = scalar @lines;
   prt( "Processing $lncnt lines from $fil ...\n" ) if ($dbg3);
   # The image extractor works on the whole document as a single string.
   my $text = join('',@lines);
   my @imgs = ret_imgs_array($text);
   # Width of the longest link, used to line up the status column.
   my $width = 0;
   foreach my $img (@imgs) {
      my $len = length($img);
      $width = $len if ($len > $width);
   }
   # Make sure the directory ends in a separator before appending links.
   $dr .= '/' if !($dr =~ /(\\|\/)$/);
   foreach my $img (@imgs) {
      my $msg = (-f $dr.$img) ? "ok" : "NOT FOUND";
      # %-*s left-justifies the link in a field of $width characters.
      prt( sprintf("%-*s %s\n", $width, $img, $msg) );
   }
   return;
}

# Main flow: pick up the input file, list its images, then close the log.
parse_args(@ARGV);   # first command-line argument, if any, replaces the default $in_file
prt( "$pgmname: Getting images from [$in_file]...\n" );
parse_file($in_file);

# $load_log is handed to close_log together with the log file name.
close_log($outfile,$load_log);
exit(0);

###################################
# parse_args(@args)
# Use the first argument, when there is one, as the HTML file to scan by
# storing it in the file-level $in_file. Any further arguments are ignored;
# with no arguments $in_file keeps its default value.
sub parse_args {
   my @args = @_;
   if (@args) {
      $in_file = $args[0];
   }
}

# eof - htmimglist.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional