Generated: Tue Feb 2 17:54:31 2010 from extractwords.pl 2008/01/12 3.4 KB.
#!/perl -w # NAME: extractwords.pl # AIM: read a file, and extract the word within ... # 12/01/2008 - geoff mclane - http://geoffair.net/mperl use strict; use warnings; require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); prt( "$0 ... Hello, World ...\n" ); my %wordlist = (); my $in_file = 'C:\Program Files\Microsoft Platform SDK for Windows Server 2003 R2\include\windows.h'; my $maxwrap = 8; my $ignorecomm = 1; my $ignerror = 1; process_file($in_file); show_words(); close_log($outfile,1); exit(0); sub is_hex_numb { my ($txt) = shift; if ($txt =~ /^0X/i) { $txt = substr($txt,2); } my $tl = length($txt); my ($t, $c); for ($t = 0; $t < $tl; $t++) { $c = substr($txt,$t,1); if ( !(($c =~ /\d/)||($c =~ /[A-F]/i)) ) { return 0; } } return 1; } # isallnums sub is_all_nums { my ($txt) = shift; my $tl = length($txt); my ($t, $c); for ($t = 0; $t < $tl; $t++) { $c = substr($txt,$t,1); if ( !($c =~ /\d/) ) { return 0; } } return 1; } sub add_word { my ($wd) = shift; if ((length($wd) > 1) && !is_all_nums($wd) && !is_hex_numb($wd) ) { if (defined $wordlist{$wd}) { $wordlist{$wd}++; } else { $wordlist{$wd} = 1; } } } sub process_directive { my ($ln, $dr) = @_; my ($ind, $dl, $ll, $tag, $ch, $i); if ($ignerror) { #if ($ln =~ /^\s*#\s*error\s+/) { if ($dr eq 'error') { add_word('error'); return; } } $dl = length($dr); $ind = index($ln, $dr); $tag = ''; if ($ind > 0) { $ln = substr($ln,$ind+$dl); } $ll = length($ln); for ($i = 0; $i < $ll; $i++) { $ch = substr($ln,$i,1); if ($ch =~ /\w/) { $tag .= $ch; } else { add_word($tag) if length($tag); $tag = ''; } } } sub process_file { my ($fil) = shift; my (@lines, $lc, $line, $i, $ll, $ch, $pch, $word, $incomm); if (open INF, "<$fil") { @lines = <INF>; close INF; $lc = scalar @lines; prt( "Processing $lc lines from $fil ...\n" ); $word = ''; $incomm = 0; foreach $line (@lines) { if ( !$incomm && ($line =~ /^\s*#\s*(\w+)\s+/)) { process_directive($line, $1); next; } $ll = length($line); for ($i = 0; $i < $ll; $i++) { $ch = substr($line,$i,1); if ($ignorecomm && $incomm) { if (($pch eq '*') && ($ch eq '/')) { $incomm = 0; } $pch = $ch; next; } if ($ch =~ /\w/) { $word .= $ch; } else { add_word($word) if length($word); $word = ''; if ($ignorecomm) { if (($ch eq '*')&& ($pch eq '/')) { $incomm = 1; } elsif (($ch eq '/')&& ($pch eq '/')) { $ch = ' '; $i = $ll; } } } $pch = $ch; } } add_word($word) if length($word); } else { prt( "WARNING: Failed to OPEN file [$fil] ...\n" ); } } sub show_words { my ($wd, $cnt, $tot, $wrap, $wcnt); $tot = 0; $wrap = 0; $wcnt = scalar keys(%wordlist); prt( "Output of $wcnt words found ...\n" ); foreach $wd (keys %wordlist) { $cnt = $wordlist{$wd}; $tot += $cnt; prt( "$wd " ); $wrap++; if ($wrap > $maxwrap) { prt("\n"); $wrap = 0; } } prt("\n") if ($wrap); prt( "Done $wcnt, $tot total words ...\n" ); } # eof - extractwords.pl