Generated: Tue Feb 2 17:54:29 2010 from countword.pl 2007/11/03 6.2 KB.
#!/perl -w # NAME: countword.pl # AIM: Read ALL source files, counting the total lines, and count how many times # a particular 'word' occurs in that source. Avoids certain BINARY files. # In this case the word is 'const' ... # 31/10/2007 geoff mclane - http://geoffair.net/mperl use strict; use warnings; use File::Basename; require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); prt( "$0 ... Hello, World ...\n" ); my $base_folder = "C:\\FGCVS\\"; my @in_folders = qw( asterisk CMake expat fgcomtar FileZilla FlightGear/source fltk fltk-1.1 freeglut glut-3.7.6 iaxclient libidn libpcap libpri mozilla netpbm NSIS openal OpenSceneGraph OpenThreads openvpngui openvpntrun OTcvs plib portaudio Producer pthreads regex SimGear swig tcpdump TerraGear tidy tidywrap unxutils winmerge xiph xmlrpc++ xmlrpc-c xmprpc-java zaptel zlib-1.2.3 ); my $in_folder = ''; my $find = 'const'; ###my $out_file = "templines.txt"; my $out_file = "templine2.txt"; my @avoid = qw( .obj .lib .exe .dll .pdb .pch .ilk .ncb .plg .opt .idb .suo .manifest .exp .res .dep .htm .user .html .ac .in .m4 ); my @avoid2 = qw( .asm .xul .swg .tcl .w32 .tmpl .3 .xml .m .config .pm .lua .dsp .pike .README .min .info .php4 .programming .cs .h-msvc8 .php .mcp .rb .i .sgml .log .nb .idl .wat .conf .tbl .h-vms .src .dpr .charset .dll .0 .txt .make .win32 .tag .ds .cmake .js .trimedia .cmd .mm .ref .xhtml .h-msvc6 .tex .s .sh .cgi .o2 .wwtpl .py .h-msvc71 .tpu .tst .csv .csh=1 .ami .man .orig .xs .mk .xpm .patch .sym .pas .vcb .txt .dos .skl .dox .sub .wn .ps .1 .o9 .java .sda .cp .map .lisp .nonportable .perl .texi .ss .sed .diff .flex .l .current .nasm .w32 .my .y .tab .pl .mod .am .jar .commands .ch .properties .expected-xml .api .fl .vc5 .inc .hin .tar .dat .scm .rtf .doc ); my @included = qw( .hxx .cxx .h .c .hh .hpp .h .inl .cpp .cc .c++ ); my $recursive = 1; my $exclude2 = 1; # debug my $dbg1 = 0; my $dbg2 = 0; my $verb3 = 0; # variables my $word_count = 0; my $file_count = 0; my $line_count = 0; my $tword_count = 0; my $tfile_count = 0; my $tline_count = 0; my @results = (); my @keepline = (); my $pct = 0; my $pct100 = 0; my $inf = ''; foreach $inf (@in_folders) { $in_folder = $base_folder . $inf; prt( "Project: $inf:\n" ); $tword_count += $word_count; $tfile_count += $file_count; $tline_count += $line_count; $word_count = 0; $file_count = 0; $line_count = 0; process_folder( $in_folder ); prt( "Processed $file_count files, $line_count lines, for $word_count finds of $find ...\n" ); $pct = 0; if ($word_count && $line_count) { $pct = (($word_count * 100) / $line_count); $pct100 = int($pct * 100); $pct = $pct100 / 100; prt( "That is $pct% of the lines ...\n" ); } push(@results, [$pct, $inf, $file_count, $line_count, $word_count] ); } prt( "Processed $tfile_count files, $tline_count lines, for $tword_count finds of $find ...\n" ); if ($tword_count && $tline_count) { $pct = (($tword_count * 100) / $tline_count); $pct100 = int($pct * 100); $pct = $pct100 / 100; prt( "That is $pct% of the lines AVERAGE ...\n" ); } my @sresults = sort mycmp_decend @results; my $rescnt = scalar @sresults; prt( "Pct Project Files Lines Finds\n" ); for (my $i = 0; $i < $rescnt; $i++) { #push(@results, [$pct, $inf, $file_count, $line_count, $word_count] ); $pct = $sresults[$i][0]; $inf = $sresults[$i][1]; $file_count = $sresults[$i][2]; $line_count = $sresults[$i][3]; $word_count = $sresults[$i][4]; while (length($pct) < 5) { $pct .= ' '; } while (length($inf) < 15 ) { $inf .= ' '; } while (length($file_count) < 6 ) { $file_count .= ' '; } while (length($line_count) < 7 ) { $line_count .= ' '; } while (length($word_count) < 6 ) { $word_count .= ' '; } prt( "$pct $inf $file_count $line_count $word_count\n" ); } write2file(join('',@keepline), $out_file); close_log($outfile,1); exit(0); sub mycmp_decend { if (${$a}[0] < ${$b}[0]) { prt( "+[".${$a}[0]."] < [".${$b}[0]."]\n" ) if $verb3; return 1; } if (${$a}[0] > ${$b}[0]) { prt( "-[".${$a}[0]."] < [".${$b}[0]."]\n" ) if $verb3; return -1; } prt( "=[".${$a}[0]."] = [".${$b}[0]."]\n" ) if $verb3; return 0; } sub in_avoid { my ($ext) = shift; my $lcext = lc($ext); foreach my $tst (@avoid) { if ($tst eq $lcext) { return 1; } } if ($exclude2) { # more excluded foreach my $tst (@avoid2) { if ($tst eq $lcext) { return 1; } } } return 0; } sub process_file { my ($inf) = shift; my $len = length($find); prt( "Processing $inf file ...\n" ) if ($dbg2); if (open FIL, "<$inf") { my @lines = <FIL>; close FIL; $file_count++; my $pline = ''; my $dnfil = 0; foreach my $line (@lines) { $line_count++; ###if ($line =~ /$find/) { if ($line =~ /\W+$find\W+/) { push(@keepline,"FILE: $inf\n") if (!$dnfil); $dnfil = 1; push(@keepline,$line); my $mline = $line; my $ind = index($mline, $find); while ($ind != -1) { $word_count++; # bump the WORD count $mline = substr($mline, ($ind + $len)); $ind = index($mline, $find); } } $pline = $line; } } else { prt( "ERROR: FAILED TO OPEN [$inf] ... $! ...\n" ); } } sub process_folder { my ($inf) = shift; prt( "Processing $inf folder ...\n" ) if ($dbg1); my ($nm, $dir, $ext); my @flds = (); if ( opendir( DIR, $inf ) ) { my @files = readdir(DIR); closedir DIR; my $hfcnt = 0; foreach my $fl (@files) { if (($fl eq '.') || ($fl eq '..')) { next; } ($nm, $dir, $ext) = fileparse( $fl, qr/\.[^.]*/ ); my $ff = $inf; $ff .= "\\"; $ff .= $fl; if (-d $ff) { if ($recursive) { my $lcfl = lc($fl); if ( ($lcfl eq '.svn') || ($lcfl eq 'cvs') ) { # skip } else { push(@flds, $ff); } } } else { if ( !in_avoid($ext) ) { process_file( $ff ); } } } } foreach $dir (@flds) { process_folder($dir); } } # eof - countwords.pl