Generated: Sat Oct 12 17:23:21 2013 from txt2htm02.pl 2012/10/10 16.1 KB. text copy
#!perl -w # Module: txt2htm02.pl # coded using EditPlus v2.12 (76) # 16/02/2012 - Turn off $debug_on, and other tidying... # 06/08/2011 - Some BIG updates ;=)) # 2010/04/24 - more fixes # 2010/03/21 - some fixes, and improvements # March, 2005 geoff mclane # Sep 2006 - update # Orignal output was the whole file as one paragraph, using <br> to divide lines # Add option to use <pre [class="cd"]>...</pre> formating use strict; use warnings; use Cwd; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE my $perl_dir = 'C:\GTools\perl'; unshift(@INC, $perl_dir); require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n"; # log file stuff our ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = $perl_dir."\\temp.$pgmname.txt"; open_log($outfile); my $out_file = $perl_dir."\\tempt2h.htm"; # user variables my $load_log = 0; my $in_file = ''; my $add_urls = 0; my $debug_on = 0; #my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\unix\Ubuntu-11-nautilus-fails.txt'; my $def_file = 'C:\Projects\OpenLayers\OpenLayers-2.12\examples\temp.txt'; my $M_VERSION = "0.3"; my $start_time = time(); my $verb2 = 0; my @input_files = (); my $file_lines = 0; my @files = (); my @file_list = (); my $WHITE_PATTERN = "^[ \t]*\$"; my $tab_stg = ' '; my $check_out = 0; my $use_pre = 0; # default to OFF my $doc_type = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n". '"http://www.w3.org/TR/html4/loose.dtd">'."\n"; my $add_links = 1; my $use_fixed_font = 1; my $verbosity = 0; ### program variables my @warnings = (); my $cwd = cwd(); my $os = $^O; sub VERB1() { return $verbosity >= 1; } sub VERB2() { return $verbosity >= 2; } sub VERB5() { return $verbosity >= 5; } sub VERB9() { return $verbosity >= 9; } sub show_warnings($) { my ($val) = @_; if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { ###prt( "\nNo warnings issued.\n\n" ); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg); } show_warnings($val); close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub init_out_file { my ($out_name, $in_name) = @_; prt("Creating $out_name\n"); open(DSP, ">$out_name") || die "Can not create $out_name: $!\n"; prt("Writing to $out_name ...\n") if (VERB1()); $file_lines++; print DSP $doc_type; print DSP <<"EOF"; <html> <head> <title>$in_name to HTML</title> <style type="text/css"> <!-- /* some style */ body { background-image:url('http://geoffair.org/images/clds3.jpg'); margin: 0cm 1cm 0cm 1cm; } h1 { background:#efefef; border-style: solid solid solid solid; border-color:#d9e2e2; border-width:1px; padding:2px 2px 2px 2px; font-size:200%; text-align:center; } .ctr { text-align: center; } .a { color:red; } .b { color:#006666; } .c { color:blue; } .d { color:#a52a2a; } .e { color:#9400d3; } .f { color:#666666; } .o { color:#008b8b; } .v { color:#a52a2a; } .t { color:#006600; } .cd { /* top, right, bottom, left */ padding: 0px 10px 0px 10px; margin: 1px 10px 1px 10px; background: #f0f8ff; border-width: 1px; border-style: solid solid solid solid; border-color: #cccccc; width: 90%; font-family:"Courier New"; } .cn { font-family:"Courier New"; } // --> </style> </head> <body> <h1>$in_name to HTML</h1> <p class="ctr"><a href="index.htm">index</a></p> EOF print DSP "<div class=\"cn\">\n" if ($use_fixed_font); prt("Closing $out_name.\n") if (VERB1()); close(DSP); } sub end_out_file { my ($out_name, $in_name) = @_; prt("Appending to $out_name\n") if (VERB1()); open(DSP, ">>$out_name") || die "Can not append to $out_name: $!\n"; prt("Writing to $out_name ...\n") if (VERB1()); print DSP "</div>\n" if ($use_fixed_font); print DSP "<p class=\"ctr\"><a href=\"index.htm\">index</a></p>\n"; print DSP '<p>Generated '; print DSP scalar localtime(time()); print DSP ", from <b>$in_name</b>, by <b>$pgmname</b>, my Perl text-to-html \n"; print DSP "<a href=\"http://geoffmclane.com/mperl/samples/index.htm\">'converter'</a></p>\n"; print DSP '<!-- generated '; print DSP scalar localtime(time()); print DSP ", from $in_name, by $pgmname, my text-to-html converter - geoffmclane.com -->\n"; print DSP <<"EOF"; </body> </html> EOF prt("Closing $out_name.\n"); close(DSP); } sub local_dirname($) { # passed a path, './dir1/dir2/file.name' returns './dir1/dir2/ my ($file) = @_; my ($sub); ($sub = $file) =~ s,/+[^/]+$,,g; $sub = '.' if $sub eq $file; return $sub; } sub do_this_file($$) { my ($out_name,$mfile) = @_; prt("Opening, for append $out_name\n") if (VERB1()); open(DSP, ">>$out_name") || die "Can't append to $out_name: $!\n"; prt("Writing to $out_name ...\n") if (VERB1()); $file_lines++; dsp_add_src(\*DSP, $mfile); close(DSP); prt("Closed $out_name.\n") if (VERB1()); } sub convert_links($) { my ($tx) = shift; my $len = length($tx); my ($i,$ch,$tag); my $ntx = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($tx,$i,1); if ( !($ch =~ /\s/) ) { $tag = $ch; # start a tag $i++; # and go to next for (; $i < $len; $i++) { $ch = substr($tx,$i,1); if ($ch =~ /\s/) { # exit on space (or end of line) $i--; # back up to collect this char later last; } $tag .= $ch; # accumuate tag } if ($tag =~ /^\w+:{1}\/{1}\/{1}\w+(.+)$/) { $tag = "<a target=\"_blank\" href=\"$tag\">$tag</a>"; } $ntx .= $tag; next; } $ntx .= $ch; } return $ntx; } sub add_url_links($$) { my ($tx,$fdir) = @_; my $len = length($tx); my ($i,$ch,$tag,$ff); my $ntx = ''; $tag = ''; prt("Adding URLS to line [$tx]$len\n") if (VERB9()); for ($i = 0; $i < $len; $i++) { $ch = substr($tx,$i,1); if ($ch =~ /\s/) { $ff = $fdir.$tag; if (length($tag)) { if ((-f $tag)||(-f $ff)) { $ntx .= "<a target=\"_blank\" href=\"$tag\">$tag</a>"; } else { $ntx .= $tag; } } $tag = ''; $ntx .= $ch; } else { $tag .= $ch; } } $ff = $fdir.$tag; if (length($tag)) { if ((-f $tag)||(-f $ff)) { $ntx .= "<a target=\"_blank\" href=\"$tag\">$tag</a>"; } else { $ntx .= $tag; } } $tag = ''; $len = length($ntx); prt("Returning line [$ntx]$len\n") if (VERB9()); return $ntx; } sub line_to_html($) { my ($tx) = @_; $tx =~ s/&/&/g; # convert '&' to '&' $tx =~ s/\t/$tab_stg/g; # substitute TAB characters $tx =~ s/"/"/g; # sub double quotes $tx =~ s/\</</g; # sub less than tag beginning $tx =~ s/\>/>/g; # and html/xml tag ending my ($ch,$len,$i); my $nline = ''; $ch = ''; $len = length($tx); for ($i = 0; $i < $len; $i++) { $ch = substr($tx,$i,1); if ($ch =~ /\s/) { if (($nline =~ /\s$/) || ($nline =~ / $/)) { $ch = " "; } } $nline .= $ch; } return $nline; } sub contains_a_link($) { my ($tx) = shift; return 1 if ($tx =~ /\w+:{1}\/{1}\/{1}\w+\./); return 0; } sub dsp_add_src($$) { my ($fh,$file) = @_; my $line_num = 0; my $dn_para = 0; my (@lines,$tx,$ln); my $need_br = 0; if (-f $file) { open(INF, $file) || die "Unable to open $file!\n"; if ($use_pre) { print $fh '<pre class="cd">'."\n"; } @lines = <INF>; close INF; $line_num = scalar @lines; prt("Reading $file ... $line_num lines...\n"); $line_num = 0; my ($fnm,$fdir) = fileparse($file); ut_fix_directory(\$fdir); foreach $tx (@lines) { $line_num++; chomp $tx; # clear end of line $ln = length($tx); if ( $tx =~ /$WHITE_PATTERN/o ) { prt("white [$tx]$ln\n") if $verb2; if ($use_pre) { print $fh "\n"; # 2010-04-24 - add this blank line } else { # if ( !$use_pre ) print $fh "\</p\>\n" if $dn_para; $dn_para = 0; $need_br = 0; } } else { if (!$use_pre) { print $fh "\<p\>\n" if ! $dn_para; $dn_para = 1; } $tx = line_to_html($tx); $ln = length($tx); # get the final length if ($use_pre) { #if ($add_links && ($tx =~ /\s+\w+:{1}\/{1}\/{1}\w+/)) { if ( $add_links && contains_a_link($tx) ) { $tx = convert_links($tx); } if ($add_urls) { $tx = add_url_links($tx,$fdir); } print $fh "$tx\n"; # out the line } else { # if ( !$use_pre ) { if (substr ($tx, 0, 2) eq ' ') { # if starts with 2 spaces my $sps = 0; my $nbs = ' '; for ($sps = 2; $sps < $ln; $sps++) { if (substr ($tx, $sps, 1) ne ' ') { last; } $nbs .= ' ' if $sps > 1; } $sps-- if $sps > 1; # back off last space, if more than 1 prt("Replacing $sps with [$nbs] ...\n") if $verb2; $tx =~ s/ {$sps}/$nbs/; # replace (N) spaces with ' x N if ($verb2) { my @vals = split(/\s/,$tx); while (@vals) { my $vc = shift (@vals); prt("[$vc] "); } prt("\n"); } } # if it was space beginning #if ($add_links && ($tx =~ /\s+\w+:{1}\/{1}\/{1}\w+/)) { if ( $add_links && contains_a_link($tx) ) { $tx = convert_links($tx); } if ($add_urls) { $tx = add_url_links($tx,$fdir); } if ($need_br) { print $fh "\<br\>\n"; # out a line separator } print $fh "$tx\n"; # out the line $need_br = 1; } prt("sig [$tx]$ln\n") if $verb2; } } $need_br = 0; if ($use_pre) { print $fh '</pre>'."\n"; } else { print $fh "\</p\>\n" if $dn_para; } prt("Done $file ... $line_num lines ...\n"); } else { print $fh "WARNING: Missed SOURCE [$file]\n"; prtw("WARNING: Missed SOURCE [$file]\n"); } } sub get_dir_list { my $name = shift; # put all files in the current directory in @files: # opendir(THEDIR, ".") || die("Couldn't open current directory\n"); opendir(THEDIR, $name) || die("Couldn't open current directory\n"); @files = readdir(THEDIR); closedir(THEDIR); my $f_cnt = 0; my $d_cnt = 0; prt("Found " . $#files . " files and folders ...\n"); foreach my $dfile (@files) { if ( -d $dfile ) { # if ($dfile eq '.' || $dfile eq '..') or if ($dfile =~ '^\.$' || $dfile =~ '^\.\.$') { # do nothing with DOT and DOUBLE DOT } else { $d_cnt++; prt("$dfile <DIR>\n") if $verb2; } } else { $f_cnt++; my $ff = $name . '\\' . $dfile; # $ff =~ s/\//\\/g; # set DOS path separators ... $ff =~ s/\\/\//g; # set *nix path separators ... my $sb = dirname($ff); $ff =~ s/\//\\/g; # set DOS path separators ... $sb =~ s/\//\\/g; # set DOS path separators ... prt("$dfile dos [$ff] [$sb] ") if $verb2; if ($f_cnt == 1) { $sb =~ s/\\/\//g; # set *nix path separators ... prt("[$sb]") if $verb2; } prt("\n") if $verb2; } } prt("Found " . $#files . " - folders = $d_cnt, files = $f_cnt ...\n"); } ############################################ ### MAIN ### parse_args(@ARGV); pgm_exit(1,"ERROR: no input files found or specified\n") if ! @input_files; # pre-process foreach my $inf (@input_files) { if (-f $inf) { prt("File: $inf ok\n"); } else { pgm_exit(1,"ERROR: Can not locate file [$inf] ... check command ...\n"); } } $in_file = $input_files[0]; # get the FIRST my ($nm,$dr) = fileparse($in_file); init_out_file($out_file, $nm); # abort, if no create ... # show count in the array ... prt("Adding $#input_files lines to file $out_file.\n") if (VERB1()); foreach $in_file (@input_files) { do_this_file($out_file, $in_file); } end_out_file($out_file,$in_file); prt("Done $out_file on " . localtime(time()) . ".\n"); system($out_file); pgm_exit(0,""); ################################################### sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av); } sub parse_args { my (@av) = @_; my ($arg,$sarg); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg eq '?')) { give_help(); pgm_exit(0,"Help exit(0)"); } elsif ($sarg =~ /^v/) { if ($sarg =~ /^v.*(\d+)$/) { $verbosity = $1; } else { while ($sarg =~ /^v/) { $verbosity++; $sarg = substr($sarg,1); } } prt("Verbosity = $verbosity\n") if (VERB1()); } elsif ($sarg =~ /^l/) { if ($sarg =~ /^ll/) { $load_log = 2; } else { $load_log = 1; } prt("Set to load log at end. ($load_log)\n") if (VERB1()); } elsif ($sarg =~ /^o/) { need_arg(@av); shift @av; $sarg = $av[0]; $out_file = $sarg; prt("Set out file to [$out_file].\n") if (VERB1()); } elsif ($sarg =~ /^p/) { $use_pre = 1; prt("Use <pre> tag for text.\n") if (VERB1()); } elsif ($sarg =~ /^u/) { $add_urls = 1; prt("Set to add links to file names.\n") if (VERB1()); } else { pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n"); } } else { $in_file = $arg; prt("Added input to [$in_file]\n") if (VERB1()); push(@input_files,$in_file); } shift @av; } if ((length($in_file) == 0) && $debug_on) { $in_file = $def_file; prt("Set DEFAULT input to [$in_file]\n"); $add_urls = 1; push(@input_files,$in_file); $verbosity = 9; } if (length($in_file) == 0) { pgm_exit(1,"ERROR: No input files found in command!\n"); } if (! -f $in_file) { pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n"); } } sub give_help { prt("$pgmname: version $M_VERSION\n"); prt("Usage: $pgmname [options] in-file\n"); prt("Options:\n"); prt(" --help (-h or -?) = This help, and exit 0.\n"); prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n"); prt(" --load (-l) = Load LOG at end. ($outfile)\n"); prt(" --out <file> (-o) = Write output to this file.\n"); prt(" --pre (-p) = Use <pre> tag for text.\n"); prt(" --urls (-u) = Convert valid file names to url links.\n"); } # eof - txt2htm02.pl