Generated: Tue Feb 2 17:54:49 2010 from p2html11.pl 2005/05/22 62.3 KB.
#!/perl -w ### ################################################# ### p2html - perl code to HTML document format ### Works, mostly - still a SPACE-REPLACEMENT problem ... ### Geoff - geoffmclane.com - geoffair _at_ hotmail _dot_ com ### ################################################## use strict; use warnings; require "colours.pl"; require "colour2.pl"; ###contains my $perlstx = 'C:/Program Files/EditPlus 2/perl.stx'; ### fix location - should maintain separate list??? require "eppearl.pl"; require "p2hutil.pl"; ### die ("Remove me at your own risk!\n"); ### global variables my $vers = '0.0.11'; # eleventh iteration ... LOOKING GOOD ... still regex, line no my $refnum = 'P26.2005.05.20'; ### regex is now NOT expanded, but only by xceptchr of '/', so still some problems ... ### space is not 'exactly' maintained in quotes ... should try not to parse inside a word array ... ### search and replace rules - http://www.rexswain.com/perl5.html#search ### [ EXPR =~ ] [ m ] /PATTERN/ [g][i][m][o][s][x] ### [ $VAR =~ ] s/PATTERN/REPLACEMENT/ [e][g][i][m][o][s][x] ### [ $VAR =~ ] tr/SEARCHLIST/REPLACEMENTLIST/ [c][d][s] ### add line number list of user 'variables' =~ !~ Search pattern, substitution, or translation (negated) ### see seq print $fh <<EOF; and mark as "..." data until EOF ### maybe load, and output 'require "filename"' below parent ### list of 'sub' found, give colour to NAMED .... my $addspace1 = 0; ### 1 = use 1 space only (in red) for DIAGNOSTICS ONLY my $addlinenums = 0; # ! ONLY for diagnostic, mainly, since it DESTROYS simple copy-paste ;=(( my $AddRequired = 0; ### add tables for included perl files ... 1 = add_include_tables (); my $verb2 = 0; ### massive additional diagnostics my $verb3 = 0; ### add perl.stx parsing diag log my $dbgon = 0; # 1 DOUBLES HTML OUTPUT FOR COMPARISON my $ColTab1 = 1; # add colour table my $ColTab2 = 0; # add FULL color table my $NewRes = 0; # switch from perl.stx file my $colorON = 1; ### add the COLOUR/STYLE - main PURPOSE of program!!! my $WHITE_PATTERN2 = "^[ \t\r\n]*\$"; # spacey if ($var =~ /$WHITE_PATTERN2/o ) { ...} my $tab_stg = ' '; # replace tabs, with 3 spaces my $DELIMITER = '(){}[]-+*/=~!&|<>?:;.,'; ### set of perl delimeters, for parsing ... my $logfil = 'templog.txt'; my @logmsgs = (); my ($OF, $IF, $LF, $STX); my $name; my $lc = 0; my $dnpara = 1; my @lnbits; my @spbits; my @copybits; ## keep, for ORIGINAL space work 'replacement' my @parsebits; ## modified copy, with 'print [?] << TOKEN or "TOKEN" ... TOKEN mark as " or ' quote text my @colorbits; my $acttoken = ''; ### print [] << TOKEN my $inprttok = 0; ### processing a print token my $chk; ###my $istxt = 1; ###my $gotfes = 0; # no frontend space ###my $txsp = ''; # frontend SPACEY stuff ### set if ispunctuat($c), which calls isbracechr($c) my $actpunc = ''; ### store the active punctuation my @actpuncs = (); ### stack of punctuation my $actpunc2 = ''; ### paired punctuation (){}[]<> my $actbrace = ''; ### last brace found my @incfiles = (); # stack of include files, if any my $actifile = ''; my $file; my %HPuncsFnd = (); # hash of Punctuation FOUND in parse my $expanOFF = 0; ### stop expansion temporarily ... my $actresword = ''; my %HResWdFnd = (); my $actfunc = ''; ### store the active built-in functions my %HFuncsFnd = (); my $actlnnum = ''; my %HArrayFnd = (); my $actarray = ''; my %HHashFnd = (); my $acthash = ''; my %HScalarFnd = (); my $actscalar = ''; ### sub add_ucomment my $actcomment = ''; ### sub add_usingleq my $actsingleq = ''; ### sub add_udoubleq my $actdoubleq = ''; ### require "colours.pl" and "eppearl.pl"; to fill these our @PPairs; our @DolVars; our @PBPunc; our @TTset; our @PPunct; our @ResWds2; ## canned reserved words our %HColorIE; # in color2.pl ... ### start of program #################### ### Get command line input ... my $infile = shift || '.'; my $outfil = shift || 'tempout.htm'; ### my $DELIMITER = '(){}[]-+*/=~!&|<>?:;.,'; my @DelimList = split (//, $DELIMITER); ### form a list ## my $func; my @TTColrs = qw(l.blue brown l.br s.green pink mauve b.green color4 color5 l.brn blue white l.grey); my @TTTypes = qw(array comment unass s-quote scalar functions d-quote color4 color5 hash reserved other punctuation); my @TTAttrib = qw(match orange regex green color1 color2 color3 color4 color5 peach blue white grey); for $name (@TTAttrib) { no strict 'refs'; # allow symbol table manipulation *$name = *{uc $name} = sub { "<TT class='$name'>@_</TT>" }; ### *$name = *{uc $name} = sub { "<TT class='$name'>\n@_\n</TT>" }; } ###my @colors = qw(red blue green yellow orange purple violet); ##my @colors = qw(red yellow purple violet); ##for $name (@colors) { ## no strict 'refs'; # allow symbol table manipulation ## *$name = *{uc $name} = sub { "<FONT COLOR='$name'>@_</FONT>" }; ##} my $ss = 5; ##our @TTset; ##our @PPunct; ##require "colours.pl"; ##require "eppearl.pl"; my $msg = ''; my ($line, $txt); my $i = 0; my ($cnt1, $cnt2); my $inbraces = 0; my $c; my $c3; if ($infile eq '.') { die "No input file given ...\n"; } open $LF, ">$logfil" or die "Can NOT open LOG file $logfil!\n"; tolog ("$0 Started " . localtime(time()) . " ...\n"); if (! -f $infile) { die "Input file [$infile] NOT FOUND! ...\n"; } tolog ("Opening $infile ...\n"); open $IF, "<$infile" or die "Can not OPEN $infile!\n"; tolog ("Loading $infile ...\n"); my @lines = <$IF>; # slurp whole file, to an array of lines close($IF); open $OF, ">$outfil" or die "Can not create $outfil!\n"; ###my %stxh; our @ResWds = (); our @BFuncs = (); our %HResWds; our %HBFuncs; do_stx_file(); ###### start HTML output ####### add_html_head( $OF, $infile ); my $lncnt = @lines; # get count my $countlines = 0; my $txhtml; ### add_color_samp($OF); tolog ("Processing $infile ... $lncnt lines\n"); #### processing the table, that is the HTML output for the $infile data lines do_the_table(); # the perl code is output to a table format ############################################################################### tolog ("Processed $lc lines of $infile ... written to $outfil ... add tail ...\n"); if ($AddRequired) { add_include_tables (); } if ($ColTab1) { # add colour table add_color_samp($OF); } prt ( get_parse_stats () ); if ($ColTab2){ # add FULL color table add_colour2_table(); ### spray %HColorIE } add_html_tail($OF); showarrcnts(); tolog ("$0 Ended " . localtime(time()) . " ...\n"); close($OF); system $outfil; # system $logfil; sub prt { tolog (@_); print $OF @_; } my @TypeColors_NOTUSED = ( ###if ($c eq '#') { # comment component - should be to end-of-line, or more ... "comment", ### $func = \&orange; ###} elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE "s.quote", ### $func = \&green; ### } elsif ($c eq '"') { "d.quote", ### $func = \&color3; ###} elsif ($c eq '$') { # start of scalar "scalar", ### $func = \&color1; ###} elsif ($c eq '@') { # start of array "array", ### $func = \&match; ###} elsif ($c eq '%') { # start of hash "hash", ### $func = \&peach; ###} elsif ( exists $HResWds{$tx2} ) { "reserved", ### $func = \&blue; ### } elsif ( exists $HBFuncs{$tx2} ) { "functions", ### $func = \&color2; ### } else { "other" ### $func = \&white;} ); sub a2f { my ($f,$t) = @_; print $f $t; } sub n_row { ###my ($f) = @_; a2f (@_, " <tr>"); } sub n_col { ###my ($f) = @_; a2f (@_, " <td>"); } sub c_row { ###my ($f) = @_; a2f (@_, " </tr>"); } sub c_col { ###my ($f) = @_; a2f (@_, " </td>"); } sub n_hcol { ###my ($f) = @_; a2f (@_, " <th>"); } sub c_hcol { ###my ($f) = @_; a2f (@_, " </th>"); } ## my $func; ### my @TTColrs = qw(l.blue brown l.br s.green pink mauve b.green l.brn blue white l.grey); ### my @TTTypes = qw(array comment unass s-quote scalar functions d-quote hash reserved other punctuation); ### my @TTAttrib = qw(match orange regex green color1 color2 color3 peach blue white grey); sub add_color_samp { my ($fh) = @_; $i = 0; print $fh <<EOF; <p>Colour Key :<br>Function, Description., Colour<br> <table border="1" bgcolor="#eeeeee"> EOF ### out attributes n_row $fh; # add " <tr>\n"; # open ROW n_hcol $fh; # add " <td>\n"; # open COLUMN a2f $fh, "Style"; c_hcol $fh; # add " </td>\n"; # close COLUMN n_hcol $fh; # add " <td>\n"; # open COLUMN a2f $fh, "Description"; c_hcol $fh; # add " </td>\n"; # close COLUMN n_hcol $fh; # add " <td>\n"; # open COLUMN a2f $fh, "Colour"; c_hcol $fh; # add " </td>\n"; # close COLUMN c_row $fh; ### " </tr>\n"; # close ROW foreach $name (@TTAttrib) { ###no strict 'refs'; # allow symbol table manipulation my $fun = \&$name; ## get the function - the auto-generated sub n_row $fh; # add " <tr>\n"; # open ROW n_col $fh; # add " <td>\n"; # open COLUMN ### a2f $fh, "Attributes"; $msg = $name; $txt = $fun->($msg); a2f $fh, $txt; c_col $fh; # add " </td>\n"; # close COLUMN n_col $fh; # add " <td>\n"; # open COLUMN ### a2f $fh, "Function"; $msg = $TTTypes[$i]; $txt = $fun->($msg); a2f $fh, $txt; c_col $fh; # add " </td>\n"; # close COLUMN n_col $fh; # add " <td>\n"; # open COLUMN ### a2f $fh, "Colour"; @TTColrs $msg = $TTColrs[$i]; $txt = $fun->($msg); a2f $fh, $txt; c_col $fh; # add " </td>\n"; # close COLUMN c_row $fh; ### " </tr>\n"; # close ROW $i++; # bump to next } ### end if all print $fh <<EOF; </table> </p> EOF ### all done ... } sub tolog { print @_; print $LF @_; } sub xceptchr { my ($chr) = @_; ###if (($chr eq ':') || ($chr eq '=') || ($chr eq '|') || ($chr eq ',')) { if ( ($chr eq '/') || ($chr eq ':') || ($chr eq '|') ) { return 1; } return 0; } sub is_a_quote { my ($chr) = @_; if (($chr eq '"') || ($chr eq "'")) { return 1; } return 0; } sub get_a_quote { my ($t) = @_; my $mx = length($t); my $i; if ($t =~ /['"]/) { # match quote for ($i = 0; $i < $mx; $i++) { my $chr = substr ($t, $i, 1); if (is_a_quote($chr)) { return $chr; } } } return 0; } sub get_line_array2 { my ($tx1) = @_; my @ar = (); ## if not in print << token my ($i, $mx); my $insp = 0; my $ibgn = 0; my $i2 = 0; tolog ("Get LA[$tx1]\n"); for ($i = 0; $i < $mx; $i++) { my $ch1 = substr ($tx1, $i, 1); # get char tolog (" got ".($i + 1)." char [$ch1]\n"); if (($ch1 eq ' ')||($ch1 eq "\t")) { if ($ch1 eq ' ') { tolog ("char [$ch1] is spacey\n"); } else { tolog ("char [tab] is spacey\n"); } if ($i2 && ($insp == 0)) { tolog ("get part [" . substr ($tx1, $ibgn, $i2) . "]1!\n"); push (@ar, substr ($tx1, $ibgn, $i2)); $ibgn = $i; $i2 = 0; } $insp++; # count spaces } else { if ($insp) { tolog ("storing spacey front for $insp chars\n"); tolog ("get part [" . substr ($tx1, $ibgn, $insp) . "]2!\n"); push (@ar, substr ($tx1, $ibgn, $insp)); $ibgn = $i; ##$tx1 = substr ($tx1, $i); $insp = 0; ##tolog (" tx1 chopped to [$tx1]\n"); ##$i = 0; ##last; } elsif ($ch1 eq '#') { if ($i2) { tolog ("storing front of # for $i2 chars\n"); tolog ("get part [" . substr ($tx1, $ibgn, $i2) . "]3!\n"); push (@ar, substr ($tx1, $ibgn, $i2)); $ibgn = $i; $i2 = 0; } tolog ("get part [" . substr ($tx1, $i) . "]3-1!\n"); push (@ar, substr ($tx1, $i)); $i = $mx; ##$tx1 = ''; ##tolog (" tx1 chopped blank\n"); ##$i = 0; last; } elsif (($ch1 eq '"')||($ch1 eq "'")) { $i++; for (; $i < $mx; $i++) { if (substr ($tx1, $i, 1) eq $ch1) { ### check next char $i++; ## include this char tolog ("found end [$ch1] at $i\n"); last; } } ### got quoted block tolog ("get part [" . substr ($tx1, $ibgn, ($i - $ibgn)) . "]4!\n"); push (@ar, substr ($tx1, $ibgn, ($i - $ibgn))); $ibgn = $i; ### continue; ###$tx1 = substr ($tx1, $i); ###tolog (" tx1 chopped to [$tx1]\n"); ##$i = 0; ##last; } elsif (gotdelim($ch1)) { ### found a delimiter - split at delim if ($i) { tolog ("get part [" . substr ($tx1, 0, $i) . "]5!\n"); push (@ar, substr ($tx1, 0, $i)); } $i++; tolog ("get part [$ch1]6!\n"); push (@ar, $ch1); $tx1 = substr ($tx1, $i); tolog (" tx1 chopped to [$tx1]\n"); $i = 0; last; } } $i2++; ### count a char } ### for length $tx1 if ($i) { tolog ("get part [" . substr ($tx1, 0, $i) . "]7!\n"); push (@ar, substr ($tx1, 0, $i)); $tx1 = ''; tolog ("tx1 ended\n"); } return @ar; } sub get_line_array { my ($tx1) = @_; my @ar = (); ## if not in print << token my $i; my $mx; my $insp = 0; tolog ("Get LA[$tx1]\n"); while ($mx = length ($tx1) ) { for ($i = 0; $i < $mx; $i++) { my $ch1 = substr ($tx1, $i, 1); # get char tolog (" got ".($i + 1)." char [$ch1]\n"); if (($ch1 eq ' ')||($ch1 eq "\t")) { if ($ch1 eq ' ') { tolog ("char [$ch1] is spacey\n"); } else { tolog ("char [tab] is spacey\n"); } if ($i && ($insp == 0)) { tolog ("get part [" . substr ($tx1, 0, $i) . "]1!\n"); push (@ar, substr ($tx1, 0, $i)); $tx1 = substr ($tx1, $i); tolog (" tx1 chopped to [$tx1]\n"); $i = 0; last; } $insp++; # count spaces } else { if ($insp) { tolog ("storing spacey front for $i chars\n"); tolog ("get part [" . substr ($tx1, 0, $i) . "]2!\n"); push (@ar, substr ($tx1, 0, $i)); $tx1 = substr ($tx1, $i); $insp = 0; tolog (" tx1 chopped to [$tx1]\n"); $i = 0; last; } elsif ($ch1 eq '#') { if ($i) { tolog ("storing front of # for $i chars\n"); tolog ("get part [" . substr ($tx1, 0, $i) . "]3!\n"); push (@ar, substr ($tx1, 0, $i)); } tolog ("get part [" . substr ($tx1, $i) . "]3-1!\n"); push (@ar, substr ($tx1, $i)); $tx1 = ''; tolog (" tx1 chopped blank\n"); $i = 0; last; } elsif (($ch1 eq '"')||($ch1 eq "'")) { $i++; for (; $i < $mx; $i++) { if (substr ($tx1, $i, 1) eq $ch1) { ### check next char $i++; ## include this char tolog ("found end [$ch1] at $i\n"); last; } } ### got quoted block tolog ("get part [" . substr ($tx1, 0, $i) . "]4!\n"); push (@ar, substr ($tx1, 0, $i)); $tx1 = substr ($tx1, $i); tolog (" tx1 chopped to [$tx1]\n"); $i = 0; last; } elsif (gotdelim($ch1)) { ### found a delimiter - split at delim if ($i) { tolog ("get part [" . substr ($tx1, 0, $i) . "]5!\n"); push (@ar, substr ($tx1, 0, $i)); } $i++; tolog ("get part [$ch1]6!\n"); push (@ar, $ch1); $tx1 = substr ($tx1, $i); tolog (" tx1 chopped to [$tx1]\n"); $i = 0; last; } } } ### for length $tx1 if ($i) { tolog ("get part [" . substr ($tx1, 0, $i) . "]7!\n"); push (@ar, substr ($tx1, 0, $i)); $tx1 = ''; tolog ("tx1 ended\n"); } } return @ar; } sub get_space_array { my ($tx) = @_; my $lb; my @a = (); my $i = 0; my $pos1 = 0; foreach $lb (@lnbits) { my $pos2 = index ($tx , $lb); $a[$i] = substr ($tx, $pos1, $pos2); $tx = substr ($tx, ($pos2 + length ($lb))); ###$a[$i] = substr ($tx, $pos1, ($pos2 - $pos1)); ###$pos1 += $pos2 + length ($lb); $i++; } return @a; } ### NOT passed an ALL-SPACEY line ### returns line in HTML form, with STYLE encoding ### note : this is line by line, thus multiple line items will FAIL ### Presently the ONLY line-sets, like 'print $OF <<TOKEN ... TOKEN' == sub do_line_parse { my ($tx) = @_; chomp $tx; ### my @copybits; ## keep, for ORIGINAL space work 'replacement' my $tx2 = $tx; my $tx3; my $tx4 = htmlise($tx); ## the HTML'ISED string my $istxt = 1; my $gotfes = 0; # no frontend space my $txsp = ''; # frontend SPACEY stuff ### no way ! my $txsp = $htmnbs; # frontend SPACEY stuff my $tx5; my $tx6; my $i = 0; my $i3 = 0; my $c1 = substr ($tx, 0, 1); # get and keep first char ### no go with ? @lnbits = split (/ /, $tx); # initial split spaces ### As a special case, specifying a PATTERN of space (' ') will split on white space ### FRONT END SPACE HANDLING ############################## ### experimental @lnbits = get_line_array($tx); ### foreach $tx3 (@lnbits) { ### tolog ("[$tx3]"); ### } ### tolog("\n"); # this has some BIG drawbacks!!! It is needed to begin separation into LINE-BITS # BUT, it collapses 'space' in quoted strings, and possibly split up a regex expression = ugh! @lnbits = split (' ', $tx); # initial split spaces @spbits = get_space_array($tx); my $c2 = substr ($lnbits[0], 0, 1); # get POTENTIAL new first char my $pos1 = index ($tx, $c2); # get pos of first array char, in string $gotfes = 0; # no frontend space if ($pos1 > 0) { $gotfes = 1; # mark, got frontend space $txsp = substr($tx, 0, $pos1); # get SPACEY at FRONT if ($txsp ne $spbits[0]) { die "Make array FAILED ITS JOB!!!\n"; } tolog ('Spaces ['); foreach $txsp (@spbits) { tolog ("[$txsp]"); } tolog (" SA = " . scalar @spbits . ".\n"); } ############################## my $cnt = @lnbits; # count of componets, so far my $cntorg = $cnt; # keep original SIZE, $cnt is 'adjusted' during ... my @lnadd; # when ADDING to the array my @spadd; # add to SPACE array also my $nct = 0; # count AFTER array 'adjustments' ... my $ln = length($tx2); # get length of line, not soooo important my $ch = substr ($tx2, 0, 1); # get first char, for fast decisions my $c = $ch; ### copy of FIRST char my $run1chg = 0; ### if ($lnbits[0] =~ m/^\#/) { if ($inprttok && ($tx ne $acttoken) ) { ### NO PARSING of this data, except scalars ... return color3 ($tx4); } if ($c1 eq '#') { ####################################################### # is comment tolog ("Is comment - try ...\n"); ###$tx3 = green($tx4); if ($colorON) { $tx3 = orange($tx4); } else { $tx3 = $tx4; } ### $tx3 .= "<br>\n"; ### prt ($tx3); ####################################################### } else { ## does not START with a # comment char #### tolog ("########### parse run one ###############################(c=$cnt)\n") if $verb2; if ($verb2) { tolog ("########### parse run one ###############################(c=$cnt)\n"); $msg = ''; foreach $tx2 (@lnbits) { $msg .= "[$tx2]"; } $msg .= "\n"; tolog ($msg); } $i3 = 0; my $ichg = 0; ### count of bit changes ### first run - to re-combine quoted text within LINE ARRAY $ichg = 0; @logmsgs = (); ### clear LOG message stack ###tolog ("{ comps $cntorg\n"); # log COUNT at start $msg = ("{ comps $cntorg\n"); # log COUNT at start push(@logmsgs,$msg); ## accumulate ### this pre-run JOINS or SPLITS = ENSURE EACH QUOTED BLOCK is in its own bucket my $icnt = 0; ### init line 'bits' counter do_line_reset (); ########### parse run one ############################### foreach $tx2 (@lnbits) { my $spb = $spbits[$icnt]; ### get the SPACE BIT, if ANY $icnt++; # PRE-BUMP THE COUNT $msg = "Bit$icnt: [$spb][$tx2]"; ###$msg = $tx2; # set line bit ###$msg .= ' =>'; $ln = length($tx2); $ch = substr($tx2, 0, 1); $i = 0; ### special +?.*^$()[]{}|\ ### if ($tx2 =~ /^['"]/ ) { ## "' # does it start with quotes d or s if (($ch eq '"')||($ch eq "'")) { $msg .= " Begin Q (l=$ln)["; $msg .= $tx2; $msg .= ']'; $i3 = 1; # set JOIN if ($ln > 1) { $i3 = 1; # set JOIN/SPLIT $tx3 = substr ($tx2, 1); # get past quote if (($ln > 1) && ($tx3 =~ /$ch/)) { $pos1 = index ($tx3, $ch); # get position of next quote $msg .= ' and end ['; $msg .= $tx3; $msg .= "](p=$pos1)"; $tx5 = substr ($tx2, 0, ($pos1 + 1 + 1)); # get WHOLE QUOTE $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if ANY if (length($tx3)) { $msg .= ' quote split '; $msg .= '['; $msg .= $tx5; $msg .= ']'; $msg .= '['; $msg .= $tx3; $msg .= ']?'; $lnbits[$icnt - 1] = $tx5; # put back adjusted first @lnadd = ($tx3); ### bit-to-insert @spadd = (''); ### a non-space ### if ( $tx3 =~ /$ch/ ) { if ((length($tx3) > 1) && ( $tx3 =~ /['"]/ )) { ### zeek, there are more of these ... $i = 0; $tx5 = ''; while(1) { $c = substr ($tx3, $i, 1); if (($c eq '"')|| ($c eq "'") ) { last; } $i++; # bump to next if ($i >= ($ln - 1)) { $c = 0; last; } } if ($i) { if (($c eq '"')||($c eq "'")) { $tx5 = substr ($tx3, 0, $i); # get before QUOTE $tx3 = substr ($tx3, $i ); # get balance $lnadd[0] = $tx5; push(@lnadd,$tx3); push(@spadd, '' ); ### a non-space $ichg++; } } $msg .= " found [$c] split [$tx5] [$tx3]* "; } splice (@lnbits, $icnt, 0, @lnadd); # insert 1 or more new items splice (@spbits, $icnt, 0, @spadd); # insert 1 or more new items ### splice (@lnbits, $i2, 0, $tx3); # insert 1 new items $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; } $msg .= " b&e same quotes"; $i3 = 0; } } if ($i3) { ### JOIN, until the END OF THIS QUOTE $i3 = 0; $tx6 = $tx2; ### start feeding, until the END OF QUOTE, or EOL!!! for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; # get next $msg .= ('+[' . $tx3 . ']'); ###$tx6 .= ' '; # add back space $tx6 .= $spbits[$i]; # add back 'actual' space, 1 or more $tx6 .= $tx3; ### $lnbits[$i]; $i3++; ### count 'bits' to DELETE $ichg++; ### count a CHANGE if ($tx3 =~ /$ch/) { @lnadd = (); @spadd = (); $msg .= '-'; $pos1 = index ($tx3, $ch); # get position of next quote if ($pos1 > 0) { $tx5 = substr ($tx3, 0, $pos1); # get BEFORE QUOTE $tx3 = substr ($tx3, $pos1); # get ending text, if ANY $msg .= " *CHK [$tx5] [$tx3]???\n"; if ((length($tx3) > 1) && ( $tx3 =~ /['"]/ )) { ### zeek, there are more of these ... $i = 0; $tx5 = ''; while(1) { $c = substr ($tx3, $i, 1); if (($c eq '"')|| ($c eq "'") ) { last; } $i++; # bump to next if ($i >= ($ln - 1)) { $c = 0; last; } } if ($i) { if (($c eq '"')||($c eq "'")) { $tx5 = substr ($tx3, 0, $i); # get before QUOTE $tx3 = substr ($tx3, $i ); # get balance @lnadd = ($tx5,$tx3); @spadd = ('',''); ## also add non-spaces $ichg++; } } } $msg .= " could split [$tx5] [$tx3]* "; } $msg .= " found end [$c] split "; last; # exit when terminator found } } $msg .= " *REPLACING [$tx2] with [$tx6]!"; $lnbits[$icnt - 1] = $tx6; # put back single quoted message splice (@lnbits, $icnt, $i3); # collapse following items splice (@spbits, $icnt, $i3); # collapse following items $msg .= ", now joined, to its end (1)"; $cnt = @lnbits; ### UPDATE THE COUNT } } elsif ($tx2 =~ /['"]/ ) { ## "' # does it CONTAIN quotes, d OR s $c = get_a_quote($tx2); $pos1 = index ($tx2, $c); # get position of next quote if (($pos1 > 0) && $c) { $msg .= " QUOTE $c split, at $pos1 "; $tx5 = substr ($tx2, 0, $pos1); # get before QUOTE $tx3 = substr ($tx2, $pos1 ); # get balance ### check back $msg .= "would replace [".$lnbits[$icnt - 1]."][$tx5]"; $lnbits[$icnt - 1] = $tx5; # fix this 'line-bit' @lnadd = ($tx3); ### add this one @spadd = (''); splice (@lnbits, $icnt, 0, @lnadd); # add bucket splice (@spbits, $icnt, 0, @spadd); # add bucket $msg .= ", now sep [$tx5][$tx3]"; $cnt = @lnbits; ### UPDATE THE COUNT } else { die "ERROR: Handler above does BITS-OF-LINE that begin with a QUOTE!!!\n"; } } elsif ($ch eq '#') { # if line-bit starts with a perl comment ## join to end of line $i3 = 0; $tx5 = $tx2; $tx6 = $lnbits[$icnt - 1]; for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; ###$tx5 .= ' '; $tx5 .= $spbits[$i]; # add back 'actual' space, 1 or more $tx5 .= $tx3; ### $lnbits[$i]; $i3++; $ichg++; } if ($i3) { $msg .= ' Joined ['; $msg .= $tx6; ### = $lnbits[$icnt - 1]; $msg .= '] to ['; $msg .= $tx5; $lnbits[$icnt - 1] = $tx5; # put back single quoted message $msg .= '] sp ' . $icnt . ' ' . $i3; splice (@lnbits, $icnt, $i3); # collapse following items splice (@spbits, $icnt, $i3); # collapse following items $msg .= " end-of-line comment"; $cnt = @lnbits; } } else { ## not begin quote ' or ", nor begin # ... ## dealt with on NEXT iteration of line bits - left for diagnostic only ### $c = 0; if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) { # start of a scalor, array, hash ... move on to next letter $tx3 = substr($tx2,1); $c = gotdelim($tx3); ### any more in this line if ( length($tx3) && ($c) && ! xceptchr($c) ) { # got first split point, AFTER $var ... $pos1 = index ($tx3,$c); } } else { $tx3 = $tx2; ### check full line $c = gotdelim($tx3); if ( length($tx3) && ($c) ) { # got first split point $pos1 = index ($tx3,$c); } # process $tx3 } $msg .= ' =nc='; if ( isresword ($tx2) ) { ### exists $HResWds{$tx2} $msg .= ' *B*'; ### blue('R'); } if ( isbinfun ($tx2) ) { ## exists $HBFuncs{$tx2} $msg .= ' *P*'; } if ( $ln < 4 ) { ### tolog ( "*PUNC* CHECK [" . $tx2 . "]\n" ); if ( ispunctuat ( $tx2 ) ) { ###$actpunc = $tx2; ### store the active punctuation $msg .= ' *PUNC*'; } } } ###tolog ($msg . "\n"); $msg .= "\n"; # add end of line push(@logmsgs, $msg); ### store the LOG } # for array list of line components === ONLY DOING JOINING ########### END parse run one END ######################## $nct = @lnbits; if ($cnt != $nct) { die "***FIX a COUNT UPDATE $cnt $nct $cntorg ????\n"; } if ($cntorg == $nct) { $msg = "} end comps $cntorg\n"; } else { $msg = ("} end comps $cntorg, adj. $nct " . ($cntorg - $nct) . "\n"); } push(@logmsgs, $msg); if ($ichg || $verb2 || $addlinenums) { tolog ( "Run 1 made " . $ichg . " changes in line - CHECK CHANGE\n" ); foreach $msg (@logmsgs) { tolog($msg); } } else { ### no change if ($verb2) { tolog ("No change\n"); } } @copybits = @lnbits; ### take a SNAP of this QUOTE ONLY EXPANSION ### want to RETURN the line to this SPACING, if possible ### $run1chg = $ichg; tolog ("########### parse run two ###############################\n") if $verb2; #################### DO IT ALL NOW ################### ###tolog ("{ comps $nct\n"); # log COUNT at start @logmsgs = (); $msg = ("{ comps $nct\n"); # log COUNT at start push(@logmsgs,$msg); ## accumulate $icnt = 0; ### init line 'bits' counter $ichg = 0; ### clear change TOTAL do_line_reset (); ########### parse run two ############################### foreach $tx2 (@lnbits) { my $ichg1 = 0; # change to THIS line-bit $icnt++; # PRE-BUMP THE COUNT $ln = length($tx2); ### set length $ch = substr ($tx2, 0, 1); $msg = "B$icnt:[$tx2]=$ln"; ### open DIAG message ###$msg = $tx2; ### diag - add the bit-of-the-line to log output ###$msg .= " =$ln"; ### separate to ACTION $i = 0; ### special +?.*^$()[]{}|\ ### if ($tx2 =~ /^['"]/ ) { ## "' # does it start with quotes d or s if ($ln < 2) { $msg .= " s.chr"; ### just one char } elsif (($ch eq '"')||($ch eq "'")) { ######################################### ### $msg .= " begin quote (p2)"; $i = 1; # set JOIN if ($ln > 1) { $tx3 = substr ($tx2, 1, $ln - 1); # get past quote if ( $tx3 =~ /$ch/) { $pos1 = index ($tx3, $ch); # get position of next quote if ($pos1 > 0) { $tx5 = substr ($tx2, 0, ($pos1 + 1 + 1)); # get WHOLE QUOTE $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if ANY if (length($tx3)) { ### error case ### "_","|", DONE WOULD SPLIT ["_"][,"|",]? b&e same quotes $msg .= ' DONE WOULD SPLIT '; $msg .= '['; $msg .= $tx5; $msg .= ']'; $msg .= '['; $msg .= $tx3; $msg .= ']?'; $lnbits[$icnt - 1] = $tx5; # put back adjusted first ### if ( $tx3 =~ /$ch/ ) { if ( $tx3 =~ /['"]/ ) { ### zeek, there are more of these ... $msg .= ' *MESS if , excepted '; } splice (@lnbits, $icnt, 0, $tx3); # insert 1 new items splice (@spbits, $icnt, 0, ''); # insert 1 new NON-SPACE items $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; $ichg1 = 1; } } $msg .= " b&e same quotes"; $i = 0; } } if ($i) { # should JOIN until the END $i3 = 0; for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; # get next ###$tx2 .= ' '; # add back space $tx2 .= $spbits[$i]; # add back space, 1 or more $tx2 .= $tx3; ### $lnbits[$i]; $i3++; $ichg++; $ichg1 = 2; if ($tx3 =~ /$ch/) { last; # exit when terminator found } } $lnbits[$icnt - 1] = $tx2; # put back single quoted message ###splice (@lnbits, $i2, $cnt - $i2); # collapse following items splice (@lnbits, $icnt, $i3); # collapse following items splice (@spbits, $icnt, $i3); # collapse following items $msg .= ", now joined, to its end (2)"; $cnt = @lnbits; ### UPDATE THE COUNT } $i3++; ######################################### } elsif ($ch eq '#') { # if starts with a comment ######################################### ## should join to end of line, if NEEDED, ie not last line-bit $i3 = 0; if ($icnt < $cnt) { for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; ###$tx2 .= ' '; $tx2 .= $spbits[$i]; $tx2 .= $tx3; ### $lnbits[$i]; $i3++; $ichg++; $ichg1 = 3; } $msg .= ' joineD ['; $msg .= $lnbits[$icnt - 1]; $msg .= '] to ['; $msg .= $tx2; $msg .= ']'; $lnbits[$icnt - 1] = $tx2; # put back single quoted message ###splice (@lnbits, $i2, $cnt - $i2); # collapse following items $msg .= ' del frm ' . $icnt . ' for ' . $i3; splice (@lnbits, $icnt, $i3); # collapse following items splice (@spbits, $icnt, $i3); # collapse following items ### $msg = $tx2; $cnt = @lnbits; } $msg .= ", line comment"; ######################################### } else { ######################################### ## not begin quote ' or ", nor begin # ... and is more than one char $c = 0; $tx3 = substr($tx2,1); if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) { # start of a scalar, array, hash ... move on to next $c = gotdelim($tx3); if ( length($tx3) && ($c) && ! xceptchr($c) ) { # got first split point, AFTER $var ... ### headed for a SPLIT off of the END $pos1 = index ($tx3,$c); ### get index in SUB-STRING $msg .= " SP [$c] at " . ($pos1 + 1 + 1); ###if ($pos1 > 0) { $i3 = 0; ### assume SPLIT @lnadd = ($c); @spadd = (''); # start non-space $tx5 = $ch; # put first char back [$@%] if ($pos1 > 0) { $tx5 .= substr ($tx3, 0, $pos1); # get up to CHAR = variable $tx6 = substr ($tx3, ($pos1 + 1)); # get ending text, if any if (length($tx6)) { ###if ((($c eq '(') && (substr($tx6,0,1) eq ')')) || ### (($c eq '+') && (substr($tx6,0,1) eq '+')) ) { # eg check *split* [$sock->accept][(][);] if (( ispunctuat($c) ) && ( ispunctuat($c.substr($tx6,0,1)) ) ) { ## yay, new SPLIT! $c .= substr($tx6,0,1); ## add this to first @lnadd = ($c); ### set NEW line-bit @spadd = (''); # start non-space $tx6 = substr ($tx6, 1); ## get to end } if (length($tx6)) { push(@lnadd, $tx6); # put in slurp push(@spadd, '' ); # add non-space } ### $i3 = 1; # some EXCEPTIONS ?????? } } if ($i3) { $msg .= '*NO* *split* ['; } else { $msg .= 'DONE *split* ['; } $msg .= $tx5 . ']['; $msg .= $c . ']'; if (length($tx6)) { $msg .= '['; $msg .= $tx6 . ']'; } ###tolog ($msg . "\n"); if ($i3 == 0) { $lnbits[$icnt - 1] = $tx5; # put back first split - end of var splice (@lnbits, $icnt, 0, @lnadd); # insert 1 or 2 new items splice (@spbits, $icnt, 0, @spadd); # insert 1 or 2 new items $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; $ichg1 = 4; } } } else { ## not begin quote ' or ", nor begin # ... ### and is NOT if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) { $tx3 = $tx2; my $c3 = gotdelim($tx3); ###if ( length($tx3) && ($c3) ) { # got first split point if ( ($ln) && ($c3) ) { # got first split point $pos1 = index ($tx3,$c3); if ( $pos1 > 0 ) { # if the first char, or ... ### we have something, a million other variations ##my $ts = '\\'; ##$ts .= $c3; ##@lnadd = split ($ts, $tx3); $tx5 = substr ($tx3, 0, $pos1); # get up to CHAR ###@lnadd = ($tx5, $c3); @lnadd = ($c3); @spadd = (''); $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any if (length($tx3)) { push(@lnadd, $tx3); # put in slurp push(@spadd, '' ); # put in non-space } ###if (($c3 ne ':') && ($c3 ne '=') && ($c3 ne '|')) { if ( ! xceptchr($c3) ) { $msg .= ' done Split ['; $msg .= $tx5 . ']['; $msg .= $c3 . ']'; if (length($tx3)) { $msg .= '['; $msg .= $tx3 . ']'; } ###tolog ($msg . "\n"); $lnbits[$icnt - 1] = $tx5; # put back first split ###splice (@lnbits, $i2, 0, $c3); ###if (length($tx3)) { ### splice (@lnbits, ($i2+1), 0, $tx3); ###} splice (@lnbits, $icnt, 0, @lnadd); # insert 1 or 2 new items splice (@spbits, $icnt, 0, @spadd); # insert 1 or 2 new items ##splice (@lnbits, ($i2 - 1), 1, @lnadd); # INSERT into array at this pos $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; $ichg1 = 5; } } elsif ( $pos1 == 0 ) { $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any if (length($tx3)) { $msg .= " sP-[$c3][$tx3](c=$c3)"; ### @lnadd = ($c3, $tx3); # put in slurp ### if (($c3 ne ':') && ($c3 ne '=') && ($c3 ne '|')) $i = 1; ### set to slpit if ( xceptchr($c3) ) { $msg .= ' *SPLIT EXCEPTED CHR*'; $i = 0; # kill split } elsif ($c3 eq substr ($tx3, 0, 1)) { if ($ln > 2) { $tx6 = substr ($tx2, 2); ### slurp balance if (substr ($tx6,0,1) eq $c3) { ### zeek, we have three ... $msg .= ' *SPLIT EXCEPTED* X3'; $i = 0; # kill split??? } else { ### setup for split $c3 .= $c3; $tx3 = $tx6; $msg .= " Sp+[$c3][$tx3]"; $i = 2; # set split } } else { ### length == 2 if ((ispunctuat($c3))&& (ispunctuat($c3.$tx3))){ ### but is it ispunctuat - NO split $msg .= ' =EXCEPTED= punctuation'; $i = 0; } } } else { if ( ispunctuat( $c3 . substr ($tx3, 0, 1) ) ) { $msg .= ' =EXCEPTED= punc'; $i = 0; } else { $msg .= 'ok'; $i = 1; } } if ($i) { $lnbits[$icnt - 1] = $c3; # put back first split splice (@lnbits, $icnt, 0, $tx3); splice (@spbits, $icnt, 0, '' ); # and a NON-SPACE $ichg++; $ichg1 = 6; $cnt = @lnbits; ### ADJUST COUNT ITERATOR $msg .= " DONE SPLIT [$c3][$tx3]"; } } } else { ### last; die "ERROR: Unresolved POSITION - can not happen ...\n"; } } # process $tx3 } ######################################### ###if ($c && ! xceptchr($c) ) { if ($ichg1) { $msg .= " *CHG2* #[$ichg1]"; } else { $msg .= ' *NC* '; } if ( isresword ($tx2) ) { ### exists $HResWds{$tx2} $msg .= ' *B*'; ### blue('R'); $i3++; } if ( isbinfun($tx2) ) { ## exists $HBFuncs{$tx2} $msg .= ' *P*'; $i3++; } if ( $ln < 4 ) { ### tolog ( "*PUNC* CHECK [" . $tx2 . "]\n" ); if ( ispunctuat ( $tx2 ) ) { $msg .= ' *PUNC*'; } } ######################################### } ### tolog ($msg . "\n"); $msg .= "\n"; push(@logmsgs,$msg); } # for array list of line components ########### END parse run two END ######################## $nct = @lnbits; if ($cnt != $nct) { die "***FIX a COUNT UPDATE $cnt $nct $cntorg ????\n"; } if ($cntorg == $nct) { $msg = ("} end comps $cntorg\n"); } else { $msg = ("} end comps $cntorg, adj. $nct " . ($cntorg - $nct) . "\n"); } push(@logmsgs,$msg); if ($run1chg || $ichg || $verb2) { tolog ( "Run 2 Made " . $ichg . " changes in line - CHECK CHANGE\n" ); foreach $msg (@logmsgs) { tolog($msg); } } else { ### no change if ($verb2) { tolog ("Run 2 - No change\n"); } } ##@parsebits = @lnbits; ## copy to modified copy, ##@colorbits = @lnbits; ## create two arrays parse_it(); ## set the STYLE functions tolog ("########### output run ###############################\n") if $verb2; ### tolog ("{{ $nct"); @logmsgs = (); $msg = ("{{ $nct OUTPUT RUN ..."); push(@logmsgs,$msg); ### prepare for HTML output ########################### $tx3 = ''; # clear FRONTEND output $c1 = substr ($tx, 0, 1); # get and keep first char ### $tx3 = $txsp; # get the FRONTEND SPACE if (($c1 eq ' ') || ($c1 eq "\t")) { die "ERROR: Missed a case above ...\n" if ! $gotfes; # MISSED FRONTEND SPACE ### $tx3 .= ' '; # add last space back $tx3 = htmlise($txsp); if ($colorON) { $tx3 = white($tx3); } ## $tx3 = ' '; ## $tx3 = htmlise($txsp); # space to HTML if ($verb2) { $msg = "\nSpace=[\n"; $msg .= $txsp; $msg .= "]\n["; $msg .= $tx3; $msg .= ']'; tolog ($msg . "\n"); } } else { die "ERROR: Missed a case above ...\n" if $gotfes; # MISS FRONTEND SPACE } ############################################# $i3 = 0; # init COUNTER $icnt = 0; $i = 0; $ln = 0; do_line_reset (); foreach $tx2 (@lnbits) { # process for OUTPUT my $txsp2 = $spbits[$i3]; my $txspl = length ($txsp2); ### we have @copybits = @lnbits; ### take a SNAP of this QUOTE ONLY EXPANSION ### my $addspace1 = 1; ### 0 returns to original spacing (1 = 1 space for each) if ($i3) { # was (length($tx3)) { ### this should REMEMBER the original 'line-spacing', and re-apply it now $tx6 = substr ($tx6, $ln); ### get next line 'bit' ### note, no actual CHECK that they are the EQUAL!!! ### if ($msg eq $tx2) { ### should work also ... if (length($tx6)) { $nct = 0; ### no SPACE addition yet if ($addspace1) { ### DIAGNOSTIC ADDITION OF A SPACE ### ###$tx3 .= ' '; # add back 'space' between LINE components/bits ###$tx3 .= white(' '); ### add a space, with style $tx3 .= color5(' '); ### add a space, with style } } else { $icnt++; ### bump to NEXT $tx6 = $copybits[$icnt]; ### get the 'copy', for 'formatting' $i = length($tx6); ## len of COPY $c1 = substr ($tx6, 0, 1); ### and first char $nct = 1; ### add back SPACE, per original file } if ($nct) { ###$tx3 .= white(' '); # add back 'space' between LINE components ###$tx3 .= ' '; # add back 'space' between LINE components/bits if ($txspl) { $tx3 .= white($txsp2); } elsif ($addspace1) { $tx3 .= color5(' '); # add back 'space' between LINE components/bits } } } else { ## first, so no space added = START 'spacer' $tx6 = $copybits[$icnt]; ### get the 'copy', for 'formatting' $i = length($tx6); ## len of COPY $c1 = substr ($tx6, 0, 1); ### and first char } $ln = length($tx2); # length this line 'bit' $c = substr ($tx2, 0, 1); # get FIRST CHAR $msg = $tx2; # get copy of the line $tx5 = htmlise($msg); # make it HTML form ### $func2->($tx2); ### service the parser ### ### $parsebits[$i3]->($tx2); if ($colorON) { ###$msg = $func->($tx5); ### get some STYLE, for HTML'ised form of text $msg = $colorbits[$i3]->($tx5); ## = $func; $tx3 .= $msg; } else { $msg = $tx5; ### get some STYLE, for HTML'ised form of text $tx3 .= $msg; } ###tolog (' [' . $msg . ']'); ###tolog (' [' . $tx2 . ']'); $msg = (' [' . $tx2 . ']'); push(@logmsgs,$msg); $i3++; ## count a line item $msg = $tx2; ### keep LAST line 'bit' ... } ### loop while line 'bits' ##### done line output ##### ### tolog ("}}\n"); $msg = ("}}\n"); push(@logmsgs,$msg); foreach $msg (@logmsgs) { tolog($msg); } ### $tx3 .= "<br>\n"; ### tolog ($tx3); ### prt ($tx3); ####################################################### } ### comment line summarily dealt with ... return $tx3; # return prepared line of HTML } sub parse_it { my $tx2; my $i3; my ($ln, $c); my $func; my $func2; ###@parsebits = @lnbits; ## copy to modified copy, ###@colorbits = @lnbits; ## create two arrays #### with 'print [?] << TOKEN or "TOKEN" ... TOKEN mark as " or ' quote text case ... $i3 = 0; my $sz = @lnbits; ### get LENGTH of line-bits foreach $tx2 (@lnbits) { # process for OUTPUT $ln = length($tx2); # length this line 'bit' $c = substr ($tx2, 0, 1); # get FIRST CHAR if ($c eq '#') { # comment component - should be to end-of-line ... $func = \&orange; $func2 = \&add_ucomment; } elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE $func = \&green; $func2 = \&add_usingleq; } elsif ($c eq '"') { $func = \&color3; $func2 = \&add_udoubleq; } elsif ($c eq '$') { # start of scalar $func = \&color1; $func2 = \&add_uscalar; } elsif ($c eq '@') { # start of array $func = \&match; $func2 = \&add_uarray; } elsif ($c eq '%') { # start of hash $func = \&peach; $func2 = \&add_uhash; } elsif ( isresword ($tx2) ) { ### exists $HResWds{$tx2} $func = \&blue; $func2 = \&add_uresword; } elsif ( isbinfun ($tx2) ) { ### exists $HBFuncs{$tx2} $func = \&color2; $func2 = \&add_ubfuncs; } else { $func = \&white; # set default, white $func2 = \&add_udefault; if ($ln < 4) { # if it is a short 'bit' of the line if ( ispunctuat ($tx2) ) { # check if punc $func = \&grey; # yup, switch to grey $func2 = \&add_upunc; } } } $parsebits[$i3] = $func2; $colorbits[$i3] = $func; $func2->($tx2); ### service the parser ### ###if ($colorON) { ### $msg = $func->($tx2); ### get some STYLE, for HTML'ised form of text ###} ### post primary parse 'corrections' ### my @actpuncs = (); ### stack of punctuation $func = \&color3; my $ssz = @actpuncs; ### my $acttoken = ''; ### print [] << TOKEN ### my $inprttok = 0; ### processing a print token if ($inprttok) { ### NO PARSING of this data, except scalars ... $colorbits[$i3] = $func; ### SET NEW COLOR FUNCTION if (($tx2 eq $acttoken) && ($sz == 1)) { ### line-bit count is 1 $inprttok = 0; # if this first-and-only line-bit eq $acttoken, tolog ("CLOSED PRINT punct = $ssz ... $acttoken ...\n"); $acttoken = ''; # KILL any active TOKEN } } elsif ($tx2 eq ';') { ### at end of PROGRAM statement, unless in REGEX!!! *TBD* if ($actfunc eq 'print') { ## actioning a PRINT ## my $ssz = @actpuncs; if ($ssz > 1) { if ($actpuncs[($ssz - 2)] eq '<<') { ## ok, previous line-bit has to be the TOKEN string $acttoken = $lnbits[$i3 - 1]; $acttoken =~ s/\"//g; ### dish the quotes, if any ... tolog ("GOT PRINT punct = $ssz ... $acttoken ...\n"); $inprttok = 1; $colorbits[$i3 - 1] = $func; ### SET NEW COLOR FUNCTION } } } tolog ("Active Reserved Word = [$actresword] ... \n") if $verb2; ### tolog ("Active Double Quote = [$actdoubleq] ... \n"); if ($actresword eq 'require') # %HResWdFnd { $actifile = $actdoubleq; $actifile =~ s/"//g; ### my $actdoubleq = ''; my $fl = $actifile; if ( -f $fl) { push (@incfiles, $fl); # stack of include files, if any tolog ("STACKED include file [$fl]\n"); } else { tolog ("STACK FAILED include file [$fl]\n"); } } @actpuncs = (); ### clear punctuation stack, on ';' char ... } $i3++; } } ### bug the code line '$txt =~ s/"/"/g; # sub double quotes' did not produce ### the required HTML of '$txt =~ s/"/&quot;/g; # sub double quotes' sub htmlise { my ($txt) = @_; my $htmsps = 0; my $htmnbs = ''; # convert to HTML $txt =~ s/&/&/g; # substitute any '&' with '&' string ... $txt =~ s/\t/$tab_stg /g; # substitute TAB characters $txt =~ s/"/"/g; # sub double quotes $txt =~ s/\</</g; # sub less than tag beginning $txt =~ s/\>/>/g; # and html/xml tag ending my $ln = length($txt); # get the final length if (substr ($txt, 0, 1) eq ' ') { # if starts with a space $htmnbs = ' '; for ($htmsps = 1; $htmsps < $ln; $htmsps++) { if (substr ($txt, $htmsps, 1) ne ' ') { last; } $htmnbs .= ' ' if $htmsps > 1; } $htmsps-- if $htmsps > 1; # back off last space, if more than 1 tolog ("Replacing $htmsps with [$htmnbs] ...\n") if $verb2; $txt =~ s/ {$htmsps}/$htmnbs/; # replace (N) spaces with ' x N if ($verb2) { my (@vals) = split; while (@vals) { my ($vc) = shift (@vals); tolog ("[$vc] "); } tolog ("\n"); } } # if it was space beginning return $txt; } ### note : Regular Expressions ### Each character matches itself, unless it is one of the ### special characters + ? . * ^ $ ( ) [ ] { } | \. ### The special meaning of these characters can be escaped using a \. my $regexspecs = "+?.*^$()[]{}|\\"; ## my $regexspecs = "^$\\"; ## my $DELIMITER = '-/=~!&<>:;,'; ## my $DELIMITER = '(){}[]-+*/=~!&|<>?:;.,'; sub is_regex_spl { my ($tx) = @_; my $c; my $mx = length($regexspecs); ### = '(){}[]-+*/=~!&|<>?:;.,'; my @ar = split (//, $regexspecs); foreach $c (@ar) { if ($tx eq $c) { return $c; } } return 0; } sub gotdelim { my ($tx) = @_; my $c; my $mx = length($DELIMITER); ### = '(){}[]-+*/=~!&|<>?:;.,'; ### my @DelimList = split (//, $DELIMITER); ### form a list ### my @ar = split (//, $DELIMITER); my $i = 0; #### tolog ("gotdelim: [$tx] Searching ...\n"); #### foreach $c (@ar) { foreach $c (@DelimList) { my $ts = '\\'; $ts .= $c; if ($tx =~ /$ts/) { ## does this char EXIST in string if (substr($tx,0,1) ne $c) { ### if NOT first char my $ps = index ($tx, $c); ### get index of char if ($ps > 1) { ## 0 means it is second char, but first delim ### EEK not $t2 = substr ($tx, 0, ($ps - 1)); ;=(( my $t2 = substr ($tx, 0, $ps); # up to, excluding delim my $cc = gotdelim ($t2); if ($cc) { ### tolog (" *MISSED SPLIT* [$t2]has[$cc]nd[$c] "); #### tolog ("gotdelim($i): [$tx] Returning [$cc], in place of [$c], pos=$ps\n"); return $cc; ### return SHORTEST, closest to front, split character } } } #### tolog ("gotdelim($i): [$tx] Returning [$c] ...\n"); return $c; } $i++; } #### tolog ("gotdelim($i): [$tx] NONE ...\n"); return 0; } ###my $actpunc = ''; ### store the active punctuation ###my %HPuncsFnd = (); # hash of Punctuation FOUND in parse ###my $actresword = ''; ###my %HResWdFnd = (); ###my $actfunc = ''; ### store the active built-in functions ###my %HFuncsFnd = (); ### my %HPuncsFnd = (); # hash of Punctuation FOUND in parse ### case of the first CHARACTER - established TYPE of this line bit ##if ($c eq '#') { # comment component - should be to end-of-line ... ## $func = \&orange; sub add_ucomment { my ($cp) = @_; $actcomment = $cp; } ##} elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE ## $func = \&green; sub add_usingleq { my ($cp) = @_; $actsingleq = $cp; } ## } elsif ($c eq '"') { ## $func = \&color3; sub add_udoubleq { my ($cp) = @_; $actdoubleq = $cp; tolog ("Active DOUBLE QUOTE = [$actdoubleq]\n") if $verb2; } ##} elsif ($c eq '$') { ## # start of scalar ## $func = \&color1; ### my %HScalarFnd = (); sub add_uscalar { my ($cp) = @_; if ( exists $HScalarFnd{$cp} ) { $HScalarFnd{$cp}++; # another count $actscalar = $cp; } else { $HScalarFnd{$cp} = 1; # set FOUND 1 $actscalar = $cp; return 1; } return 0; } ## } elsif ($c eq '@') { ## # start of array ## $func = \&match; ### my %HArrayFnd = (); sub add_uarray { my ($cp) = @_; if ( exists $HArrayFnd{$cp} ) { $HArrayFnd{$cp}++; # another count $actarray = $cp; } else { $HArrayFnd{$cp} = 1; # set FOUND 1 $actarray = $cp; return 1; } return 0; } ## } elsif ($c eq '%') { ## # start of hash ## $func = \&peach; ### my %HHashFnd = (); sub add_uhash { my ($cp) = @_; if ( exists $HHashFnd{$cp} ) { $HHashFnd{$cp}++; # another count $acthash = $cp; } else { $HHashFnd{$cp} = 1; # set FOUND 1 $acthash = $cp; return 1; } return 0; } ## } elsif ( isresword ($tx2) ) { ### exists $HResWds{$tx2} ## $func = \&blue; sub add_uresword { my ($rw) = @_; if (exists $HResWdFnd{$rw}) { $HResWdFnd{$rw}++; # another count } else { $HResWdFnd{$rw} = 1; # start count } $actresword = $rw; } ## } elsif ( isbinfun ($tx2) ) { ### exists $HBFuncs{$tx2} ## $func = \&color2; ### see seq print $fh <<EOF; and mark as "..." data until EOF sub add_ubfuncs { my ($rw) = @_; if (exists $HFuncsFnd{$rw}) { ### tolog ( "Bumped Funcs $rw ...\n" ); $HFuncsFnd{$rw}++; # another count } else { ### tolog ( "Created Funcs $rw ...\n" ); $HFuncsFnd{$rw} = 1; # start count } $actfunc = $rw; } ## } else { ## $func = \&white; # set default, white sub add_udefault { } ## if ($ln < 4) { # if it is a short 'bit' of the line ## if ( ispunctuat ($tx2) ) { # check if punc ## $func = \&grey; # yup, switch to grey sub add_upunc { my ($cp) = @_; if ( exists $HPuncsFnd{$cp} ) { $HPuncsFnd{$cp}++; # another count } else { $HPuncsFnd{$cp} = 1; # set FOUND 1 } $actpunc = $cp; ### store the active punctuation push(@actpuncs,$cp); ### stack of punctuation } sub isbracechr { my ($cp) = @_; foreach my $cc (@PPairs) { if ($cc eq $cp) { $actbrace = $cp; ### store the active punctuation return 1; } } return 0; } sub ispunctuat { my ($cp) = @_; foreach my $cc (@PPunct) { ###tolog ("Comaring [$cc] with [$cp]...\n"); if ($cc eq $cp) { $actpunc = $cp; ### store the active punctuation return 1; } } if ( isbracechr($cp) ) { $actpunc2 = $cp; ### store the active punctuation return 2; } return 0; } sub isresword { my ($rw) = @_; if ( exists $HResWds{$rw} ) { $actresword = $rw; return 1; } return 0; } sub isbinfun { my ($rw) = @_; if ( exists $HBFuncs{$rw} ) { $actfunc = $rw; return 1; } return 0; } sub do_PARSE_reset { my $k; $actfunc = ''; $actresword = ''; $actpunc = ''; } sub do_line_reset { # WHAT TO RESET EACH LINE??? } ## if ($c eq '#') { # comment component - should be to end-of-line ... ## $func = \&orange; ## $func2 = \&add_ucomment; ## } elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE ## $func = \&green; ## $func2 = \&add_usingleq; ## } elsif ($c eq '"') { ## $func = \&color3; ## $func2 = \&add_udoubleq; ## } elsif ($c eq '$') { ## # start of scalar ## $func = \&color1; ## $func2 = \&add_uscalar; ## } elsif ($c eq '@') { ## # start of array ## $func = \&match; ## $func2 = \&add_uarray; ## } elsif ($c eq '%') { ## # start of hash ## $func = \&peach; ## $func2 = \&add_uhash; ## } elsif ( isresword ($tx2) ) { ### exists $HResWds{$tx2} ## $func = \&blue; ## $func2 = \&add_uresword; ## } elsif ( isbinfun ($tx2) ) { ### exists $HBFuncs{$tx2} ## $func = \&color2; ## $func2 = \&add_ubfuncs; ## } else { ## $func = \&white; # set default, white ## $func2 = \&add_udefault; ## if ($ln < 4) { # if it is a short 'bit' of the line ## if ( ispunctuat ($tx2) ) { # check if punc ## $func = \&grey; # yup, switch to grey ## $func2 = \&add_upunc; ## } ## } ## } sub get_parse_stats { my $ms = "<p>Parse stats<br>\n"; my ($key, $value); my $k; my $i = 0; my $at; my $fu; ### $ms .= "<p>\n"; ## ========================================== $at = %HResWdFnd; $fu = \&blue; $ms .= '<table border=1><tr>'; $ms .= '<td>'; $ms .= $fu->('Reserved Words') . "<br>\n"; $ms .= '<table border="1">'; $i = 0; $ms .= "<tr><th>#</th><th>" . $fu->('ResWd') . "</th><th>Count</th></tr>\n"; foreach $key (keys %HResWdFnd) { ###foreach $key (keys %$at) { $i++; $ms .= '<tr>'; $ms .= '<td>'; $ms .= "$i"; $ms .= '</td>'; $ms .= '<td>'; $ms .= $fu->($key); ## "$key"; $ms .= '</td>'; $ms .= '<td>'; $ms .= $HResWdFnd{$key}; ###$ms .= "$$at{$key}"; $ms .= '</td>'; $ms .= '</tr>'; $ms .= "\n"; } $ms .= '</table>'; $ms .= "List of $i used reserve words ...<br> <br>\n"; $ms .= '</td>'; ## ========================================== ## ========================================== $ms .= '<td>'; $i = 0; $fu = \&color2; $ms .= $fu->('Built-in Functions') . "<br>\n"; $ms .= '<table border="1">'; $ms .= "<tr><th>#</th><th>" . $fu->('Funcs') . "</th><th>Count</th></tr>\n"; foreach $key (keys %HFuncsFnd) { $i++; $ms .= '<tr>'; $ms .= '<td>'; $ms .= "$i"; $ms .= '</td>'; $ms .= '<td>'; $ms .= $fu->($key); $ms .= '</td>'; $ms .= '<td>'; $ms .= $HFuncsFnd{$key}; $ms .= '</td>'; $ms .= '</tr>'; $ms .= "\n"; } $ms .= '</table>'; $ms .= "List of $i used built-in function words ...<br> <br>\n"; $ms .= '</td>'; ## ========================================== ## ========================================== $ms .= '<td>'; $i = 0; $fu = \&grey; $ms .= $fu->('Punctuation Used') . "<br>\n"; ### if ( exists $HPuncsFnd{$cp} ) { $ms .= '<table border="1">'; $ms .= "<tr><th>#</th><th>" . $fu->('Puncuat') . "</th><th>Count</th></tr>\n"; foreach $key (keys %HPuncsFnd) { $i++; $ms .= '<tr>'; $ms .= '<td>'; $ms .= "$i"; $ms .= '</td>'; $ms .= '<td>'; $ms .= $fu->(htmlise($key)); $ms .= '</td>'; $ms .= '<td>'; $ms .= $HPuncsFnd{$key}; $ms .= '</td>'; $ms .= '</tr>'; $ms .= "\n"; } $ms .= '</table>'; $ms .= "List of $i used punctuation ...<br> <br>\n"; $ms .= '</td>'; ## ========================================== ## ========================================== ### my %HArrayFnd = (); $ms .= '<td>'; $i = 0; $fu = \&match; $ms .= $fu->('Arrays') . "<br>\n"; $ms .= '<table border="1">'; $ms .= "<tr><th>#</th><th>" . $fu->('U.Arrays') . "</th><th>Count</th></tr>\n"; foreach $key (keys %HArrayFnd) { $i++; $value = $HArrayFnd{$key}; if ($value < 2) { ### $value = "<font color='red'>$value</font>"; $value = "<tt class='color1'>$value</tt>"; $key = "<tt class='color1'>$key</tt>"; } else { $key = $fu->($key); } $ms .= "<tr><td>$i</td><td>$key</td><td>$value</td></tr>\n"; } $ms .= '</table>'; $ms .= "List of $i user arrays ...<br> <br>\n"; $ms .= '</td>'; ## ========================================== ## ========================================== ### my %HHashFnd = (); $ms .= '<td>'; $i = 0; $fu = \&peach; $ms .= $fu->('Hash') . "<br>\n"; $ms .= '<table border="1">'; $ms .= "<tr><th>#</th><th>" . $fu->('U.Hash') . "</th><th>Count</th></tr>\n"; foreach $key (keys %HHashFnd) { $i++; $value = $HHashFnd{$key}; if ($value < 2) { ### $value = "<font color='red'>$value</font>"; $value = color1($value); ### "<tt class='color1'>$value</tt>"; $key = color1($key); ### "<tt class='color1'>$key</tt>"; } else { $key = $fu->($key); } $ms .= "<tr><td>$i</td><td>$key</td><td>$value</td></tr>\n"; } $ms .= '</table>'; $ms .= "List of $i user hash (associative arrays) ...<br> <br>\n"; $ms .= '</td>'; ## ========================================== ## ========================================== $ms .= '<td>'; ### my %HScalarFnd = (); $i = 0; $fu = \&color1; $ms .= $fu->('Scalar') . "<br>\n"; $ms .= '<table border="1">'; $ms .= "<tr><th>#</th><th>". $fu->('U.Scalar')."</th><th>Count</th></tr>\n"; foreach $key (keys %HScalarFnd) { $i++; $value = $HScalarFnd{$key}; if ($value < 2) { ### $value = "<font color='red'>$value</font>"; $value = orange($value); $key = orange($key); } else { $key = $fu->($key); } $ms .= "<tr><td>$i</td><td>$key</td><td>$value</td></tr>\n"; } $ms .= '</table>'; $ms .= "List of $i user scalars ...<br> <br>\n"; $ms .= '</td>'; ## ========================================== $ms .= "</tr>\n</table>\n"; $ms .= "</p>\n"; return $ms; } sub showarrcnts { my $i = @PPunct; tolog ("PPunct array count = $i\n"); $i = @PPairs; tolog ("PPairs array count = $i\n"); $i = @DolVars; tolog ("DolVars array count = $i\n"); $i = @PBPunc; tolog ("PBPunc array count = $i\n"); } sub get_line_num { my ($lnn) = @_; while (length($lnn) < 4) { $lnn = '0' . $lnn; } return $lnn; } ############################################################################# # process a perl file, adding 'style' to the code, line by line, mostly ... # File has been slurped into @lines (public) array ... # sub do_the_table { prt ("<p>File = [$infile]<br>\n"); add_html_table($OF); ### like <table align="center" width="90%" border="2" bgcolor="#eeeeee"> <tr> <td> if (! $addlinenums) { prt ("<tr>\n"); prt ("<td>\n"); } ### process LINE by LINE - but perhaps there should be states carried over # how to establish these states - particularly catch things like # s/"/"/g !!! foreach $line (@lines) { $txt = $line; chomp $txt; $countlines++; $actlnnum = get_line_num ($countlines); ## if ($addlinenums) { tolog ("\nLine $actlnnum:[$txt]\n"); ## } my $istx = 1; # assume text if ($txt =~ /$WHITE_PATTERN2/o ) { $istx = 0; # NOT text } else { $istx = 1; # have TEXT to deal with } if ( $istx ) { if ($dbgon) { tolog ("Simple WHITE-ised to HTML file ...\n") if $verb2; prt (htmlise($txt)); # just for COMPARISON } ###do_line_parse ($line); tolog ("Per line component parsing to HTML file ...\n") if $verb2; ###do_line_parse ($actlnnum . ' ' . $line); $txhtml = do_line_parse ($line); } else { ## if (! $istx) { tolog ("Simple WHITE-ised to HTML file ...\n") if $verb2; $txhtml = " "; # set no line } ### prt ($txt); # print this HTML line $txhtml .= "<br>\n"; if ($addlinenums) { prt (" <tr>\n"); prt (" <td>\n"); prt ($countlines); prt (" </td><td>\n"); prt ($txhtml); # print this HTML line prt (" </td>\n"); prt (" </tr>\n"); } else { prt ($txhtml); # print this HTML line } tolog ("\nLine $actlnnum:[" . join ('|', split (' ', $txt)) . "]\n"); } ### prt ("</p>\n"); if (! $addlinenums) { prt ("</td>\n"); prt ("</tr>\n"); } prt ("</table></p>"); } ############################################################################# sub add_include_tables { ### my @incfiles = (); # stack of include files, if any tolog ("Processing " . scalar @incfiles . " required files ...\n"); foreach $file (@incfiles) { if ( -f $file) { $infile = $file; tolog ("Opening $infile ...\n"); if (open $IF, "<$infile") { tolog ("Loading $infile ...\n"); @lines = <$IF>; # slurp whole file, to an array of lines close($IF); $lncnt = @lines; # get count tolog ("Processing $infile ... $lncnt lines\n"); do_the_table(); } else { tolog ("FAILED! no locate, open of $infile ...\n"); } } else { tolog ("FAILED! no locate, open of $file ...\n"); } } } # end add_include_tables = in @incfiles collected in parse ################################# ### FONT-FAMILY: 'Andale Mono', 'Lucida Console', monospace ### FONT-FAMILY: 'Courier New'; sub add_html_style { my ($fh) = @_; print $fh <<"EOF1"; <style><!-- TT { FONT-FAMILY: 'Andale Mono', 'Lucida Console', monospace } EOF1 ################## ###my @TTset = qw( match #0066ff #e8f4ff ... ); my $nm; my $bd; my $bg; my $mx = @TTset; #### my $ss = 3; tolog ("Processing $mx / 3 styles ...\n"); tolog ( @TTset . "\n" ); my $i; ## my $additem = \&addTTitem_bkgrd; ## my $additem = \&addTTitem_full; ## my $add_item = \&addTTitem_simp; ## ??while (($nm, $bd, $bg) = @TTset) { for ($i = 0; $i < ($mx / $ss); $i++) { $nm = $TTset[($i*$ss)+0]; $bd = $TTset[($i*$ss)+1]; $bg = $TTset[($i*$ss)+2]; ##addTTitem_full ($fh, $nm, $bd, $bg); ##addTTitem_bkgrd($fh, $nm, $bd, $bg); addTTitem_bkgrd2 ($fh, $nm, $bd, $bg); ##addTTitem_simp ($fh, $nm, $bd, $bg); } ################### print $fh <<"EOF2"; --> </style> EOF2 ### add_body_style ($fh); ### add little to the above .. } ### end of sub ######################### ### EOF