#!/perl -w
### #################################################
### p2html - perl code to HTML document format
### Works, mostly - still a SPACE-REPLACEMENT problem ...
### Geoff - geoffmclane.com - geoffmclane@hotmail.com
### ##################################################
use strict;
use warnings;
require "colours.pl" ;
require "eppearl.pl" ;
### global variables
my $vers = '0.0.8' ; # eighth iteration ... LOOKS GOOD ... still space replacement, regex, line no, in src ...
### regex is now NOT expanded, but only by exceptchr of '/', so still some problems ...
### space is not 'exactly' maintained ... should try not to parse inside a word array ...
my $WHITE_PATTERN2 = "^[ \t\n\r]*\$"; # spacey if ($var =~ /$WHITE_PATTERN2/o ) { ...}
my $tab_stg = ' ' ; # replace tabs, with 3 spaces
my $verb2 = 0;
my $verb3 = 0; ### add perl.stx parsing diag log
my $dbgon = 0; # 1 DOUBLES HTML OUTPUT FOR COMPARISON
my $perlstx = 'C:/Program Files/EditPlus 2/perl.stx';
my $DELIMITER = '(){}[]-+*/=~!&|<>?:;.,' ;
my $logfil = 'templog.txt' ;
my @logmsgs = ();
my ($OF, $IF, $LF, $STX);
my $colorON = 1;
my $name;
my $lc = 0;
my $dnpara = 1;
my @lnbits;
my @spbits;
my @copybits; ## keep, for ORIGINAL space work 'replacement'
my $chk;
my $istxt = 1;
my $actpunc = ''; ### store the active punctuation
my %HPuncsFnd = (); # hash of Punctuation FOUND in parse
my $expanOFF = 0; ### stop expansion temporarily ...
my $actresword = '';
my %HResWdFnd = ();
my $actfunc = ''; ### store the active built-in functions
my %HFuncsFnd = ();
my $actlnnum = '';
our @PPairs;
our @DolVars;
our @PBPunc;
our @TTset;
our @PPunct;
###require "colours.pl";
###require "eppearl.pl";
### start of program
####################
### Get command line input ...
my $infile = shift || '.' ;
my $outfil = shift || 'tempout.htm' ;
## my $func;
my @TTColrs = qw(l.blue brown l.br s.green pink mauve b.green l.brn blue white l.grey);
my @TTTypes = qw(array comment unass s-quote scalar functions d-quote hash reserved other punctuation);
my @TTAttrib = qw(match orange regex green color1 color2 color3 peach blue white grey);
for $name (@TTAttrib) {
no strict 'refs' ; # allow symbol table manipulation
*$name = *{uc $name} = sub { "<TT class='$name'>@_</TT>" };
### *$name = *{uc $name} = sub { "<TT class='$name'>\n@_\n</TT>" };
}
###my @colors = qw(red blue green yellow orange purple violet);
my @colors = qw(red yellow purple violet);
for $name (@colors) {
no strict 'refs' ; # allow symbol table manipulation
*$name = *{uc $name} = sub { "<FONT COLOR='$name'>@_</FONT>" };
}
my $ss = 5;
##our @TTset;
##our @PPunct;
##require "colours.pl";
##require "eppearl.pl";
my $msg = '';
my ($line, $txt);
my $i = 0;
my ($cnt1, $cnt2);
my $inbraces = 0;
my $c;
my $c3;
if ($infile eq '.' ) {
die "No input file given ...\n";
}
open $LF, ">$logfil" or die "Can NOT open LOG file $logfil!\n";
tolog ( "$0 Started " . localtime(time()) . " ...\n");
if (! -f $infile) {
die "Input file [$infile] NOT FOUND! ...\n";
}
tolog ( "Opening $infile ...\n");
open $IF, "<$infile" or die "Can not OPEN $infile!\n";
tolog ( "Loading $infile ...\n");
my @lines = <$IF>; # slurp whole file, to an array of lines
close($IF);
open $OF, ">$outfil" or die "Can not create $outfil!\n";
###### pre-process perl.stx file ######################################
open $STX, "<$perlstx" or die "Can NOT locate $perlstx file...\n";
my @stx = <$STX>;
close($STX);
$i = @stx;
tolog ( "List of $i STX file lines...\n");
my %stxh;
my @ResWds = ();
my @BFuncs = ();
my %HResWds;
my %HBFuncs;
my $sw = 0; # no switch on
foreach $line (@stx) {
chomp $line;
my $ll = length($line); # get LENGTH of file line
my @a;
my $k;
my $v;
$c = substr ($line, 0, 1);
$msg = '';
if ($c eq ';' ) { # comment
$msg = 'comment only';
} elsif ($c eq '#' ) { # hash item=value
$msg = ' hash';
@a = split( '=' , $line); # get key/value
($k, $v) = @a;
$k = substr($k, 1);
###$stxh{$a[0]} = $a[1];
if ( exists $stxh{$k} ) {
if ($stxh{$k} eq $v) {
$msg .= ' same ';
} else {
$msg .= ' new ';
}
$stxh{$k} .= '|' . $v;
###$v = $stxh{$k};
} else {
$stxh{$k} = $v;
}
### $msg .= ' k=' . $a[0] . ' v=' . $a[1] . '-';
###$msg .= ' k=' . $k . ' v=' . $v . ' - ';
$msg .= ' k=' . $k . ' v=' . $stxh{$k} . ' - ';
#KEYWORD=Reserved words
#KEYWORD=Built-in functions
if ($k eq 'KEYWORD' ) {
if ($v eq 'Reserved words') {
$sw = 1;
$msg .= '(ResWds)' ;
} elsif ($v eq 'Built-in functions') {
$sw = 2;
$msg .= '(BFuncs)' ;
} else {
$sw = 0;
}
}
}
if ($ll > 1) {
if ($sw == 1) {
push(@ResWds, $line);
if ( exists $HResWds{$line} ) {
die "Duplicate RESERVE WORD [$line]\n"
}
$HResWds{$line} = $line;
$msg .= " - rw+";
} elsif ($sw == 2) {
push(@BFuncs, $line);
if ( exists $HBFuncs{$line} ) {
die "Duplicate BUILT-IN FUNCTION [$line]\n"
}
$HBFuncs{$line} = $line;
$msg .= " - bf+";
}
}
tolog ($line . $msg . "\n" ) if $verb3;
}
$line = 'new' ;
if ( ! exists $HBFuncs{$line} ) {
$msg = ' ++Added';
push(@BFuncs, $line);
$HBFuncs{$line} = $line;
tolog ($line . $msg . "\n" );
}
$cnt1 = @ResWds;
$cnt2 = @BFuncs;
tolog ( "END List of $i STX file lines...rw=$cnt1 bf=$cnt2 \n");
###### end-process perl.stx file ######################################
add_html_head( $OF, $infile );
add_html_table($OF); ### like <table align="center" width="90%" border="2" bgcolor="#eeeeee"> <tr> <td>
my $lncnt = @lines; # get count
my $cntlns = 0;
tolog ( "Processing $infile ... $lncnt lines\n");
prt ( "<p>\n" );
foreach $line (@lines) {
$txt = $line;
chomp $txt;
$cntlns++;
$actlnnum = get_line_num ($cntlns);
tolog ( "\nLine $actlnnum:[$txt]\n");
$istxt = 1; # assume text
if ($txt =~ /$WHITE_PATTERN2/o ) {
$txt = "</p>\n<p>\n" ; # CLOSE paragraph, and open
$istxt = 0; # NOT text
} else {
### $txt = white(htmlise($txt));
$txt = htmlise($txt);
$txt .= "<br>\n" ;
}
if ( $istxt ) {
if ($dbgon) {
tolog ( "Simple WHITE-ised to HTML file ...\n") if $verb2;
prt ($txt); # just for COMPARISON
}
} else { ## if (! $istxt) {
tolog ( "Simple WHITE-ised to HTML file ...\n") if $verb2;
prt ($txt); # just for COMPARISON
}
if ($istxt) {
###do_line_parse ($line);
tolog ( "Per line component parsing to HTML file ...\n") if $verb2;
###do_line_parse ($actlnnum . ' ' . $line);
do_line_parse ($line);
}
}
print $OF <<EOF;
</td>
</tr>
</table>
EOF
tolog ( "Processed $lc lines of $infile ... written to $outfil ... add tail ...\n");
add_color_samp($OF);
prt ( get_parse_stats () );
add_html_tail($OF);
showarrcnts();
tolog ( "$0 Ended " . localtime(time()) . " ...\n");
close($OF);
system $outfil;
# system $logfil;
sub prt {
tolog (@_);
print $OF @_;
}
### COLOR: #00008b;
sub addTTitem_simp {
my ($fh, $nm, $bd, $bg) = @_;
print $fh << "EOF3" ;
.$nm { COLOR: $bd }
EOF3
}
sub addTTitem_vOK {
my ($fh, $nm, $bd, $bg) = @_;
print $fh << "EOF3" ;
.$nm { BACKGROUND-COLOR: $bg }
EOF3
}
sub addTTitem_full {
my ($fh, $nm, $bd, $bg) = @_;
print $fh << "EOF3" ;
.$nm
{
COLOR: $bd;
BORDER-TOP: $bd 1px solid;
BORDER-LEFT-WIDTH: 1px;
BORDER-LEFT-COLOR: $bd;
PADDING-BOTTOM: 1px;
PADDING-TOP: 1px;
BORDER-BOTTOM: $bd 1px solid;
WHITE-SPACE: nowrap;
BACKGROUND-COLOR: $bg;
BORDER-RIGHT-WIDTH: 1px;
BORDER-RIGHT-COLOR: $bd
}
EOF3
}
sub add_html_style {
my ($fh) = @_;
print $fh << "EOF1" ;
<style><!--
TT
{
FONT-FAMILY: 'Courier New';
}
EOF1
### FONT-FAMILY: 'Andale Mono', 'Lucida Console', monospace
#################################
###my @TTset = qw( match #0066ff #e8f4ff string #0000ff #ccccff );
my $nm;
my $bd;
my $bg;
my $mx = @TTset;
#### my $ss = 3;
tolog ( "Processing $mx / 3 styles ...\n");
tolog ( @TTset . "\n" );
my $i;
###my $additem = \&addTTitem_vOK;
###my $additem = \&addTTitem_full;
### my $add_item = \&addTTitem_simp;
## ??while (($nm, $bd, $bg) = @TTset) {
for ($i = 0; $i < ($mx / $ss); $i++) {
$nm = $TTset[($i*$ss)+0];
$bd = $TTset[($i*$ss)+1];
$bg = $TTset[($i*$ss)+2];
addTTitem_full ($fh, $nm, $bd, $bg);
###addTTitem ($fh, $nm, $bd, $bg);
###&add_item->($fh, $nm, $bd, $bg);
}
###################################
print $fh << "EOF2" ;
--></style>
EOF2
}
sub add_html_head {
my ($fh, $hdr) = @_;
print $fh << "EOF" ;
<html>
<!-- P26.2005.05.10 geoffmclane.com perl
HTML generated using p2html5.pl -
-->
<head>
<title>$hdr</title>
</head>
EOF
# dynamic block of style - could be put to a file ...
add_html_style($fh);
print $fh << "EOF" ;
<body>
<h1 align= "center" >$hdr</h1>
<p align= "center" ><a href= "perl.htm" >back</a></p>
EOF
}
sub add_html_table {
my ($fh) = @_;
print $OF <<EOF;
<table align= "center" width= "90%" border= "2" bgcolor= "#eeeeee" >
<tr>
<td>
EOF
}
sub add_html_tail {
my ($fh) = @_;
print $fh << "EOF" ;
<p align= "center" ><a href= "perl.htm" >back</a></p>
</body>
</html>
EOF
}
my @TypeColors_NOTUSED = (
###if ($c eq '#') { # comment component - should be to end-of-line, or more ...
"comment" , ### $func = \&orange;
###} elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE
"s.quote" , ### $func = \&green;
### } elsif ($c eq '"') {
"d.quote" , ### $func = \&color3;
###} elsif ($c eq '$') { # start of scalar
"scalar" , ### $func = \&color1;
###} elsif ($c eq '@') { # start of array
"array" , ### $func = \&match;
###} elsif ($c eq '%') { # start of hash
"hash" , ### $func = \&peach;
###} elsif ( exists $HResWds{$tx2} ) {
"reserved" , ### $func = \&blue;
### } elsif ( exists $HBFuncs{$tx2} ) {
"functions" , ### $func = \&color2;
### } else {
"other" ### $func = \&white;}
);
sub a2f {
my ($f,$t) = @_;
print $f $t;
}
sub n_row {
###my ($f) = @_;
a2f (@_, " <tr>");
}
sub n_col {
###my ($f) = @_;
a2f (@_, " <td>");
}
sub c_row {
###my ($f) = @_;
a2f (@_, " </tr>");
}
sub c_col {
###my ($f) = @_;
a2f (@_, " </td>");
}
## my $func;
### my @TTColrs = qw(l.blue brown l.br s.green pink mauve b.green l.brn blue white l.grey);
### my @TTTypes = qw(array comment unass s-quote scalar functions d-quote hash reserved other punctuation);
### my @TTAttrib = qw(match orange regex green color1 color2 color3 peach blue white grey);
sub add_color_samp {
my ($fh) = @_;
$i = 0;
print $fh <<EOF;
<p>Colour Key :<br>Function, Description., Colour<br>
<table border= "1" bgcolor= "#eeeeee" >
EOF
### out attributes
n_row $fh; # add " <tr>\n"; # open ROW
n_col $fh; # add " <td>\n"; # open COLUMN
a2f $fh, "Style" ;
c_col $fh; # add " </td>\n"; # close COLUMN
n_col $fh; # add " <td>\n"; # open COLUMN
a2f $fh, "Description" ;
c_col $fh; # add " </td>\n"; # close COLUMN
n_col $fh; # add " <td>\n"; # open COLUMN
a2f $fh, "Colour" ;
c_col $fh; # add " </td>\n"; # close COLUMN
c_row $fh; ### " </tr>\n"; # close ROW
foreach $name (@TTAttrib) {
###no strict 'refs'; # allow symbol table manipulation
my $fun = \&$name; ## get the function - the auto-generated sub
n_row $fh; # add " <tr>\n"; # open ROW
n_col $fh; # add " <td>\n"; # open COLUMN
### a2f $fh, "Attributes";
$msg = $name;
$txt = $fun->($msg);
a2f $fh, $txt;
c_col $fh; # add " </td>\n"; # close COLUMN
n_col $fh; # add " <td>\n"; # open COLUMN
### a2f $fh, "Function";
$msg = $TTTypes[$i];
$txt = $fun->($msg);
a2f $fh, $txt;
c_col $fh; # add " </td>\n"; # close COLUMN
n_col $fh; # add " <td>\n"; # open COLUMN
### a2f $fh, "Colour"; @TTColrs
$msg = $TTColrs[$i];
$txt = $fun->($msg);
a2f $fh, $txt;
c_col $fh; # add " </td>\n"; # close COLUMN
c_row $fh; ### " </tr>\n"; # close ROW
$i++; # bump to next
}
### end if all
print $fh <<EOF;
</table>
</p>
EOF
### all done ...
}
sub tolog {
print @_;
print $LF @_;
}
sub xceptchr {
my ($chr) = @_;
###if (($chr eq ':') || ($chr eq '=') || ($chr eq '|') || ($chr eq ',')) {
if (
($chr eq '/' ) ||
($chr eq ':' ) ||
($chr eq '|' )
) {
return 1;
}
return 0;
}
sub is_a_quote {
my ($chr) = @_;
if (($chr eq '"' ) || ($chr eq "'" )) {
return 1;
}
return 0;
}
sub get_a_quote {
my ($t) = @_;
my $mx = length($t);
my $i;
if ($t =~ /[ '"]/) { # match quote
for ($i = 0; $i < $mx; $i++) {
my $chr = substr ($t, $i, 1);
if (is_a_quote($chr)) {
return $chr;
}
}
}
return 0;
}
### NOT passed an ALL-SPACEY line
sub do_line_parse {
my ($tx) = @_;
chomp $tx;
### my @copybits; ## keep, for ORIGINAL space work 'replacement'
my $tx2 = $tx;
my $tx3;
my $tx4 = htmlise($tx); ## the HTML'ISED string
my $txsp = ''; # frontend SPACEY stuff
### no way ! my $txsp = $htmnbs; # frontend SPACEY stuff
my $tx5;
my $tx6;
my $c1 = substr ($tx, 0, 1); # get and keep first char
@lnbits = split ( ' ', $tx); # initial split spaces
my $c2 = substr ($lnbits[0], 0, 1); # get POTENTIAL new first char
my $pos1 = index ($tx, $c2); # get pos of first array char, in string
my $gotfes = 0; # no frontend space
if ($pos1 > 0) {
$gotfes = 1; # mark, got frontend space
$txsp = substr($tx, 0, $pos1); # get SPACEY at FRONT
}
my $cnt = @lnbits; # count of componets, so far
my $cntorg = $cnt; # keep original SIZE, $cnt is 'adjusted' during ...
my $i = 0;
my $i3 = 0;
my @sp11;
my $nct = 0; # count AFTER array 'adjustments' ...
my $ln = length($tx2); # get length of line, not soooo important
my $ch = substr ($tx2, 0, 1); # get first char, for fast decisions
my $c = $ch; ### copy of FIRST char
### if ($lnbits[0] =~ m/^\#/) {
if ($c1 eq '#' ) {
#######################################################
# is comment
tolog ( "Is comment - try ...\n");
###$tx3 = green($tx4);
$tx3 = orange($tx4);
$tx3 .= "<br>\n" ;
prt ($tx3);
#######################################################
} else {
## does not START with a # comment char
#### tolog ("########### parse run one ###############################(c=$cnt)\n") if $verb2;
if ($verb2) {
tolog ( "########### parse run one ###############################(c=$cnt)\n");
$msg = '';
foreach $tx2 (@lnbits) {
$msg .= "[$tx2]" ;
}
$msg .= "\n" ;
tolog ($msg);
}
$i3 = 0;
my $ichg = 0; ### count of bit changes
### first run - to re-combine quoted text within LINE ARRAY
$ichg = 0;
@logmsgs = (); ### clear LOG message stack
###tolog ("{ comps $cntorg\n"); # log COUNT at start
$msg = ( "{ comps $cntorg\n"); # log COUNT at start
push(@logmsgs,$msg); ## accumulate
### this pre-run JOINS or SPLITS = ENSURE EACH QUOTED BLOCK is in its own bucket
my $icnt = 0; ### init line 'bits' counter
doparsereset ();
foreach $tx2 (@lnbits) {
$icnt++; # PRE-BUMP THE COUNT
$msg = $tx2; # set line bit
$ln = length($tx2);
$ch = substr($tx2, 0, 1);
$i = 0;
### special +?.*^$()[]{}|\
### if ($tx2 =~ /^['"]/ ) { ## "' # does it start with quotes d or s
if (($ch eq '"' )||($ch eq "'" )) {
$msg .= " Begin Q (l=$ln)[";
$msg .= $tx2;
$msg .= ']' ;
$i3 = 1; # set JOIN
if ($ln > 1) {
$i3 = 1; # set JOIN
###$tx3 = substr ($tx2, 1, $ln - 1); # get past quote
$tx3 = substr ($tx2, 1); # get past quote
if (($ln > 1) && ($tx3 =~ /$ch/)) {
$pos1 = index ($tx3, $ch); # get position of next quote
$msg .= ' and end [';
$msg .= $tx3;
$msg .= "](p=$pos1)" ;
if ($pos1 > 0) {
$tx5 = substr ($tx2, 0, ($pos1 + 1 + 1)); # get WHOLE QUOTE
$tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if ANY
if (length($tx3)) {
### error case
### "_","|", DONE WOULD SPLIT ["_"][,"|",]? b&e same quotes
$msg .= ' quote split ';
$msg .= '[' ;
$msg .= $tx5;
$msg .= ']' ;
$msg .= '[' ;
$msg .= $tx3;
$msg .= ']?' ;
$lnbits[$icnt - 1] = $tx5; # put back adjusted first
@sp11 = ($tx3); ### bit-to-insert
### if ( $tx3 =~ /$ch/ ) {
if ((length($tx3) > 1) &&
( $tx3 =~ /[ '"]/ )) {
### zeek, there are more of these ...
$i = 0;
$tx5 = '';
while(1) {
$c = substr ($tx3, $i, 1);
if (($c eq '"' )||
($c eq "'" ) ) {
last;
}
$i++; # bump to next
if ($i >= ($ln - 1)) {
$c = 0;
last;
}
}
if ($i) {
if (($c eq '"' )||($c eq "'" )) {
$tx5 = substr ($tx3, 0, $i); # get before QUOTE
$tx3 = substr ($tx3, $i ); # get balance
$sp11[0] = $tx5;
push(@sp11,$tx3);
$ichg++;
}
}
$msg .= " found [$c] split [$tx5] [$tx3]* ";
}
splice (@lnbits, $icnt, 0, @sp11); # insert 1 or more new items
### splice (@lnbits, $i2, 0, $tx3); # insert 1 new items
$cnt = @lnbits; ### ADJUST COUNT ITERATOR
$ichg++;
}
}
$msg .= " b&e same quotes";
$i3 = 0;
}
}
if ($i3) {
### JOIN, until the END OF THIS QUOTE
$i3 = 0;
$tx6 = $tx2; ### start slurping
for ($i = $icnt; $i < $cnt; $i++) {
$tx3 = $lnbits[$i]; # get next
$msg .= ( '+[' . $tx3 . ']' );
$tx6 .= ' '; # add back space
$tx6 .= $tx3; ### $lnbits[$i];
$i3++; ### count 'bits' to DELETE
$ichg++; ### count a CHANGE
if ($tx3 =~ /$ch/) {
@sp11 = ();
$msg .= '-' ;
$pos1 = index ($tx3, $ch); # get position of next quote
if ($pos1 > 0) {
$tx5 = substr ($tx3, 0, $pos1); # get BEFORE QUOTE
$tx3 = substr ($tx3, $pos1); # get ending text, if ANY
$msg .= " *CHK [$tx5] [$tx3]???\n";
if ((length($tx3) > 1) &&
( $tx3 =~ /[ '"]/ )) {
### zeek, there are more of these ...
$i = 0;
$tx5 = '';
while(1) {
$c = substr ($tx3, $i, 1);
if (($c eq '"' )||
($c eq "'" ) ) {
last;
}
$i++; # bump to next
if ($i >= ($ln - 1)) {
$c = 0;
last;
}
}
if ($i) {
if (($c eq '"' )||($c eq "'" )) {
$tx5 = substr ($tx3, 0, $i); # get before QUOTE
$tx3 = substr ($tx3, $i ); # get balance
@sp11 = ($tx5,$tx3);
$ichg++;
}
}
}
$msg .= " could split [$tx5] [$tx3]* ";
}
$msg .= " found end [$c] split ";
last; # exit when terminator found
}
}
$msg .= " *REPLACING [$tx2] with [$tx6]!";
$lnbits[$icnt - 1] = $tx6; # put back single quoted message
splice (@lnbits, $icnt, $i3); # collapse following items
$msg .= ", now joined, to its end\n";
$cnt = @lnbits; ### UPDATE THE COUNT
}
} elsif ($tx2 =~ /[ '"]/ ) { ## "' # does it CONTAIN quotes, d OR s
$c = get_a_quote($tx2);
$pos1 = index ($tx2, $c); # get position of next quote
if (($pos1 > 0) && $c) {
$msg .= " QUOTE $c split, at $pos1 ";
$tx5 = substr ($tx2, 0, $pos1); # get before QUOTE
$tx3 = substr ($tx2, $pos1 ); # get balance
### check back $msg .= "would replace [".$lnbits[$icnt - 1]."][$tx5]";
$lnbits[$icnt - 1] = $tx5; # fix this 'line-bit'
@sp11 = ($tx3); ### add this one
splice (@lnbits, $icnt, 0, @sp11); # add bucket
$msg .= ", now sep [$tx5][$tx3]";
$cnt = @lnbits; ### UPDATE THE COUNT
} else {
die "ERROR: Handler above does BITS-OF-LINE that begin with a QUOTE!!!\n";
}
} elsif ($ch eq '#' ) { # if line-bit starts with a perl comment
## join to end of line
$i3 = 0;
$tx5 = $tx2;
$tx6 = $lnbits[$icnt - 1];
for ($i = $icnt; $i < $cnt; $i++) {
$tx3 = $lnbits[$i];
$tx5 .= ' ';
$tx5 .= $tx3; ### $lnbits[$i];
$i3++;
$ichg++;
}
if ($i3) {
$msg .= ' Joined [';
$msg .= $tx6; ### = $lnbits[$icnt - 1];
$msg .= '] to [';
$msg .= $tx5;
$lnbits[$icnt - 1] = $tx5; # put back single quoted message
$msg .= '] sp ' . $icnt . ' ' . $i3;
splice (@lnbits, $icnt, $i3); # collapse following items
$msg .= " end-of-line comment";
$cnt = @lnbits;
}
} else {
## not begin quote ' or ", nor begin # ...
## dealt with on NEXT iteration of line bits - left for diagnostic only ###
$c = 0;
$tx3 = substr($tx2,1);
if (($ch eq '$' ) || ($ch eq '@' ) || ($ch eq '%' )) {
# start of a scalor, array, hash ... move on to next letter
$c = gotdelim($tx3); ### any more in this line
if ( length($tx3) && ($c) && ! xceptchr($c) ) { # got first split point, AFTER $var ...
$pos1 = index ($tx3,$c);
}
} else {
$tx3 = $tx2; ### check full line
$c3 = gotdelim($tx3);
if ( length($tx3) && ($c3) ) { # got first split point
$pos1 = index ($tx3,$c3);
} # process $tx3
}
if ($c && ! xceptchr($c) ) {
$msg .= ' *EXCEPTED* ';
$msg .= $c;
$msg .= '* ';
}
if ( isresword ($tx2) ) { ### exists $HResWds{$tx2}
$msg .= ' *B*'; ### blue('R');
}
if ( isbinfun ($tx2) ) { ## exists $HBFuncs{$tx2}
$msg .= ' *P*';
}
if ( $ln < 4 ) {
### tolog ( "*PUNC* CHECK [" . $tx2 . "]\n" );
if ( ispunctuat ( $tx2 ) ) {
###$actpunc = $tx2; ### store the active punctuation
$msg .= ' *PUNC*';
}
}
}
###tolog ($msg . "\n");
$msg .= "\n" ; # add end of line
push(@logmsgs, $msg); ### store the LOG
} # for array list of line components === ONLY DOING JOINING
$nct = @lnbits;
if ($cnt != $nct) {
die "***FIX a COUNT UPDATE $cnt $nct $cntorg ????\n";
}
if ($cntorg == $nct) {
$msg = "} end comps $cntorg\n";
} else {
$msg = ( "} end comps $cntorg, adj. $nct " . ($cntorg - $nct) . "\n" );
}
push(@logmsgs, $msg);
if ($ichg || $verb2) {
tolog ( "Run 1 made " . $ichg . " changes in line - CHECK CHANGE\n" );
foreach $msg (@logmsgs) {
tolog($msg);
}
} else {
### no change
if ($verb2) {
tolog ( "No change\n");
}
}
@copybits = @lnbits; ### take a SNAP of this QUOTE ONLY EXPANSION
### want to RETURN the line to this SPACING, if possible ###
tolog ( "########### parse run two ###############################\n") if $verb2;
#################### DO IT ALL NOW ###################
###tolog ("{ comps $nct\n"); # log COUNT at start
@logmsgs = ();
$msg = ( "{ comps $nct\n"); # log COUNT at start
push(@logmsgs,$msg); ## accumulate
$icnt = 0; ### init line 'bits' counter
$ichg = 0;
doparsereset ();
foreach $tx2 (@lnbits) {
$icnt++; # PRE-BUMP THE COUNT
$msg = $tx2; ### diag - add the bit-of-the-line to log output
$ln = length($tx2);
$ch = substr ($tx2, 0, 1);
$i = 0;
### special +?.*^$()[]{}|\
### if ($tx2 =~ /^['"]/ ) { ## "' # does it start with quotes d or s
if (($ch eq '"' )||($ch eq "'" )) {
#########################################
### $msg .= " begin quote (p2)";
$i = 1; # set JOIN
if ($ln > 1) {
$tx3 = substr ($tx2, 1, $ln - 1); # get past quote
if ( $tx3 =~ /$ch/) {
$pos1 = index ($tx3, $ch); # get position of next quote
if ($pos1 > 0) {
$tx5 = substr ($tx2, 0, ($pos1 + 1 + 1)); # get WHOLE QUOTE
$tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if ANY
if (length($tx3)) {
### error case
### "_","|", DONE WOULD SPLIT ["_"][,"|",]? b&e same quotes
$msg .= ' DONE WOULD SPLIT ';
$msg .= '[' ;
$msg .= $tx5;
$msg .= ']' ;
$msg .= '[' ;
$msg .= $tx3;
$msg .= ']?' ;
$lnbits[$icnt - 1] = $tx5; # put back adjusted first
### if ( $tx3 =~ /$ch/ ) {
if ( $tx3 =~ /[ '"]/ ) {
### zeek, there are more of these ...
$msg .= ' *MESS if , excepted ';
}
splice (@lnbits, $icnt, 0, $tx3); # insert 1 new items
$cnt = @lnbits; ### ADJUST COUNT ITERATOR
$ichg++;
}
}
$msg .= " b&e same quotes";
$i = 0;
}
}
if ($i) {
# should JOIN until the END
$i3 = 0;
for ($i = $icnt; $i < $cnt; $i++) {
$tx3 = $lnbits[$i]; # get next
$tx2 .= ' '; # add back space
$tx2 .= $tx3; ### $lnbits[$i];
$i3++;
$ichg++;
if ($tx3 =~ /$ch/) {
last; # exit when terminator found
}
}
$lnbits[$icnt - 1] = $tx2; # put back single quoted message
###splice (@lnbits, $i2, $cnt - $i2); # collapse following items
splice (@lnbits, $icnt, $i3); # collapse following items
$msg = $tx2;
$msg .= ", now joined, to its end";
$cnt = @lnbits; ### UPDATE THE COUNT
}
$i3++;
#########################################
} elsif ($ch eq '#' ) { # if starts with a comment
#########################################
## should join to end of line
$i3 = 0;
for ($i = $icnt; $i < $cnt; $i++) {
$tx3 = $lnbits[$i];
$tx2 .= ' ';
$tx2 .= $tx3; ### $lnbits[$i];
$i3++;
$ichg++;
}
$msg .= ' joined ';
$msg .= $lnbits[$icnt - 1];
$msg .= ' to ';
$msg .= $tx2;
$lnbits[$icnt - 1] = $tx2; # put back single quoted message
###splice (@lnbits, $i2, $cnt - $i2); # collapse following items
$msg .= ' sp ' . $icnt . ' ' . $i3 . '[' ;
splice (@lnbits, $icnt, $i3); # collapse following items
### $msg = $tx2;
$msg .= "], line comment";
$cnt = @lnbits;
$i3++;
#########################################
} else {
#########################################
## not begin quote ' or ", nor begin # ...
$c = 0;
$tx3 = substr($tx2,1);
if (($ch eq '$' ) || ($ch eq '@' ) || ($ch eq '%' )) {
# start of a scalor, array, hash ... move on to next
$c = gotdelim($tx3);
if ( length($tx3) && ($c) && ! xceptchr($c) ) { # got first split point, AFTER $var ...
$pos1 = index ($tx3,$c);
if ($pos1 > 0) {
$i3 = 0;
$tx5 = $ch; # put first char back
$tx5 .= substr ($tx3, 0, $pos1); # get up to CHAR
@sp11 = ($c);
$tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any
if (length($tx3)) {
push(@sp11, $tx3); # put in slurp
if ((($c eq '(' ) && (substr($tx3,0,1) eq ')' )) ||
(($c eq '+' ) && (substr($tx3,0,1) eq '+' )) ) { # eg check *split* [$sock->accept][(][);]
$i3 = 1; # some EXCEPTIONS
}
}
if ($i3) {
$msg = '*NO* *split* [';
} else {
$msg = 'DONE *split* [';
}
$msg .= $tx5 . '][' ;
$msg .= $c . ']' ;
if (length($tx3)) {
$msg .= '[' ;
$msg .= $tx3 . ']' ;
}
$msg .= "\n" ;
push(@logmsgs,$msg);
###tolog ($msg . "\n");
if ($i3 == 0) {
$lnbits[$icnt - 1] = $tx5; # put back first split
splice (@lnbits, $icnt, 0, @sp11); # insert 1 or 2 new items
$cnt = @lnbits; ### ADJUST COUNT ITERATOR
$ichg++;
}
}
$msg = $tx2; # put original message back
}
} else {
## not begin quote ' or ", nor begin # ...
### and is NOT if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) {
$tx3 = $tx2;
my $c3 = gotdelim($tx3);
###if ( length($tx3) && ($c3) ) { # got first split point
if ( ($ln) && ($c3) ) { # got first split point
$pos1 = index ($tx3,$c3);
if ( $pos1 > 0 ) { # if the first char, or ...
### we have something, a million other variations
##my $ts = '\\';
##$ts .= $c3;
##@sp11 = split ($ts, $tx3);
$tx5 = substr ($tx3, 0, $pos1); # get up to CHAR
###@sp11 = ($tx5, $c3);
@sp11 = ($c3);
$tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any
if (length($tx3)) {
push(@sp11, $tx3); # put in slurp
}
###if (($c3 ne ':') && ($c3 ne '=') && ($c3 ne '|')) {
if ( ! xceptchr($c3) ) {
$msg = 'done Split [';
$msg .= $tx5 . '][' ;
$msg .= $c3 . ']' ;
if (length($tx3)) {
$msg .= '[' ;
$msg .= $tx3 . ']' ;
}
tolog ($msg . "\n" );
$lnbits[$icnt - 1] = $tx5; # put back first split
###splice (@lnbits, $i2, 0, $c3);
###if (length($tx3)) {
### splice (@lnbits, ($i2+1), 0, $tx3);
###}
splice (@lnbits, $icnt, 0, @sp11); # insert 1 or 2 new items
##splice (@lnbits, ($i2 - 1), 1, @sp11); # INSERT into array at this pos
$cnt = @lnbits; ### ADJUST COUNT ITERATOR
$ichg++;
}
} elsif ( $pos1 == 0 ) {
$tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any
if (length($tx3)) {
@sp11 = ($c3, $tx3); # put in slurp
### if (($c3 ne ':') && ($c3 ne '=') && ($c3 ne '|')) {
if ( ! xceptchr($c3) ) {
$msg = 'DONE SPLIT [';
$msg .= $c3 . '][' ;
$msg .= $tx3 . ']' ;
##tolog ($msg . "\n");
$msg .= "\n" ;
push(@logmsgs,$msg);
###tolog (@sp11 . "\n");
##splice (@lnbits, ($i2 - 1), 1, @sp11); # INSERT into array at this pos
$lnbits[$icnt - 1] = $c3; # put back first split
splice (@lnbits, $icnt, 0, $tx3);
$ichg++;
$cnt = @lnbits; ### ADJUST COUNT ITERATOR
}
}
} else {
### last;
die "ERROR: Unresolved POSITION - can not happen ...\n";
}
} # process $tx3
}
#########################################
$msg = $tx2;
if ($c && ! xceptchr($c) ) {
$msg .= ' *EXCEPTED* ';
$msg .= $c;
$msg .= '* ';
}
if ( isresword ($tx2) ) { ### exists $HResWds{$tx2}
$msg .= ' *B*'; ### blue('R');
$i3++;
}
if ( isbinfun($tx2) ) { ## exists $HBFuncs{$tx2}
$msg .= ' *P*';
$i3++;
}
if ( $ln < 4 ) {
### tolog ( "*PUNC* CHECK [" . $tx2 . "]\n" );
if ( ispunctuat ( $tx2 ) ) {
$msg .= ' *PUNC*';
}
}
#########################################
}
### tolog ($msg . "\n");
$msg .= "\n" ;
push(@logmsgs,$msg);
} # for array list of line components
$nct = @lnbits;
if ($cnt != $nct) {
die "***FIX a COUNT UPDATE $cnt $nct $cntorg ????\n";
}
if ($cntorg == $nct) {
$msg = ( "} end comps $cntorg\n");
} else {
$msg = ( "} end comps $cntorg, adj. $nct " . ($cntorg - $nct) . "\n" );
}
push(@logmsgs,$msg);
if ($ichg || $verb2) {
tolog ( "Run 2 Made " . $ichg . " changes in line - CHECK CHANGE\n" );
foreach $msg (@logmsgs) {
tolog($msg);
}
} else {
### no change
if ($verb2) {
tolog ( "Run 2 - No change\n");
}
}
tolog ( "########### output run ###############################\n") if $verb2;
### tolog ("{{ $nct");
@logmsgs = ();
$msg = ( "{{ $nct");
push(@logmsgs,$msg);
### perpare for HTML output
###########################
$tx3 = ''; # clear FRONTEND output
### $tx3 = $txsp; # get the FRONTEND SPACE
if (($c1 eq ' ') || ($c1 eq "\t" )) {
die "ERROR: Missed a case above ...\n" if ! $gotfes; # MISS FRONTEND SPACE
### $tx3 .= ' '; # add last space back
$tx3 = white(htmlise($txsp));
## $tx3 = ' ';
## $tx3 = htmlise($txsp); # space to HTML
if ($verb2) {
$msg = "\nSpace=[\n" ;
$msg .= $txsp;
$msg .= "]\n[" ;
$msg .= $tx3;
$msg .= ']' ;
tolog ($msg . "\n" );
}
} else {
die "ERROR: Missed a case above ...\n" if $gotfes; # MISS FRONTEND SPACE
}
#############################################
$i3 = 0; # init COUNTER
my $func;
$icnt = 0;
$i = 0;
$ln = 0;
doparsereset ();
foreach $tx2 (@lnbits) { # process for OUTPUT
### we have @copybits = @lnbits; ### take a SNAP of this QUOTE ONLY EXPANSION
if ($i3) { # was (length($tx3)) {
### this should REMEMBER the original 'line-spacing', and re-apply it now
$tx6 = substr ($tx6, $ln); ### get next line 'bit'
### note, no actual CHECK that they are the EQUAL!!!
### if ($msg eq $tx2) { ### should work also ...
if (length($tx6)) {
$nct = 0; ### no SPACE addition yet
} else {
$icnt++; ### bump to NEXT
$tx6 = $copybits[$icnt]; ### get the 'copy', for 'formatting'
$i = length($tx6); ## len of COPY
$c1 = substr ($tx6, 0, 1); ### and first char
$nct = 1; ### add back SPACE, per original file
}
if ($nct) {
###$tx3 .= white(' '); # add back 'space' between LINE components
$tx3 .= ' '; # add back 'space' between LINE components/bits
}
} else {
## first, so no space added = START 'spacer'
$tx6 = $copybits[$icnt]; ### get the 'copy', for 'formatting'
$i = length($tx6); ## len of COPY
$c1 = substr ($tx6, 0, 1); ### and first char
}
$ln = length($tx2); # length this line 'bit'
$c = substr ($tx2, 0, 1); # get FIRST CHAR
$msg = $tx2; # get copy of the line
$tx5 = htmlise($msg); # make it HTML form
### case of the first CHARACTER - established TYPE of this line bit
if ($c eq '#' ) { # comment component - should be to end-of-line, or more ...
$func = \&orange;
} elsif ($c eq "'" ) { ## "' # does it start with quotes DOUBLE or SINGLE
$func = \&green;
} elsif ($c eq '"' ) {
$func = \&color3;
} elsif ($c eq '$' ) {
# start of scalar
$func = \&color1;
} elsif ($c eq '@' ) {
# start of array
$func = \&match;
} elsif ($c eq '%' ) {
# start of hash
$func = \&peach;
} elsif ( isresword ($tx2) ) { ### exists $HResWds{$tx2}
$func = \&blue;
} elsif ( isbinfun ($tx2) ) { ### exists $HBFuncs{$tx2}
$func = \&color2;
} else {
$func = \&white; # set default, white
if ($ln < 4) { # if it is a short 'bit' of the line
if ( ispunctuat ($tx2) ) { # check if punc
$func = \&grey; # yup, switch to grey
}
}
}
$msg = $func->($tx5); # get the HTML form mainly '<' -> '<' changes
$tx3 .= $msg;
###tolog (' [' . $msg . ']');
###tolog (' [' . $tx2 . ']');
$msg = ( ' [' . $tx2 . ']' );
push(@logmsgs,$msg);
$i3++; ## count a line item
$msg = $tx2; ### keep LAST line 'bit' ...
} ### loop while line 'bits'
##### done line output #####
### tolog ("}}\n");
$msg = ( "}}\n" );
push(@logmsgs,$msg);
foreach $msg (@logmsgs) {
tolog($msg);
}
$tx3 .= "<br>\n" ;
### tolog ($tx3);
prt ($tx3);
#######################################################
} ### comment line summarily dealt with ...
}
sub htmlise {
my ($txt) = @_;
my $htmsps = 0;
my $htmnbs = '';
# convert to HTML
$txt =~ s/\t/$tab_stg /g; # substitute TAB characters
$txt =~ s/ "/"/g; # sub double quotes
$txt =~ s/\</</g; # sub less than tag beginning
$txt =~ s/\>/>/g; # and html/xml tag ending
my $ln = length($txt); # get the final length
if (substr ($txt, 0, 1) eq ' ') { # if starts with a space
### my $htmsps = 0;
### my $htmnbs = ' ';
## $htmsps = 0;
$htmnbs = ' ' ;
for ($htmsps = 1; $htmsps < $ln; $htmsps++) {
if (substr ($txt, $htmsps, 1) ne ' ') {
last;
}
$htmnbs .= ' ' if $htmsps > 1;
}
$htmsps-- if $htmsps > 1; # back off last space, if more than 1
tolog ( "Replacing $htmsps with [$htmnbs] ...\n") if $verb2;
$txt =~ s/ {$htmsps}/$htmnbs/; # replace (N) spaces with ' x N
if ($verb2) {
my (@vals) = split;
while (@vals) {
my ($vc) = shift (@vals);
tolog ( "[$vc] ");
}
tolog ( "\n" );
}
} # if it was space beginning
return $txt;
}
sub gotdelim {
my ($tx) = @_;
my $c;
my $mx = length($DELIMITER); ### = '(){}[]-+*/=~!&|<>?:;.,';
my @ar = split (//, $DELIMITER);
my $i = 0;
foreach $c (@ar) {
my $ts = '\\' ;
$ts .= $c;
if ($tx =~ /$ts/) {
# return 1;
return $c;
}
$i++;
}
return 0;
}
###my $actpunc = ''; ### store the active punctuation
###my %HPuncsFnd = (); # hash of Punctuation FOUND in parse
###my $actresword = '';
###my %HResWdFnd = ();
###my $actfunc = ''; ### store the active built-in functions
###my %HFuncsFnd = ();
### my %HPuncsFnd = (); # hash of Punctuation FOUND in parse
sub ispunctuat {
my ($cp) = @_;
foreach my $cc (@PPunct) {
###tolog ("Comaring [$cc] with [$cp]...\n");
if ($cc eq $cp) {
$actpunc = $cp; ### store the active punctuation
if ( exists $HPuncsFnd{$cp} ) {
$HPuncsFnd{$cp}++; # another count
} else {
$HPuncsFnd{$cp} = 1; # set FOUND 1
}
return 1;
}
}
return 0;
}
sub isresword {
my ($rw) = @_;
if ( exists $HResWds{$rw} ) {
$actresword = $rw;
if (exists $HResWdFnd{$rw}) {
$HResWdFnd{$rw}++; # another count
} else {
$HResWdFnd{$rw} = 1; # start count
}
return 1;
}
return 0;
}
sub isbinfun {
my ($rw) = @_;
if ( exists $HBFuncs{$rw} ) {
$actfunc = $rw;
if (exists $HFuncsFnd{$rw}) {
tolog ( "Bumped Funcs $rw ...\n" );
$HFuncsFnd{$rw}++; # another count
} else {
tolog ( "Created Funcs $rw ...\n" );
$HFuncsFnd{$rw} = 1; # start count
}
return 1;
}
return 0;
}
sub doparsereset {
my $k;
$actfunc = '';
$actresword = '';
$actpunc = '';
}
sub get_parse_stats {
my $ms = "Parse stats<br>\n";
my $key;
my $k;
my $i = 0;
my $at;
$at = %HResWdFnd;
$ms .= '<table border="1">';
$i = 0;
foreach $key (keys %HResWdFnd) {
###foreach $key (keys %$at) {
$i++;
$ms .= '<tr>' ;
$ms .= '<td>' ;
$ms .= "$i" ;
$ms .= '</td>' ;
$ms .= '<td>' ;
$ms .= "$key" ;
$ms .= '</td>' ;
$ms .= '<td>' ;
$ms .= $HResWdFnd{$key};
###$ms .= "$$at{$key}";
$ms .= '</td>' ;
$ms .= '</tr>' ;
$ms .= "\n" ;
}
$ms .= '</table>' ;
$ms .= "List of $i used reserve words ...<br>\n";
$i = 0;
$ms .= '<table border="1">';
foreach $key (keys %HFuncsFnd) {
$i++;
$ms .= '<tr>' ;
$ms .= '<td>' ;
$ms .= "$i" ;
$ms .= '</td>' ;
$ms .= '<td>' ;
$ms .= "$key" ;
$ms .= '</td>' ;
$ms .= '<td>' ;
$ms .= $HFuncsFnd{$key};
$ms .= '</td>' ;
$ms .= '</tr>' ;
$ms .= "\n" ;
}
$ms .= '</table>' ;
$ms .= "List of $i used built-in function words ...<br>\n";
$i = 0;
### if ( exists $HPuncsFnd{$cp} ) {
$ms .= '<table border="1">';
foreach $key (keys %HPuncsFnd) {
$i++;
$ms .= '<tr>' ;
$ms .= '<td>' ;
$ms .= "$i" ;
$ms .= '</td>' ;
$ms .= '<td>' ;
$ms .= htmlise($key);
$ms .= '</td>' ;
$ms .= '<td>' ;
$ms .= $HPuncsFnd{$key};
$ms .= '</td>' ;
$ms .= '</tr>' ;
$ms .= "\n" ;
}
$ms .= '</table>' ;
$ms .= "List of $i used punctuation ...<br>\n";
return $ms;
}
sub showarrcnts {
my $i = @PPunct;
tolog ( "PPunct array count = $i\n");
$i = @PPairs;
tolog ( "PPairs array count = $i\n");
$i = @DolVars;
tolog ( "DolVars array count = $i\n");
$i = @PBPunc;
tolog ( "PBPunc array count = $i\n");
}
sub get_line_num {
my ($lnn) = @_;
while (length($lnn) < 4) {
$lnn = '0' . $lnn;
}
return $lnn;
}
### EOF
|