poconvert.pl to HTML.

Generated: Mon Aug 29 19:34:56 2016 from poconvert.pl 2016/03/21 33.6 KB. text copy
#!/usr/bin/perl -w
# NAME: poconvert.pl
# AIM: Actions with po files...
# 20/03/2016 - review and update
# 26/01/2016 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
use utf8;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.6 2016-03-21";
##my $VERS = "0.0.5 2015-01-09";
my $load_log = 0;
my $in_header = '';
my $in_file = '';
my $in_file2 = '';
my $verbosity = 0;
my $out_file = '';
my $action = "h2p";
my $language = '';
my $po_file = '';

my $out_file1 = $temp_dir.$PATH_SEP."tempout1.po";
my $out_file2 = $temp_dir.$PATH_SEP."tempout2.po";
my $out_comm = $temp_dir.$PATH_SEP."tempcomm.txt";
my $out_diff = $temp_dir.$PATH_SEP."tempdiff.txt";

# ### DEBUG ###
my $debug_on = 1;
my $def_proj = 'F:\Projects\tidy-html5';
##my $def_proj = 'F:\Projects\tidy-local';
my $def_file1 = $def_proj.'\localize\translations\tidy.pot';
my $def_file2 = $def_proj.'\localize\translations\language_fr.po';
my $def_hdr = $def_proj.'\src\language_en.h';

### program variables
my @warnings = ();
my $cwd = cwd();
my @msgids = ();
my @blocks = ();

sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub count_pp($) {
    my $str = shift;
    my $ind = index($str,'%');
    my $cnt = 0;
    while ($ind >= 0) {
        $cnt++;
        $str = substr($str,$ind+1);
        $ind = index($str,'%');
    }
    return $cnt;
}

# POT file
# tidy.pot
#msgid ""
#msgstr ""
#"Content-Type: text/plain; charset=UTF-8\n"
#"Language: en\n"
#"Plural-Forms: nplurals=2; plural=n != 1;\n"
#"X-Generator: HTML Tidy poconvert.rb\n"
#"Project-Id-Version: \n"
#"POT-Creation-Date: 2016-01-15 11:38:40\n"
#"Last-Translator: jderry\n"
#"Language-Team: \n"
#
##. Specify the ll or ll_cc language code here.
#msgctxt "TIDY_LANGUAGE"
#msgid "en"
#msgstr ""
#
##. Only translate if a URL to the target language can be found.
#msgctxt "ACCESS_URL"
#msgid "http://www.w3.org/WAI/GL"
#msgstr ""
#...

sub get_po_header($$$) {
    my ($lang,$date,$user) = @_;
    my $txt = <<EOF;
msgid ""
msgstr ""
"Content-Type: text/plain; charset=UTF-8"
"Language: $lang"
"Plural-Forms: nplurals=2; plural=n != 1;"
"X-Generator: HTML Tidy poconvert.pl"
"Project-Id-Version: "
"POT-Creation-Date: $date"
"Last-Translator: $user"
"Language-Team: "

EOF
    return $txt;
}

sub find_msgid($) {
    my $mid = shift;
    my ($ra,$tst);
    foreach $ra (@blocks) {
        $tst = ${$ra}[0];
        if ($mid eq $tst) {
            prt("Found MID $mid\n") if (VERB9());
            #pgm_exit(1,"CHECK EXIT\n");
            return $ra;
        }
    }
    prt("MID $mid NOT FOUND\n") if (VERB2());
    return undef;
}

# split_space2 - space split - 
# like split(/\s/,$txt), but honour double inverted commas
# but not an escaped inverted comma, like '\"'
# also accept and split '"something"/>', but ONLY if in the tail
# 2010/05/05 - also want to avoid a tag of '"zlib">'
sub space_split2 {
   my ($txt) = shift;
   my $len = length($txt);
   my ($k,$ch,$tag,$incomm,$k2,$nch,$pc);
   my @arr = ();
   $tag = '';
   $incomm = 0;
    $ch = '';
   for ($k = 0; $k < $len; $k++) {
        $pc = $ch;
      $ch = substr($txt,$k,1);
        $k2 = $k + 1;
        $nch = ($k2 < $len) ? substr($txt,$k2,1) : "";
      if ($incomm) {
         $incomm = 0 if (($ch eq '"') && ($pc ne "\\"));
         $tag .= $ch;
            # add 2010/05/05 to avoid say '"zlib">' begin a tag
            if (!$incomm) {
                push(@arr,$tag);
                $tag = '';
            }
      } elsif ($ch =~ /\s/) { # any spacey char
            push(@arr, $tag) if (length($tag));
         $tag = '';
      } elsif (($ch =~ /\//)&&($nch eq '>')) { # 04/10/2008, but only if before '>' 24/09/2008 add this as well
         push(@arr, $tag) if (length($tag));
         $tag = $ch; # restart tag with this character
      } else {
         $tag .= $ch;
         $incomm = 1 if (($ch eq '"') && ($pc ne "\\"));
      }
   }
   push(@arr, $tag) if (length($tag));
   return @arr;
}

sub clean_text($) {
    my $txt = shift;
    my $len = length($txt);
    my $ctxt = '';
    my ($i,$ch,$pc);
    $ch = '';
    for ($i = 0; $i < $len; $i++) {
        $pc = $ch;
        $ch = substr($txt,$i,1);
        next if ($ch eq '"');   # drop all '"'
        if ($ch =~ /\s/) {  # only ever one space
            if (! $pc =~ /\s/) {
                $ctxt .= $ch;
            }
        } else {
            $ctxt .= $ch;
        }
    }
    return $ctxt;
}

sub clean_text_is_not_equal($$) {
    my ($txt1,$txt2) = @_;
    return 0 if ($txt1 eq $txt2);
    my $ctxt1 = clean_text($txt1);
    my $ctxt2 = clean_text($txt2);
    return 0 if ($ctxt1 eq $ctxt2);
    return 1;
}

sub process_msgids() {
    my $cnt = scalar @msgids;
    prt("Collected $cnt msgids..\n");
    my $date = lu_get_YYYYMMDD_hhmmss(time());
    my $user = 'geoff'; # TODO: Get user name
    my $lang = 'en';
    if (length($language) == 0) {
        prtw("WARNING: Set default language '$lang'!\n");
    } else {
        $lang = $language;
    }
    my $commtxt = '';
    my %comments = ();
    my $pot = get_po_header($lang,$date,$user);
    my ($i,$mid,$plu,$txt,$comm,$ra,$i2,$ra2,$mid2,$plu2,$txt2,@arr,$tmp,$ncomm);
    my ($rba,$ttxt,$tid,$rta,$rtb,$rtc,$rtd,$ttxtp,$trid);
    my $isplu = 0;
    my $plucnt = 0;
    my $rptcnt = 0;
    my $isdiff = 0;
    my $difftxt = '';
    for ($i = 0; $i < $cnt; $i++) {
        $i2 = $i + 1;
        ##             0    1    2    3
        #push(@msgids,[$mid,$plu,$txt,$comment]);
        $ra = $msgids[$i];
        $mid = ${$ra}[0];
        $plu = ${$ra}[1];
        $txt = ${$ra}[2];   # unbroken string "abc abc" "def def"...?
        $comm = ${$ra}[3];
        @arr = space_split2($txt);
        $txt = join("\n",@arr);
        if (length($comm)) {
            if (defined $comments{$comm}) {
                $rptcnt++;
                $comments{$comm}++;
            } else {
                @arr = split("\n",$comm);
                $ncomm = '';
                foreach $tmp (@arr) {
                    $tmp = substr($tmp,0,length($tmp) - 1) while ($tmp =~ /\s$/g); # remove all TRAILING space
                    if (length($tmp)) {
                        if (defined $comments{$tmp}) {
                            $rptcnt++;
                            $comments{$tmp}++;
                        } else {
                            $ncomm .= "\n" if (length($ncomm));
                            $ncomm .= $tmp;
                            $comments{$tmp} = 1;
                        }
                    }
                }
                if (length($ncomm)) {
                    $pot .= $ncomm."\n";
                    $commtxt .= $ncomm."\n\n";
                }
                $comments{$comm} = 1;
            }
        }
        $isplu = 0;
        if ($i2 < $cnt) {
            $ra2 = $msgids[$i2];
            $mid2 = ${$ra2}[0];
            $plu2 = ${$ra2}[1];
            $txt2 = ${$ra2}[2];
            if ($mid eq $mid2) {
                $isplu = 1;
                $i++;
            }
        }
        $pot .= "msgctxt \"$mid\"\n";
        $ttxt  = '""';
        $ttxtp = '""';
        $rba = find_msgid($mid);
        $isdiff = 0;
        if (defined $rba) {
            #               0     1   2   3   4
            # push(@blocks,[$mid,\@a,\@b,\@c,\@d]);
            $rta = ${$rba}[1];
            $rtb = ${$rba}[2];
            $rtc = ${$rba}[3];
            $rtd = ${$rba}[4];
            $trid = join("\n",@{$rta});
            $ttxt = join("\n",@{$rtc});
            $ttxtp = join("\n",@{$rtd});
            $isdiff = 1 if clean_text_is_not_equal($txt,$trid);
            if ($isdiff) {
                $difftxt .= "\nCheck Translation:\nWas '$txt'\nNow '$trid'\n";
            }
        }
        if ($isplu) {
            $pot .= "msgid $txt\n";
            $pot .= "msgid_plural $txt2\n";
            $pot .= "msgstr[0] $ttxt\n";
            $pot .= "msgstr[1] $ttxtp\n";
            $plucnt++;
        } else {
            $pot .= "msgid $txt\n";
            $pot .= "msgstr $ttxt\n";
        }
        $pot .= "\n";
    }
    prt("Had $plucnt plurals...\n");
    if (length($out_file) == 0) {
        prt($pot);
        prt("Above output due to no 'out' file given. use -o file\n");
    } else {
        ##write2file($pot,$out_file);
        # This has to be UTF-8 only
        open OUT, ">$out_file" or mydie("ERROR: Unable to open $out_file! $!\n");
        binmode(OUT, ":utf8");
        print OUT $pot;
        close OUT;
        prt("Output written to $out_file\n");
    }
    if (length($commtxt)) {
        write2file($commtxt,$out_comm);
        prt("Comments written to $out_comm, with $rptcnt repeats deleted.\n");
    }
    if (VERB9()) {
        foreach $txt (keys %comments) {
            $cnt = $comments{$txt};
            prt("\n$cnt: $txt\n");
        }
        $load_log = 1;
    }
    if (length($difftxt)) {
        write2file($difftxt,$out_diff);
        prt("Different msgid text written to '$out_diff'\n");
    }
}


sub process_po_file($) {
    my ($inf) = @_;
    open INF, '<:encoding(UTF-8)', $inf
        or pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    #if (! open INF, "<$inf") {
    #    pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    #}
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($i,$line,$len,$tline);
    my ($mid,$lnn,$txt,$str,$num);
    # process the header
    my $hdr = '';
    my $lang = '';
    for ($i = 0; $i < $lncnt; $i++) {
        $line = $lines[$i];
        chomp $line;
        $tline = trim_all($line);
        $len = length($tline);
        last if ($len == 0);
        $hdr .= $line."\n";
        $tline = strip_double_quotes($line);
        $tline =~ s/\\n$//;
        if ($tline =~ /Language:\s+(\w+)$/) {
            $lang = $1;
            prt("Language is '$lang'\n");
        }
    }
    prt($hdr) if (VERB9());
    my $blkcnt = 0;
    my $inmsgid = 0;
    my $inmsgidp = 0;
    my $inmsgstr = 0;
    my $inmsgstrp = 0;
    my @idblk = ();
    my @idpblk = ();
    my @strblk = ();
    my @strpblk = ();
    my $skip = 0;

    $mid = '';
    for (; $i < $lncnt; $i++) {
        $lnn = $i + 1;
        $line = $lines[$i];
        chomp $line;
        $tline = trim_all($line);
        $len = length($tline);
        $skip = 0;
        if ($len == 0) {
            $skip = 1;
        }
        if ($tline =~ /^\#/) {
            $skip = 1;
        }
        if ($skip) {
            if (length($mid)) {
                my @a = @idblk;
                my @b = @idpblk;
                my @c = @strblk;
                my @d = @strpblk;
                #               0     1   2   3   4
                # push(@blocks,[$mid,\@a,\@b,\@c,\@d]);
                push(@blocks,[$mid,\@a,\@b,\@c,\@d]);
            }
            $mid = '';
            $blkcnt = 0;

            $inmsgid = 0;
            $inmsgidp = 0;
            $inmsgstr = 0;
            $inmsgstrp = 0;
            @idblk = ();
            @idpblk = ();
            @strblk = ();
            @strpblk = ();
            next;
        }
        if ($blkcnt == 0) {
            if ($tline =~ /^msgctxt\s+\"(\w+)\"/) {
                $mid = $1;
                prt("$lnn: '$mid'\n") if (VERB9());
            } else {
                pgm_exit(1, "FAILED: $lnn: did not find a 'msgctxt'!\n");
            }
        } else {
            if ($line =~ /^\"/) {
                if ($inmsgid) {
                    push(@idblk,$line);
                } elsif ($inmsgidp) {
                    push(@idpblk,$line);
                } elsif ($inmsgstr) {
                    push(@strblk,$line);
                } elsif ($inmsgstrp) {
                    push(@strpblk,$line);
                } else {
                    pgm_exit(1, "FAILED: $lnn: not in any group!\n");
                }
            } elsif ($line =~ /^msgid/) {
                if ($line =~ /^msgid_plural/) {
                    $inmsgid = 0;
                    $inmsgidp = 1;
                    $inmsgstr = 0;
                    $inmsgstrp = 0;
                    $line =~ s/^msgid_plural\s*//;
                    push(@idpblk,$line);
                } else {
                    $inmsgid = 1;
                    $inmsgidp = 0;
                    $inmsgstr = 0;
                    $inmsgstrp = 0;
                    $line =~ s/^msgid\s*//;
                    push(@idblk,$line);
                }
            } elsif ($line =~ /^msgstr/) {
                # we have a TRANSLATION - is it PLURAL?
                # if ($line =~ /^msgstr\[(\n+)\]/) 
                if ($line =~ /^msgstr\[([0-1])\]/) {
                    $num = $1;
                    $line =~ s/^msgstr\[([0-1])\]\s*//;
                    $num = $1;
                    prt("$lnn: Plural $num '$line'\n") if (VERB9());
                    $inmsgid = 0;
                    $inmsgidp = 0;
                    if ($num == 0) {
                        $inmsgstr = 1;
                        $inmsgstrp = 0;
                        push(@strblk,$line);
                    } else {
                        $inmsgstr = 0;
                        $inmsgstrp = 1;
                        push(@strpblk,$line);
                    }
                } else {
                    if ($line =~ /\[/) {
                        pgm_exit(1,"$lnn: Regex FAILED '$line'\n");
                    }
                    $inmsgid = 0;
                    $inmsgidp = 0;
                    $inmsgstr = 1;
                    $inmsgstrp = 0;
                    $line =~ s/^msgstr\s*//;
                    push(@strblk,$line);
                }
            } else {
                pgm_exit(1,"$lnn: NOT PARSES! '$line'\n");
            }
        }
        $blkcnt++;
    }
    if (length($mid)) {
        my @a = @idblk;
        my @b = @idpblk;
        my @c = @strblk;
        my @d = @strpblk;
        push(@blocks,[$mid,\@a,\@b,\@c,\@d]);
    }
    $len = scalar @blocks;
    prt("Have $len blocks of text\n");

    ###pgm_exit(1,"TEMP EXIT\n");
}

sub process_in_file1($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$tline,$len,$ln,$msgctxt);
    my ($ras,$ra,$rb);
    $lnn = 0;
    my @id = ();
    my @str = ();
    $msgctxt = "start";
    my $inid = 0;
    my $instr = 0;
    my %msgtxt = ();
    my @msgarr = ();
    my $haspp = 0;
    my $withpp = 0;
    my $withpp2 = 0;
    my $withpp1 = 0;
    my $ppcnt = 0;
    my $maxpps = 0;
    my %msgtxt2 = ();
    my $ingroup = 0;
    for ($ln = 0; $ln < $lncnt; $ln++) {
        $line = $lines[$ln];
        chomp $line;
        $lnn = $ln + 1;
        $tline = trim_all($line);
        $len = length($tline);
        if ($len == 0) {
            $ingroup = 0;
            next;
        }
        if (substr($line,0,1) eq '#') {
            $ingroup = 0;
            next;
        }
        if ($line =~ /msgctxt\s+"(\w+)"\s*$/) {
            $inc = $1;
            if (defined $msgtxt{$msgctxt}) {
                pgm_exit(1,"$lnn: Oops, duplicated $msgctxt\n");
            }
            my @a = @id;
            my @b = @str;
            $msgtxt{$msgctxt} = [\@a,\@b];
            push(@msgarr,$msgctxt);
            $msgctxt = $inc;
            prt("$lnn: $msgctxt\n") if (VERB9());
            @id = ();
            @str = ();
            $inid = 0;
            $instr = 0;
            if ($haspp) {
                $withpp++;
                #if (($haspp > 1)||($ppcnt > 1)) {
                if ($ppcnt > 1) {
                    $withpp2++;
                    $msgtxt2{$msgctxt} = $ppcnt;
                } else {
                    $withpp1++;
                }
            }
            $haspp = 0;
            $ppcnt = 0;
            $ingroup = 1;   # start of a group
        } elsif ($line =~ /msgid\s+"(.*)"\s*$/) {
            $inc = $1;
            if ($inc) {
                if ($inc =~ /\%\d+\$/) {
                    $haspp++;
                    $ppcnt += count_pp($inc);
                }
                push(@id,$inc);
            }
            $inid = 1;
            $instr = 0;
        } elsif ($line =~ /msgstr\s+"(.*)"\s*$/) {
            $inc = $1;
            if ($inc) {
                push(@str,$inc);
            }
            $inid = 0;
            $instr = 1;
        } elsif ($line =~ /"(.*)"\s*$/) {
            $inc = $1;
            if ($inc) {
                if ($inid) {
                    if ($inc =~ /\%\d+\$/) {
                        $haspp++;
                        $ppcnt += count_pp($inc);
                    }
                    push(@id,$inc);
                } elsif ($instr) {
                    push(@str,$inc);
                } else {
                    pgm_exit(1,"$lnn: In in id nor str! $line\n");
                }
            }
        } else {
            pgm_exit(1, "$lnn: Unparsed [$line]\n");
        }
    }
    if ($haspp) {
        $withpp++;
        if (($haspp > 1)||($ppcnt > 1)) {
            $withpp2++;
        } else {
            $withpp1++;
        }
    }
    $line = '';
    my $mcnt = scalar @msgarr;
    prt("Found $mcnt ids, $withpp with PPs, $withpp1 with just 1, $withpp2 with 2 or more\n");
    my @arr = sort keys %msgtxt2;
    my $cnt = 0;
    foreach $msgctxt (@arr) {
        $cnt++;
        $mcnt = $msgtxt2{$msgctxt};
        prt("$cnt: $msgctxt $mcnt\n");
        $ras = $msgtxt{$msgctxt}; #  [\@a,\@b];
        $ra = ${$ras}[0];
        prt(join("\n",@{$ra})."\n");
    }
    $load_log = 1;
    foreach $msgctxt (@msgarr) {
        $ras = $msgtxt{$msgctxt}; #  [\@a,\@b];
        $ra = ${$ras}[0];
        $rb = ${$ras}[1];
        if ($msgctxt ne 'start') {
            $line .= "msgctxt \"$msgctxt\"\n";
        }
        $line .= "msgid ";
        $ln = 0;
        foreach $inc (@{$ra}) {
            $line .= "\"$inc\"\n";
            $ln++;
        }
        if ($ln == 0) {
            $line .= "\"\"\n";
        }
        $line .= "msgstr ";
        $ln = 0;
        foreach $inc (@{$rb}) {
            $line .= "\"$inc\"\n";
            $ln++;
        }
        if ($ln == 0) {
            $line .= "\"\"\n";
        }
        $line .= "\n";
    }
    write2file($line,$out_file1);
    prt("Results written to $out_file1\n");
    my %h = ();
    $h{$inf} = [\@msgarr,\%msgtxt];
    return \%h;
}

sub process_in_file2($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$tline,$len,$ln,$msgctxt);
    my ($ras,$ra,$rb);
    $lnn = 0;
    my @id = ();
    my @str = ();
    $msgctxt = "start";
    my $inid = 0;
    my $instr = 0;
    my %msgtxt = ();
    my @msgarr = ();
    for ($ln = 0; $ln < $lncnt; $ln++) {
        $line = $lines[$ln];
        chomp $line;
        $lnn = $ln + 1;
        $tline = trim_all($line);
        $len = length($tline);
        next if ($len == 0);
        next if (substr($line,0,1) eq '#');
        if ($line =~ /msgctxt\s+"(\w+)"\s*$/) {
            $inc = $1;
            if (defined $msgtxt{$msgctxt}) {
                pgm_exit(1,"$lnn: Oops, duplicated $msgctxt\n");
            }
            my @a = @id;
            my @b = @str;
            $msgtxt{$msgctxt} = [\@a,\@b];
            push(@msgarr,$msgctxt);
            $msgctxt = $inc;
            prt("$lnn: $msgctxt\n") if (VERB9());
            @id = ();
            @str = ();
            $inid = 0;
            $instr = 0;
        } elsif ($line =~ /msgid\s+"(.*)"\s*$/) {
            $inc = $1;
            if ($inc) {
                push(@id,$inc);
            }
            $inid = 1;
            $instr = 0;
        } elsif ($line =~ /msgstr\s+"(.*)"\s*$/) {
            $inc = $1;
            if ($inc) {
                push(@str,$inc);
            }
            $inid = 0;
            $instr = 1;
        } elsif ($line =~ /"(.*)"\s*$/) {
            $inc = $1;
            if ($inc) {
                if ($inid) {
                    push(@id,$inc);
                } elsif ($instr) {
                    push(@str,$inc);
                } else {
                    pgm_exit(1,"$lnn: In in id nor str! $line\n");
                }
            }
        } else {
            pgm_exit(1, "$lnn: Unparsed [$line]\n");
        }
    }
    $line = '';
    foreach $msgctxt (@msgarr) {

        $ras = $msgtxt{$msgctxt}; #  [\@a,\@b];
        $ra = ${$ras}[0];
        $rb = ${$ras}[1];
        if ($msgctxt ne 'start') {
            $line .= "msgctxt \"$msgctxt\"\n";
        }
        $line .= "msgid ";
        $ln = 0;
        foreach $inc (@{$ra}) {
            $line .= "\"$inc\"\n";
            $ln++;
        }
        if ($ln == 0) {
            $line .= "\"\"\n";
        }
        $line .= "msgstr ";
        $ln = 0;
        foreach $inc (@{$rb}) {
            $line .= "\"$inc\"\n";
            $ln++;
        }
        if ($ln == 0) {
            $line .= "\"\"\n";
        }
        $line .= "\n";
    }
    write2file($line,$out_file2);
    prt("Results written to $out_file2\n");
    my %h = ();
    $h{$inf} = [\@msgarr,\%msgtxt];
    return \%h;
}

sub compare_hashes($$) {
    my ($rh1,$rh2) = @_;
    my @a1 = keys %{$rh1};
    my @a2 = keys %{$rh2};
    my $inf1 = $a1[0];
    my $inf2 = $a2[0];
    my $ra1 = ${$rh1}{$inf1};
    my $ra2 = ${$rh2}{$inf2};
    my $rma1 = ${$ra1}[0];
    my $rth1 = ${$ra1}[1];
    my $rma2 = ${$ra2}[0];
    my $rth2 = ${$ra2}[1];
    my $cnt1 = scalar @{$rma1};
    my $cnt2 = scalar @{$rma2};
    prt("Compare of two files - \n$inf1 - $cnt1, with\n$inf2 - $cnt2\n");
    my ($msgctxt,$ras1,$rb1,$rb2,$ras2,$stg1,$stg2);
    # $ras = $msgtxt{$msgctxt}; #  [\@a,\@b];
    # $ra = ${$ras}[0];
    # $rb = ${$ras}[1];
    foreach $msgctxt (@{$rma1}) {
        $ras1 = ${$rth1}{$msgctxt};
        $ra1 = ${$ras1}[0];
        $rb1 = ${$ras1}[1];
        if (defined ${$rth2}{$msgctxt}) {
            $ras2 = ${$rth2}{$msgctxt};
            $ra2 = ${$ras2}[0];
            $rb2 = ${$ras2}[1];
            $cnt1 = scalar @{$ra1};
            $cnt2 = scalar @{$ra2};
            $stg1 = join("",@{$ra1});
            $stg2 = join("",@{$ra2});
            if ($stg1 ne $stg2) {
                prt("msgxtxt $msgctxt is different $cnt1 $cnt2!\n");
                prt("$stg1\n");
                prt("$stg2\n");
            }
        } else {
            prt("msgxtxt $msgctxt not found in 2!\n");
        }
    }
    $load_log = 1;
}

sub split_comma($) {
    my $line = shift;
    my $len = length($line);
    my ($i,$ch,$txt,$pc);
    my @arr = ();
    $txt = '';
    my $inquote = 0;
    $ch = '';
    for ($i = 0; $i < $len; $i++) {
        $pc = $ch;
        $ch = substr($line,$i,1);
        if (($ch eq ',') && !$inquote) {
            $txt = trim_all($txt);
            push(@arr,$txt);
            $txt = '';
        } else {
            if ($ch =~ /\s/) {
                $txt .= $ch if (length($txt));
            } else {
                $txt .= $ch;
            }
            if (($ch eq '"') && ($pc ne "\\")) {
                if ($inquote) {
                    $inquote = 0;
                } else {
                    $inquote = 1;
                }
            }
        }
    }
    push(@arr,$txt) if (length($txt));
    return \@arr;
}

sub process_header($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($i,$line,$tline,$lnn,$len,$ch,$pc,$j,@arr,$cnt,$ra);
    my ($mid,$plu,$txt,$comment);
    my $incomm = 0;
    my $instr = 0;
    my $brcnt = 0;
    $ch = 0;
    $pc = 0;
    my @braces = ();
    my $dline = '';
    my $msgcnt = 0;
    $comment = '';
    for ($i = 0; $i < $lncnt; $i++) {
        $lnn = $i + 1;
        $line = $lines[$i];
        $tline = trim_all($line);
        $len = length($tline);
        next if ($len == 0);
        next if ($tline =~ /^\s*\/\//);    # skip inline comments - should be NONE in Tidy!
        # static languageDefinition language_en = { whichPluralForm_en, {
        if ($line =~ /^\s*static\s+languageDefinition\s+/) {
            $instr = 1;
            prt("$lnn: Start languageDefinition stucture...\n");
            $comment = '';
        }
        for ($j = 0; $j < $len; $j++) {
            $pc = $ch;
            $ch = substr($tline,$j,1);
            if ($incomm) {
                if (($ch eq '/')&&($pc eq '*')) {
                    $incomm = 0;
                    prt("$lnn: End comment...\n") if (VERB9());
                    $comment =~ s/\s*\*\s*$//;
                } else {
                    $comment .= $ch;
                }
            } elsif (($ch eq '*') && ($pc eq '/')) {
                $incomm = 1;
                $dline =~ s/\/$//g;
                $comment = "#. ";
                prt("$lnn: Start comment...\n") if (VERB9());
            } elsif ($instr) {                if ($ch eq '{') {
                    push(@braces,$lnn);
                    $brcnt = scalar @braces;
                    prt("$lnn: brcnt $brcnt\n") if (VERB9());
                } elsif ($ch eq '}') {
                    if (@braces) {
                        pop @braces;
                    } else {
                        prtw("WARNING: $lnn: Got } but none on stack!\n");
                    } 
                    $brcnt = scalar @braces;
                    prt("$lnn: brcnt $brcnt\n") if (VERB9());
                    if ($brcnt == 2) {
                        $dline =~ s/\n$//g;
                        if (length($dline)) {
                            $ra = split_comma($dline);
                            $cnt = scalar @{$ra};
                            prt("$lnn:$cnt: $dline\n") if (VERB9());
                            prt(join(",",@{$ra})."\n") if (VERB9());
                            if ($cnt == 3) {
                                $mid = ${$ra}[0];
                                $plu = trim_all(${$ra}[1]);
                                $txt = trim_all(${$ra}[2]);
                                #             0    1    2    3
                                push(@msgids,[$mid,$plu,$txt,$comment]);
                                if ($mid eq 'TIDY_LANGUAGE') {
                                    if ($msgcnt != 0) {
                                        prtw("WARNING: 'TIDY_LANGUAGE' not first entry!\n");
                                    }
                                    if (length($language) == 0) {
                                        $language = strip_double_quotes($txt);
                                        prt("Set language to '$language'\n");
                                    }
                                }
                            } else {
                                prtw("WARNING: $lnn:$cnt: DID NOT SPLIT 3! $dline\n"); # if (VERB9());
                            }
                        }
                        $dline = '';
                        $comment = '';
                    }
                } elsif ($ch eq ';') {
                    if ($brcnt) {
                        # still got stuff
                    } else {
                        prt("$lnn: Exit languageDefinition stucture...\n");
                        $instr = 0;
                    }
                } else {
                    if ($brcnt == 3) {
                       $dline .= $ch;
                    }
                }

            }
        }
        if ($incomm) {
            $comment .= "\n#. ";
        }
        if ($brcnt == 3) {
            $dline .= "\n" if (length($dline));
        }
    }
    #$load_log = 1;
    process_msgids();

}

#########################################
### MAIN ###
parse_args(@ARGV);
process_po_file($def_file2);
if ($action eq 'h2p') {
    process_header($in_header);
} else {
    my $rh1 = process_in_file1($in_file);
    if (length($in_file2)) {
        #my $rh2 = process_in_file2($in_file2);
        #compare_hashes($rh1,$rh2);
    }
}
pgm_exit(0,"");
########################################

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    my $verb = VERB2();

    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^v.*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file1 = $sarg;
                prt("Set out file to [$out_file1].\n") if ($verb);
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            if (length($in_file)) {
                if (length($in_file2)) {
                    prg_exit(1,"Already have 1 $in_file, 2 $in_file2!\nWhat is this $arg?\n");
                } else {
                    $in_file2 = $arg;
                    prt("Set input file 2 to [$in_file2]\n") if ($verb);
                }
            } else {
                $in_file = $arg;
                prt("Set input file 1 to [$in_file]\n") if ($verb);
            }
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if ($action eq 'h2p') {
            if (length($in_header) == 0) {
                $in_header = $def_hdr;
                prt("Set DEFAULT header to [$in_header]\n");
            }
        } else {
            if (length($in_file) ==  0) {
                $in_file = $def_file1;
               prt("Set DEFAULT input 1 to [$in_file]\n");
            }
            if (length($in_file2) ==  0) {
                $in_file2 = $def_file2;
                prt("Set DEFAULT input 2 to [$in_file2]\n");
            }
        }
        if (length($out_file) == 0) {
            $out_file = $out_file1;
            prt("Set DEFAULT out file to [$out_file]\n");
        }
    }
    if ($action eq 'h2p') {
        if (length($in_header) ==  0) {
            pgm_exit(1,"ERROR: act=$action: No header file found in command!\n");
        }
        if (! -f $in_header) {
            pgm_exit(1,"ERROR: Unable to find in file [$in_header]! Check name, location...\n");
        }
    } else {
        if (length($in_file) ==  0) {
            pgm_exit(1,"ERROR: No input files found in command!\n");
        }
        if (! -f $in_file) {
            pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
        }
    }
}

sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help  (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load        (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out <file>  (-o) = Write output to this file.\n");
}

# eof - poconvert.pl
index -|- top