#!/perl -w # NAME: parse-xml02.pl # AIM: My attempt at my OWN XML parser # This is the second attempt, with a complete RE-WRITE # 27/01/2010 geoff mclane http://geoffair.net/mperl use strict; use warnings; use Data::Dumper; require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); # autogeneration of functions # auto-generate a set of functions # my @TTColors = qw( red green blue white ); # for $name (@TTColors) { # no strict 'refs'; # allow symbol table manipulation # *$name = *{uc $name} = sub { "@_"; } # } # Options my $add_blank_attribs = 0; # features my $load_log = 1; my $add_header = 1; #my $show_comments = 1; my $keep_path_order = 1; #my $exclude_blanks = 0; my $out_xml = "tempxml5.xml"; my $out_xml2 = "tempxml7.xml"; # content hash reference strings my $x_chr_cont = 'content'; my $x_chr_attr = 'attributes'; # element types my $XT_HEADER = 1; # xml header my $XT_COMMENT = 2; # comments my $XT_DOCTYPE = 3; # doctype ]> my $XT_CDATA = 4; # cdata # elements my $XT_ELE1 = 5; # simple my $XT_ELE2 = 6; # closed my $XT_ELE3 = 7; # complete # stacked in an array my $XAO_TYPE = 0; my $XAO_TEXT = 1; my $XAO_TAG = 2; my $XAO_PRE = 3; my $XAO_LNNUM = 4; my $XAO_STACK = 5; # options during parse my $XO_SHOW1 = 1; my $XO_SHOW2 = 2; my $XO_SHOW3 = 4; my $XO_SHOW4 = 8; my %xml_type_names = ( $XT_HEADER => 'header', $XT_COMMENT => 'comment', $XT_DOCTYPE => 'doctype', $XT_CDATA => 'cdata', $XT_ELE1 => 'open', $XT_ELE2 => 'close', $XT_ELE3 => 'complete' ); # special hash strings my $x_cont = 'ContentArray'; my $x_warn = 'ErrorWarnings'; my $x_root = 'DocRoot'; my $x_file = 'FileName'; # information on XML # from : http://www.w3.org/TR/REC-xml # XML documents SHOULD begin with an XML declaration - XML HEADER # '' - XML DOCTYPE #my $in_file = 'tests.xml'; #my $in_file = 'test4.xml'; #my $in_file = 'test3.xml'; #my $in_file = 'test8.xml'; # has an ERROR #my $in_file = 'test9.xml'; # has an ERROR #my $in_file = 'C:\DTEMP\libxml2-2.6.30\result\slashdot16.xml'; # UTF-16 file #my $in_file = 'C:\DTEMP\FG\CubeServ420.xml'; my $in_file = 'C:\FGCVS\FlightGear\data/Aircraft/B-2/B-2-set.xml'; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\787\\787.xml"; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\787\\787-set.xml"; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\c172p\\c172p-set.xml"; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\737-300\\737-300-set.xml"; #my $in_file = "tempxml6.xml"; # debug my @warnings = (); # =================================================== my $test_xml = < Title

contentitalics

EOF # =================================================== my $test_xml2 = < Cessna 172P Skyhawk (1981 model) David Megginson production jsb c172p Aircraft/c172p/Models/c172p.xml n301dp 0 Aircraft/c172p/splash.png false true -0.21 0.235 0.36 -12 Aircraft/c172p/Systems/KAP140.xml Aircraft/c172p/c172-sound.xml 1 0 0 0 0 0 0 0.027 0.0 3 EOF my $test_xml3 = < ]> Hello, world! EOF sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub show_warnings() { if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { ###prt( "No warnings issued.\n\n" ); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if ( !($msg =~ /\n$/) ); prt($msg); } show_warnings(); close_log($outfile,$load_log); exit($val); } ########################################################## # ### MY XML PARSER ### sub xml_get_content_hr($$) { my ($txt,$rat) = @_; my %h = (); $h{$x_chr_cont} = $txt; if ($add_blank_attribs) { $h{$x_chr_attr} = $rat; } else { if (scalar keys(%{$rat})) { $h{$x_chr_attr} = $rat; } } return \%h; } sub xml_get_type_name($) { my ($typ) = shift; if (defined $xml_type_names{$typ}) { return $xml_type_names{$typ}; } return 'Unknown $typ!'; } sub ret_stack_text($) { my ($ra) = @_; my $rtxt = ''; foreach my $tx (@{$ra}) { $rtxt .= '/' if (length($rtxt)); $rtxt .= ${$tx}[0]; } return $rtxt; } sub ret_stack_text_simple($) { my ($ra) = @_; my $rtxt = ''; my ($tx); foreach $tx (@{$ra}) { $rtxt .= '/' if (length($rtxt)); $rtxt .= $tx; } return $rtxt; } sub check_ele_stack($$$) { my ($res,$rw,$opts) = @_; my $cnt = scalar @{$res}; if ($cnt) { my $wrap = 4; my $msg = "WARNING: Still $cnt items ON THE STACK! "; my $tcnt = 0; $msg .= "\n" if ($cnt > $wrap); for (my $i = 0; $i < $cnt; $i++) { my $refts = ${$res}[$i]; my $toptag = ${$refts}[0]; my $toplnn = ${$refts}[1]; $msg .= "[".$toptag."]($toplnn) "; $tcnt++; if ($tcnt > $wrap) { $tcnt = 0; $msg .= "\n"; } } push(@{$rw},$msg); prtw("$msg\n") if ($opts & $XO_SHOW4); } } # element types # xml header # comments # doctype ]> # cdata # elements # simple # closed # complete sub scan_xml_text($$$) { my ($fil,$text,$opts) = @_; my $len = length($text); my ($i,$cc,$pretxt,$lnn,$element); my ($xitem,$pc,$ppc,$pppc); my ($eletyp,$eleref,$pele,$plnn); my ($stkcnt,$bgnlnn,$stktxt,$msg); my %xmlhash = (); my $rxmlhash = \%xmlhash; my @elestack = (); my @xmlarray = (); my @warns = (); my @error = (); my $doc_root = ''; my $dr_line = 0; my $doc_error = 0; $i = 0; $lnn = 0; $pretxt = ''; $element = ''; $cc = ''; $pc = ''; $ppc = ''; $pppc = ''; while (($i < $len) && !$doc_error) { $eletyp = 0; # accumulate text before an element $pretxt = ''; for (; $i < $len; $i++) { $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '<'); $pretxt .= $cc; } # accumulate the element $bgnlnn = $lnn; $element = ''; $i++; # bump past '<' char for (; $i < $len; $i++) { $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if (($cc eq '>')||($cc =~ /\s/)); # stop on '>' OR a SPACE, or out of chars $element .= $cc; # accumulate element } next if (length($element) == 0); prt("$lnn: Process element [$element]\n") if ($opts & $XO_SHOW1); # determine element type if ($element =~ /^\?xml/i) { $eletyp = $XT_HEADER; # is xml header $xitem = $element; if ($cc ne '>') { for (; $i < $len; $i++) { $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '>'); $xitem .= $cc; } } # store head # $XAO_ TYPE TEXT TAG PRE LNNUM $stktxt = ret_stack_text(\@elestack); push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]); prt("$lnn: Done HEADER <$xitem> END HEADER\n") if ($opts & $XO_SHOW2); $pretxt = ''; $element = ''; $i++; # skip last '>' } elsif ($element =~ /^!--/) { $eletyp = $XT_COMMENT; # comment $xitem = $element; if (!(($cc eq '>')&&($pc eq '-')&&($ppc eq '-'))) { $pc = '*'; # make sure not trapped by for (; $i < $len; $i++) { $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if (($cc eq '>')&&($pc eq '-')&&($ppc eq '-')); $xitem .= $cc; } } prt("$lnn: Done COMMENT <$xitem> END COMMENT\n") if ($opts & $XO_SHOW2); # $XAO_ TYPE TEXT TAG PRE LNNUM $stktxt = ret_stack_text(\@elestack); push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]); $element = ''; $pretxt = ''; $i++; # skip last '>' } elsif ($element =~ /^!DOCTYPE/) { $eletyp = $XT_DOCTYPE; # doctype - $cc has to be a SPACE $xitem = $element; if ($cc ne '>') { for (; $i < $len; $i++) { $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '>'); $xitem .= $cc; if ($cc eq '[') { # enter DOCTYPE comment $i++; for (; $i < $len; $i++) { $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); $xitem .= $cc; last if ($cc eq ']'); if (($cc eq '-')&&($pc eq '-')&&($ppc eq '!')&&($pppc eq '<')) { # entered comment in doctype $i++; $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); $xitem .= $cc; $i++; for (; $i < $len; $i++) { $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); $xitem .= $cc; last if (($cc eq '>')&&($pc eq '-')&&($ppc eq '-')); } } } } } prt("$lnn: Done DOCTYPE <$xitem> END DT\n") if ($opts & $XO_SHOW2); } else { $msg = "ERROR:$lnn: Closed DOCTYPE> - mal-formed XML!"; push(@error,$msg); prtw("$msg\n") if ($opts & $XO_SHOW4); $doc_error++; last; } # $XAO_ TYPE TEXT TAG PRE LNNUM $stktxt = ret_stack_text(\@elestack); push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]); $pretxt = ''; $element = ''; $i++; # skip last '>' } elsif ($element =~ /^!\[CDATA\[/) { $eletyp = $XT_CDATA; # CDATA $xitem = $element; if (!(($cc eq '>') && ($pc eq ']') && ($ppc eq ']'))) { for (; $i < $len; $i++) { $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if (($cc eq '>')&&($pc eq ']')&&($ppc eq ']')); $xitem .= $cc; } } prt("$lnn: Done CDATA <$xitem> END CDATA\n") if ($opts & $XO_SHOW2); # $XAO_ TYPE TEXT TAG PRE LNNUM $stktxt = ret_stack_text(\@elestack); push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]); $element = ''; $pretxt = ''; $i++; # skip last '>' } # ================================================================= if (length($element)) { # an element, which may be complete ie end in '/>', and may have attributes a="b" $xitem = $element; if ($cc eq '>') { if ($element =~ /^\//) { $xitem = $element; $element = substr($element,1); # remove leading '/' from element $eletyp = $XT_ELE2; # open, now closed - so pop } elsif ($pc eq '/') { # open/closed element $eletyp = $XT_ELE3; } else { $eletyp = $XT_ELE1; # open, so push } } else { for (; $i < $len; $i++) { $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '>'); $xitem .= $cc; } if ($element =~ /^\//) { $element = substr($element,1); $eletyp = $XT_ELE2; # open, now closed - so pop } elsif ($pc eq '/') { # open/close element $eletyp = $XT_ELE3; } else { $eletyp = $XT_ELE1; # open, so push } } if ($eletyp == $XT_ELE1) { push(@elestack,[$element,$lnn]); $stkcnt = scalar @elestack; if ($stkcnt == 1) { if (length($doc_root)) { $msg = "ERROR:$lnn: Have doc root [$doc_root]($dr_line), now 2nd root [$element]($lnn)!"; push(@error,$msg); prtw("$msg\n") if ($opts & $XO_SHOW4); $doc_error++; last; } $doc_root = $element; $dr_line = $lnn; } prt("$lnn: PUSHED [$element] ($stkcnt)\n") if ($opts & $XO_SHOW3); } elsif ($eletyp == $XT_ELE2) { if (@elestack) { $eleref = $elestack[-1]; $pele = ${$eleref}[0]; $plnn = ${$eleref}[1]; if ($element eq $pele) { pop @elestack; $stkcnt = scalar @elestack; prt("$lnn: POPPED [$element] ($stkcnt)\n") if ($opts & $XO_SHOW3); } else { $msg = "WARNING:$lnn: Element [$element] NOT last. Last is [$pele]($plnn)! NO POP"; push(@warns,$msg); prtw("$msg\n") if ($opts & $XO_SHOW4); } } else { $msg = "WARNING:$lnn: Element [$element] NOT ON EMPTY STACK! NO POP"; push(@warns,$msg); prtw("$msg\n") if ($opts & $XO_SHOW4); } } # $XAO_ TYPE TEXT TAG PRE LNNUM $stktxt = ret_stack_text(\@elestack); push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]); $pretxt = ''; $element = ''; $i++; # skip last '>' } } if ($i < $len) { $msg = "WARNING:$lnn: Still ".($len - $i)." characters in file [$fil] NOT PARSED!"; push(@warns,$msg); prtw("$msg\n") if ($opts & $XO_SHOW4); } check_ele_stack(\@elestack,\@warns,$opts); # if (!$doc_error); # fill up the HASH with collections # ================================= ${$rxmlhash}{$x_warn} = [ \@warns, \@error ] if (@warns || @error); ${$rxmlhash}{$x_cont} = \@xmlarray; ${$rxmlhash}{$x_root} = $doc_root; ${$rxmlhash}{$x_file} = $fil; # ================================= return $rxmlhash; } sub has_utf_16_BOM($) { my ($fil) = shift; if (open INF, "<$fil") { binmode INF; my $buf = ""; if ((read INF, $buf, 2) == 2) { close INF; my $od1 = ord(substr($buf,0,1)); my $od2 = ord(substr($buf,1,1)); if (($od1 == 0xFF)&&($od2 == 0xFE)) { return (16+2); # LittleEndians (windows) } elsif (($od1 == 0xFE)&&($od2 == 0xFF)) { return (16+4); # BigEndians (unix) } elsif ($od1 == 0) { return 4; } elsif ($od2 == 0) { return 2; } return 1; } close INF; } return 0; } sub parse_xml_file($) { my ($fil) = @_; my $bom = has_utf_16_BOM($fil); if (!open INF, "<$fil") { pgm_exit(1,"ERROR: Unable to open file [$fil]!"); } if ($bom & 2) { binmode INF, ":encoding(UTF-16LE)"; } elsif ($bom & 4) { binmode INF, ":encoding(UTF-16BE)"; } my @lines = ; close INF; $lines[0] = substr($lines[0],1) if ($bom & 16); # move PAST the BOM my $text = join("",@lines); my $len = length($text); my $lnn = scalar @lines; prt("Processing $lnn lines, $len chars, from "); prt("\n ") if (length($fil) > 24); prt("[$fil]"); if ($bom & 6) { prt(" UTF-16LE") if ($bom & 2); prt(" UTF-16BE") if ($bom & 4); prt("(BOM)") if ($bom & 16); } prt("\n"); return scan_xml_text($fil,$text,0); } sub get_warn_error_text($) { my ($rh) = @_; my $text = ''; if (defined ${$rh}{$x_warn}) { my $rwarn = ${$rh}{$x_warn}[0]; my $rerror = ${$rh}{$x_warn}[1]; my ($err); if (@{$rwarn}) { foreach $err (@{$rwarn}) { $text .= "\n"; } } if (@{$rerror}) { foreach $err (@{$rerror}) { $text .= "\n"; } } } return $text; } sub get_xml_ref_hash_text($$) { my ($rh,$opts) = @_; if (!defined ${$rh}{$x_cont}) { return "\n"; } my $ra = ${$rh}{$x_cont}; my $cnt = scalar @{$ra}; my $text = ''; my $ind = 0; my $in; my ($i,$eletyp,$xitem,$element,$pretxt,$bgnlnn,$eleref,$prev); $prev = ''; $text .= get_warn_error_text($rh); # $XAO_ TYPE TEXT TAG PRE LNNUM #ush(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn ]); for ($i = 0; $i < $cnt; $i++) { $eleref = ${$ra}[$i]; $eletyp = ${$eleref}[$XAO_TYPE]; $xitem = ${$eleref}[$XAO_TEXT]; $element = ${$eleref}[$XAO_TAG]; $pretxt = ${$eleref}[$XAO_PRE]; $bgnlnn = ${$eleref}[$XAO_LNNUM]; # element types $in = ' ' x $ind; if ($eletyp == $XT_HEADER) { # xml header $text .= "<$xitem>\n"; } elsif ($eletyp == $XT_COMMENT) { # comments if ($opts & 1) { $text .= "$in<$xitem>\n"; } } elsif ($eletyp == $XT_DOCTYPE) { # doctype ]> $text .= "<$xitem>\n"; } elsif ($eletyp == $XT_CDATA) { # cdata if ($opts & 2) { $text .= "$in<$xitem>\n"; } else { $text =~ s/\n$//; $text .= "<$xitem>\n"; } } elsif ($eletyp == $XT_ELE1) { # simple $ind++; $text .= "$in<$xitem>\n"; $prev = $element; } elsif ($eletyp == $XT_ELE2) { # closed $ind-- if ($ind); $in = ' ' x $ind; $pretxt = trim_all($pretxt); if ($prev eq $element) { $text =~ s/\n$//; $text .= $pretxt if (length($pretxt) && !($pretxt =~ /^\s+$/)); $text .= "<$xitem>\n"; } else { $text .= $in; if (length($pretxt) && !($pretxt =~ /^\s+$/)) { $text .= "$pretxt\n" if (length($pretxt) && !($pretxt =~ /^\s+$/)); $text .= "$in<$xitem>\n"; } else { $text .= "<$xitem>\n"; } } } elsif ($eletyp == $XT_ELE3) { # complete $text .= "$in<$xitem>\n"; } } return $text; } sub write_xml_output($$) { my ($fil,$rh) = @_; my $xout = get_xml_ref_hash_text($rh,0); write2file($xout,$fil); prt("XML written to $fil file...\n"); # prt($xout); } sub xml_pgm_exit($) { my ($rh) = @_; if (defined ${$rh}{$x_warn}) { my $txt = get_warn_error_text($rh); prt($txt); pgm_exit(1,"Exit with warnings, errors..."); } else { pgm_exit(0,"Normal exit."); } } sub is_in_array_ref($$) { my ($tag,$rarr) = @_; my $cnt = scalar @{$rarr}; my ($i,$ele); for ($i = 0; $i < $cnt; $i++) { $ele = ${$rarr}[$i]; return ($i+1) if ($ele eq $tag); } return 0; } sub is_in_array_ref_0($$) { my ($tag,$rarr) = @_; my $cnt = scalar @{$rarr}; my ($i,$ele); for ($i = 0; $i < $cnt; $i++) { $ele = ${$rarr}[$i]; # extract ref return ($i+1) if (${$ele}[0] eq $tag); } return 0; } sub get_att_ref($) { my ($txt) = @_; $txt = substr($txt,1) while ($txt =~ /^\s/); # clear any leading spaces my $len = length($txt); my ($i,$cc,$key,$val); $i = 0; my %h = (); while ($i < $len) { $key = ''; for (; $i < $len; $i++) { $cc = substr($txt,$i,1); if ($cc eq '=') { $i++; $cc = substr($txt,$i,1); last; } $key .= $cc; } return \%h if ($cc ne '"'); $i++; $val = ''; for (; $i < $len; $i++) { $cc = substr($txt,$i,1); last if ($cc eq '"'); $val .= $cc; } $h{$key} = $val; $i++; # bump over 2nd inverted commas # and eat any spaces for (; $i < $len; $i++) { $cc = substr($txt,$i,1); last if ( !($cc =~ /\s/) ); } } return \%h; } sub show_ele_hash($$) { my ($reh,$out) = @_; my ($key,$cnt,$cont,$xitem,$msg,$attrs,$atth); my ($k,$v,$ec); $cnt = scalar keys(%{$reh}); prt("Show element hash - count $cnt...\n") if ($out); $cnt = 0; foreach $key (keys %{$reh}) { $cnt++; $ec = ${$reh}{$key}{'count'}; $msg = "$cnt: [$key]($ec) "; #prt("$cnt: key [$key] ".${$reh}{$key}{'count'}." "); $xitem = $key; $attrs = ''; if (defined ${$reh}{$key}{'xitem'}) { $xitem = ${$reh}{$key}{'xitem'}; $attrs = $xitem; $attrs =~ s/$key//; # strip OFF the key $attrs = trim_all($attrs); #$msg .= "[$attrs] "; } $atth = get_att_ref($attrs); if (!defined ${$reh}{$key}{'attrs'}) { ${$reh}{$key}{'attrs'} = $atth; } $msg .= "<$key"; foreach $k (keys %{$atth}) { $v = ${$atth}{$k}; $msg .= " $k=\"$v\""; } $msg .= ">"; if (defined ${$reh}{$key}{'content'}) { $cont = ${$reh}{$key}{'content'}; if (length($cont)) { #prt("cont [$cont]"); $msg .= "$cont"; } else { $msg .=" EMPTY"; delete ${$reh}{$key}; } } else { $msg .= "No CONTENT!"; delete ${$reh}{$key}; } prt("$msg\n") if ($out); } prt("Done element hash - count $cnt...\n") if ($out); } sub transfer_ele_hash($$$) { my ($sim,$rch,$reh) = @_; my ($key); foreach $key (keys %{$reh}) { ${$rch}{$sim}{$key} = [ ${$reh}{$key}{'content'}, ${$reh}{$key}{'attrs'} ]; } } sub xml_set_hash_ref_value($$$$) { my ($p,$h,$s,$rv) = @_; #prt("path [$p], to set element [$s] to value [$v]\n"); my @a = split('/',$p); my $ac = scalar @a; my $cnt = 0; my ($k,$ra,$pth); foreach $k (@a) { last if ($k eq $s); $cnt++; } if ($cnt >= $ac) { pgm_exit(1,"ERROR: PATH [$p] DOES NOT CONTAIN [$s], VALUE ${$rv}[0]\n"); } if ($cnt == 0) { $pth = $s; if (!defined ${$h}{$s}) { ${$h}{$s} = []; } $ra = ${$h}{$s}; push(@{$ra},$rv); ${$h}{$s} = $ra; } elsif ($cnt == 1) { $pth = $a[0]."/".$s; if (!defined ${$h}{$a[0]}{$s}) { ${$h}{$a[0]}{$s} = []; } $ra = ${$h}{$a[0]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$s} = $ra; } elsif ($cnt == 2) { $pth = $a[0]."/".$a[1]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$s}) { ${$h}{$a[0]}{$a[1]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$s} = $ra; } elsif ($cnt == 3) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$s} = $ra; } elsif ($cnt == 4) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s} = $ra; } elsif ($cnt == 5) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s} = $ra; } elsif ($cnt == 6) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s} = $ra; } elsif ($cnt == 7) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$a[6]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s} = $ra; } elsif ($cnt == 8) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$a[6]."/".$a[7]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s} = $ra; } elsif ($cnt == 9) { $pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$a[6]."/".$a[7]."/".$a[8]."/".$s; if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s}) { ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s} = []; } $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s}; push(@{$ra},$rv); ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s} = $ra; } else { pgm_exit(1,"ERROR: Out of DEPTH - Increase in source.\n"); } } sub xml_get_hash_ref_content($$) { my ($p,$h) = @_; my @a = split('/',$p); my $cnt = scalar @a; my ($ra,$rh); my $rtxt = ''; $ra = [ { } ]; if ($cnt == 1) { if (defined ${$h}{$a[0]}) { $ra = ${$h}{$a[0]}; } } elsif ($cnt == 2) { if (defined ${$h}{$a[0]}{$a[1]}) { $ra = ${$h}{$a[0]}{$a[1]}; } } elsif ($cnt == 3) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}; } } elsif ($cnt == 4) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}; } } elsif ($cnt == 5) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}; } } elsif ($cnt == 6) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}; } } elsif ($cnt == 7) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}; } } elsif ($cnt == 8) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}; } } elsif ($cnt == 9) { if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}) { $ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}; } } else { pgm_exit(1,"ERROR: Out of DEPTH - Increase in source.\n"); } $rh = ${$ra}[0]; $rtxt = (defined ${$rh}{$x_chr_cont}) ? ${$rh}{$x_chr_cont} : "'undef'"; return $rtxt; } # end sub xml_get_hash_ref_content($$) # forward reference sub enum_hash_ref($$$); sub enum_array_ref($$$); sub enum_unknown_ref($$$); sub enum_array_ref($$$) { my ($ar,$lev,$t) = @_; my ($k,$r,$c); my $l2 = $lev + 1; $c = 0; foreach $k (@{$ar}) { $r = ref($k); $c++; if ($r eq 'ARRAY') { enum_array_ref($k,$l2,$t); } elsif ($r eq 'HASH') { enum_hash_ref($k,$l2,$t); } else { prt("$lev:A:$c:$t: $k\n"); } } } sub enum_hash_ref($$$) { my ($hr,$lev,$t) = @_; my ($k,$v,$r,$i,$p); my $l2 = $lev + 1; foreach $k (keys %{$hr}) { $v = ${$hr}{$k}; $r = ref($v); $p = length($t) ? "$t/$k" : $k; if ($r eq 'ARRAY') { enum_array_ref($v,$l2,$p); } elsif ($r eq 'HASH') { enum_hash_ref($v,$l2,$p); } else { prt("$lev:H:$t: $k = $v\n"); } } } sub enum_unknown_ref($$$) { my ($hr,$lev,$t) = @_; my $r = ref($hr); if ($r eq 'HASH') { enum_hash_ref($hr,$lev,$t); } elsif ($r eq 'ARRAY') { enum_array_ref($hr,$lev,$t); } else { prt("$lev:$t: $hr\n"); } } sub xml_get_all_children($$$$) { my ($rh,$sim,$dep,$opts) = @_; my %ch = (); my %ch2 = (); my $rch2 = \%ch2; if (!defined ${$rh}{$x_cont}) { return $rch2; # "\n"; } my $ra = ${$rh}{$x_cont}; my $doc_root = ${$rh}{$x_root}; my $cnt = scalar @{$ra}; my $text = ''; my $ind = 0; my $in; my ($i,$eletyp,$xitem,$element,$pretxt,$bgnlnn,$eleref,$prev,$stktxt); my (@arr,$inarr,$acnt,$diff,$i2,$typnam,$sst,$tpt,$msg,$hadyes); my ($stkpath,$inref); my ($topele,$topref,$attref,$topatt,$chr); my @tagstack = (); my %elehash = (); my $dbg_ln = $opts; $prev = ''; $hadyes = 0; $inref = 0; prt("Get all children of [$sim], depth $dep...\n") if ($dbg_ln); $text .= get_warn_error_text($rh); # $XAO_ TYPE TEXT TAG PRE LNNUM #ush(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn ]); for ($i = 0; $i < $cnt; $i++) { $i2 = $i + 1; $eleref = ${$ra}[$i]; $eletyp = ${$eleref}[$XAO_TYPE]; $xitem = ${$eleref}[$XAO_TEXT]; $element = ${$eleref}[$XAO_TAG]; $pretxt = ${$eleref}[$XAO_PRE]; $bgnlnn = ${$eleref}[$XAO_LNNUM]; $stktxt = ${$eleref}[$XAO_STACK]; next if (length($stktxt) == 0); $sst = $stktxt; $sst =~ s/^$doc_root\///; $typnam = xml_get_type_name($eletyp); # element types $in = ' ' x $ind; @arr = split('/',$stktxt); $acnt = scalar @arr; $inarr = is_in_array_ref($sim,\@arr); $msg = "$i2: $typnam [$element][$sst]"; $msg .= " $inarr of $acnt "; if ($inarr) { $tpt = trim_all($pretxt); $diff = ($acnt - $inarr); if ($dep <= 0) { $msg .= "YesD"; } elsif ($diff <= $dep) { $msg .= "Yes"; } else { $msg .= "Yes, but DEPTH!"; $inarr = 0; } $msg .= " $tpt" if (length($tpt)); prt("$msg\n") if ($dbg_ln); $hadyes++; } else { if ($element eq $sim) { $msg .= "NO, but YES because element is $sim"; $hadyes++; $inarr = 1; } else { $msg .="NO"; $hadyes-- if ($hadyes); } prt("$msg\n") if ($dbg_ln); } next if (!$inarr); if ($eletyp == $XT_HEADER) { # xml header #$text .= "<$xitem>\n"; } elsif ($eletyp == $XT_COMMENT) { # comments #if ($opts & 1) { # $text .= "$in<$xitem>\n"; #} } elsif ($eletyp == $XT_DOCTYPE) { # doctype ]> #$text .= "<$xitem>\n"; } elsif ($eletyp == $XT_CDATA) { # cdata $text .= "$i2:" if ($dbg_ln); if ($opts & 2) { $text .= "$in<$xitem>\n"; } else { $text =~ s/\n$//; $text .= "<$xitem>\n"; } } elsif ($eletyp == $XT_ELE1) { # simple #$ind++; $text .= "$i2:" if ($dbg_ln); $text .= "$in<$xitem>\n"; $prev = $xitem; $prev =~ s/^$element//; $attref = get_att_ref($prev); $prev = $element; $stkpath = ret_stack_text(\@tagstack); push(@tagstack,[$element,$attref]); $ind = scalar @tagstack; if (defined $elehash{$element}) { $elehash{$element}{'count'}++; } else { $elehash{$element}{'count'} = 1; $elehash{$element}{'line'} = $i2; $elehash{$element}{'xitem'} = $xitem; } } elsif ($eletyp == $XT_ELE2) { # closed #$ind-- if ($ind); $in = ' ' x $ind; $pretxt = trim_all($pretxt); $inref = is_in_array_ref_0($element,\@tagstack); if (@tagstack) { $ind = scalar @tagstack; $topref = $tagstack[-1]; $topele = ${$topref}[0]; $topatt = ${$topref}[1]; if (!$inref) { prtw("WARNING: element [$element] NOT IN STACK!\n"); next; } elsif ($inref != $ind) { prtw("WARNING: element [$element] NOT LAST STACK! last [$topele]\n"); next; } } else { prtw("WARNING: element [$element] NOT IN EMPTY STACK!\n"); next; } $stkpath = ret_stack_text(\@tagstack); pop @tagstack; $ind = scalar @tagstack; if (defined $elehash{$element}) { #$elehash{$element}{'count'}-- if ($elehash{$element}{'count'}); $elehash{$element}{'content'} = $pretxt; } $in = ' ' x $ind; if ($prev eq $element) { $text =~ s/\n$//; $text .= $pretxt if (length($pretxt) && !($pretxt =~ /^\s+$/)); $text .= "$i2:" if ($dbg_ln); $text .= "<$xitem>\n"; } else { $text .= $in; if (length($pretxt) && !($pretxt =~ /^\s+$/)) { $text .= "$pretxt\n" if (length($pretxt) && !($pretxt =~ /^\s+$/)); $text .= "$i2:" if ($dbg_ln); $text .= "$in<$xitem>\n"; } else { $text .= "$i2:" if ($dbg_ln); $text .= "<$xitem>\n"; } } if (length($pretxt)) { $chr = xml_get_content_hr($pretxt,$topatt); xml_set_hash_ref_value($stkpath,$rch2,$element,$chr); } } elsif ($eletyp == $XT_ELE3) { # complete $text .= "$i2:" if ($dbg_ln); $text .= "$in<$xitem>\n"; } } #prt(Dumper($rch2)); #enum_hash_ref(\%ch2,0,''); #enum_hash_ref($rch2,0,''); prt("Debug parse text...\n$text\nEnd Debug parse text\n") if ($dbg_ln); # ========================================== show_ele_hash( \%elehash, 0 ); show_ele_hash( \%elehash, 0 ); transfer_ele_hash( $sim, \%ch, \%elehash ); # ========================================== ### pgm_exit(1,"TEMP EXIT"); #return \%ch; #return \%ch2; return $rch2; } sub show_child_hash($$$) { my ($rc,$sim,$opt) = @_; my $ri = ${$rc}{$sim}; my ($key,$rah,$ritm,$msg,$k,$v,$cont,$cnt); $cnt = scalar keys( %{$ri} ); prt("Show $cnt children hash...\n"); foreach $key (keys %{$ri}) { $msg = "<$key"; $ritm = ${$rc}{$sim}{$key}; $cont = ${$ritm}[0]; $rah = ${$ritm}[1]; foreach $k (keys %{$rah}) { $v = ${$rah}{$k}; $msg .= " $k=\"$v\""; } $msg .= ">$cont"; prt("$msg\n"); } prt("Done $cnt children hash...\n"); } sub show_child_hash2($$$) { my ($rc,$sim,$opt) = @_; my $ri = ${$rc}{$sim}; my @arr = qw(status description aero flight-model author); my ($key,$p,$txt,$min,$len); $min = 0; foreach $key (@arr) { $len = length($key); $min = $len if ($len > $min); } foreach $key (@arr) { $p = "$sim/$key"; $txt = xml_get_hash_ref_content($p,$rc); $key .= ' ' while (length($key) < $min); $key .= ':'; prt("$key $txt\n"); } } sub show_fg_sim_references($) { my ($rh) = @_; my $rc = xml_get_all_children($rh,"sim",0,0); my ($txt,$cnt,$v,$h); #enum_hash_ref($rc,0,''); #enum_unknown_ref($rc,0,''); #prt(Dumper($rc)); #$txt = xml_get_hash_ref_content('sim/status',$rc); #prt("Got sim/status txt = [$txt]\n"); #$txt = xml_get_hash_ref_content('sim/status2',$rc); #prt("Got sim/status2 txt = [$txt]\n"); #if (defined ${$rc}{'sim'}{'status'}[0]{'content'}) { ##if (defined ${$rc}{'sim'}{'status'}) { #$v = ${$rc}{'sim'}{'status'}; #$h = ${$v}[0]; #if (defined ${$h}{'content'}) { #$txt = ${$h}{'content'}; #prt("Got sim/status/h[content] $txt\n"); #} else { #prt("Got sim/status $v\n"); #} #} #show_child_hash($rc,"sim",0); show_child_hash2($rc,"sim",0); } #################################### # ### MAIN ### #my $ref_hash = parse_xml_text("test",$test_xml2); #my $ref_hash = parse_xml_text("test",$test_xml3); #my $ref_hash = scan_xml_text('test',$test_xml3); #my $ref_hash = scan_xml_text('test',$test_xml2); parse_args(@ARGV); my $ref_hash = parse_xml_file($in_file); if (length($out_xml)) { write_xml_output($out_xml,$ref_hash); } if (${$ref_hash}{$x_root} eq 'PropertyList') { show_fg_sim_references($ref_hash); } xml_pgm_exit($ref_hash); #################################### sub give_help { prt("$pgmname: version 0.0.9 2010/01/29\n"); prt("Usage: $pgmname [options] input_file_name\n"); prt("Options:\n"); prt(" -h (-?) = This help, and exit.\n"); prt(" -i file = Alternate for input file name.\n"); prt(" -l = Load log at end.\n"); prt("Input file name will be parsed as an XML file.\n"); pgm_exit(0,"Help exit"); } sub need_arg { my ($a,@av) = @_; if (!@av) { pgm_exit(1,"ERROR: Arg [$a] MUST be followed by a 2nd argument! Aborting...\n"); } } sub parse_args { my (@av) = @_; my ($arg,$sarg); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg =~ /^\?/)) { give_help($arg); } elsif ($sarg =~ /^i/i) { need_arg(@av); shift @av; $arg = $av[0]; $in_file = $arg; } elsif ($sarg =~ /^l/i) { $load_log = 1; } else { pgm_exit(1,"ERROR: Unknown argument [$arg]! Aborting...\n"); } } else { $in_file = $arg; } shift @av; } } # eof - oarse-xml02.pl