Generated: Tue Feb 2 17:54:52 2010 from parse-xml.pl 2010/01/26 50.4 KB.
#!/perl -w # NAME: parse-xml.pl # AIM: My attempt at my OWN XML parser use strict; use warnings; use Data::Dumper; require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); # information on XML # from : http://www.w3.org/TR/REC-xml # XML documents SHOULD begin with an XML declaration - XML HEADER # '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' - XML DOCTYPE my $in_file = 'test4.xml'; #my $in_file = 'test3.xml'; #my $in_file = 'C:\DTEMP\FG\CubeServ420.xml'; #my $in_file = 'C:\FGCVS\FlightGear\data/Aircraft/B-2/B-2-set.xml'; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\787\\787.xml"; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\787\\787-set.xml"; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\c172p\\c172p-set.xml"; #my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\737-300\\737-300-set.xml"; #my $in_file = "tempxml6.xml"; # features my $load_log = 1; my $add_header = 1; #my $show_comments = 1; my $keep_path_order = 1; #my $exclude_blanks = 0; my $out_xml = "tempxml5.xml"; my $out_xml2 = "tempxml7.xml"; # special hash strings my $xml_header = "=XML HEADER="; #my $xml_comments = "=XML COMMENT="; my $xml_pathorder = "=XML PATHS="; my $xml_doctype = "=XML DOCTYPE="; # special hash in hash strings my $x_name = 'Name'; my $x_cont = 'Content'; my $x_attr = 'Attributes'; my $x_line = 'Line'; my $x_type = 'Type'; # 1 = <a>...</a>, 2 = <a .../> my $stk_empty = "=*EMPTY*="; my @warnings = (); # debug my $dbg_lines = 1; # show EACH line (trimmed) of XML my $dbg_x01 = 0; # prt("[dbg_x01] Found [$sf]\n") if ($dbg_x01); my $dbg_x02 = 0; # prt("[$dbg_x02] COMMENT:[$tag]\n") if ($dbg_x02); my $dbg_x03 = 0; # prt("[dbg_x03] HEAD:[$tag]\n") if ($dbg_x03); my $dbg_x04 = 0; # prt("[dbg_x04] CLOSE1:[$tag]\n") if ($dbg_x04); my $dbg_x05 = 1; # prt("[dbg_x05] $lnn:$cols: Stacked 1 [$currtag]\n") if ($dbg_x05); see also $dbg_x11 my $dbg_x06 = 1; # prt("[dbg_x06] $lnn:$cols: Stacked 2 [$currtag]\n") if ($dbg_x06); see also $dbg_x16 my $dbg_x07 = 1; # prt("[dbg_x07] $lnn:$cols: UNStacked 1 [$tmptag], currtag [$currtag]\n") if ($dbg_x07); my $dbg_x08 = 1; # prt("[dbg_x08] $lnn:$cols: UNStacked 2 [$tmptag], currtag [$currtag]\n") if ($dbg_x08); also see $dbg_x14 my $dbg_x09 = 0; # prt("[dbg_x09] Done $lnn lines, from [$fil]. $len chars...\n") if ($dbg_x09); my $dbg_x10 = 1; # prt("[dbg_x10] $lnn: END CDATA:<$tag>\n") if ($dbg_x10); # new set of OPEN and CLOSE debug strings my $dbg_x11 = 0; # prt("[dbg_x11] $lnn: $stktxt - [$currtag] open 1 - no attrs - text [$ttxt]\n") if ($dbg_x11); seel also $dbg_x05 my $dbg_x12 = 0; # prt("[dbg_x12]HASH:$lnn: $msg\n") if ($dbg_x12); my $dbg_x13 = 0; # prt("[dbg_x13]$lnn: $stktxt - [$tmptag] content [$ttxt] tag [$tag] closed BLANK\n") if ($dbg_x13); my $dbg_x14 = 0; # prt("[dbg_x14]$lnn: $stktxt - [$tmptag] content [$ttxt] tag [$tag] closed SAVED2 $savedcnt ($tcntc)\n") if ($dbg_x14); also see $dbg_x08 my $dbg_x15 = 0; # prt("[dbg_x15]$lnn: $stktxt - [$currtag] atts [$tmpcont] CHECKME [ "); +++ my $dbg_x16 = 0; # prt("[dbg_x16]$lnn: $stktxt - [$currtag] open 2\n") if ($dbg_x16); also see $dbg_x06 my $dbg_x17 = 0; # prt("[dbg_x17]$lnn: $stktxt - close 2 of [$tmptag] content [$tmpcont] SAVED $savedcnt ($tcntc)\n") if ($dbg_x17); my $dbg_x18 = 1; # prt("[dbg_x18] $lnn:$cols: Done DOCTYPE [$tag]\n") if ($dbg_x18); my $dbg_x19 = 1; # prt("[dbg_x19]$lnn: POPPED 1 [$toptag] ($stkcnt) $stktxt\n") if ($dbg_x19); and POPPED 2 my $dbg_x20 = 1; # prt("[dbg_x20]$lnn:$cols: $cc Proceed to END OF TAG - INNER LOOP\n") if ($dbg_x20); # =================================================== my $test_xml = <<EOF; <?xml version=\"1.0\"?> <html> <head> <title>Title</title> </head> <body> <p><b>content</b><i>italics</i></p> <attrs new="this" old="that" /> </body> </html> EOF # =================================================== my $test_xml2 = <<EOF; <?xml version="1.0"?> <!-- ************************************************************************ JSBSim Cessna 172P with 3D cockpit. Started October 23 2001 by John Check, fgpanels\@rockfish.net ************************************************************************ --> <PropertyList> <sim> <description>Cessna 172P Skyhawk (1981 model)</description> <author>David Megginson</author> <status>production</status> <flight-model archive="y">jsb</flight-model> <aero archive="y">c172p</aero> <model> <path archive="y">Aircraft/c172p/Models/c172p.xml</path> <livery> <file type="string">n301dp</file> <index type="int">0</index> </livery> </model> <startup> <splash-texture>Aircraft/c172p/splash.png</splash-texture> </startup> <!-- hide the 2D panel --> <panel> <visibility archive="y">false</visibility> </panel> <!-- position the pilot viewpoint and angle --> <view> <internal archive="y">true</internal> <config> <x-offset-m archive="y">-0.21</x-offset-m> <y-offset-m archive="y">0.235</y-offset-m> <z-offset-m archive="y">0.36</z-offset-m> <pitch-offset-deg>-12</pitch-offset-deg> </config> </view> <systems> <autopilot> <path>Aircraft/c172p/Systems/KAP140.xml</path> </autopilot> <electrical> <!-- null electrical system path here so we can use a nasal based --> <!-- model defined later in the nasal section of this file. --> <path></path> </electrical> </systems> <sound> <path archive="y">Aircraft/c172p/c172-sound.xml</path> </sound> <help include="c172-help.xml"/> <tutorials include="Tutorials/c172-tutorials.xml"/> <multiplay> <chat_display>1</chat_display> <generic> <int type="int">0</int> <int type="int">0</int> <int type="int">0</int> <int type="int">0</int> <int type="int">0</int> <int type="int">0</int> </generic> </multiplay> <menubar include="Dialogs/c172p-menu.xml"/> </sim> <!-- trim for level cruise --> <controls> <flight> <aileron-trim>0.027</aileron-trim> <rudder-trim>0.0</rudder-trim> </flight> <engines> <engine n="0"> <magnetos>3</magnetos> </engine> </engines> </controls> </PropertyList> EOF my $test_xml3 = <<EOF; <?xml version="1.0" encoding="UTF-8" ?> <!DOCTYPE greeting [ <!-- comment --> <!ELEMENT greeting (#PCDATA)> ]> <greeting>Hello, world!</greeting> EOF sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub show_warnings() { if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "No warnings issued.\n\n" ); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if ( !($msg =~ /\n$/) ); prt($msg); } show_warnings(); close_log($outfile,$load_log); exit($val); } ########################################################## # ### MY XML PARSER ### sub get_first_line($$) { my ($txt,$ln) = @_; my $max = length($txt); my $nxline = ''; my ($k,$cc); for ($k = 0; $k < $max; $k++) { $cc = substr($txt,$k,1); last if ($cc eq "\n"); $nxline .= $cc; } $nxline = trim_all($nxline); prt("$ln:[$nxline]\n") if ($dbg_lines); return $nxline; } sub is_cdata($) { my ($txt) = @_; if ($txt =~ /^\[CDATA\[/) { return 1; } return 0; } sub is_doctype($) { my ($txt) = @_; if ($txt =~ /^!DOCTYPE/) { return 1; } return 0; } sub ret_stack_text($) { my ($ra) = @_; my $rtxt = ''; foreach my $tx (@{$ra}) { $rtxt .= '/' if (length($rtxt)); $rtxt .= ${$tx}[0]; } return $rtxt; } sub ret_stack_text_simple($) { my ($ra) = @_; my $rtxt = ''; foreach my $tx (@{$ra}) { $rtxt .= '/' if (length($rtxt)); $rtxt .= $tx; } return $rtxt; } sub get_ref_tag($$) { my ($rh,$ra) = @_; my $rtxt = ''; my $rrh = $rh; foreach my $tx (@{$ra}) { $rrh = ${$rrh}{$tx}; } return $rrh; } sub get_att_ref($) { my ($txt) = @_; $txt = substr($txt,1) while ($txt =~ /^\s/); # clear any leading spaces my $len = length($txt); my ($i,$cc,$key,$val); $i = 0; my %h = (); while ($i < $len) { $key = ''; for (; $i < $len; $i++) { $cc = substr($txt,$i,1); if ($cc eq '=') { $i++; $cc = substr($txt,$i,1); last; } $key .= $cc; } return \%h if ($cc ne '"'); $i++; $val = ''; for (; $i < $len; $i++) { $cc = substr($txt,$i,1); last if ($cc eq '"'); $val .= $cc; } $h{$key} = $val; $i++; # bump over 2nd inverted commas # and eat any spaces for (; $i < $len; $i++) { $cc = substr($txt,$i,1); last if ( !($cc =~ /\s/) ); } } return \%h; } sub get_save_ref($$$$$) { my ($tag,$attr,$cont,$lnn,$typ) = @_; my %h = (); $h{$x_name} = $tag; $h{$x_attr} = $attr; $h{$x_cont} = $cont; $h{$x_line} = $lnn; $h{$x_type} = $typ; my ($msg,$k,$v,$ats); $ats = ''; foreach $k (keys %{$attr}) { $v = ${$attr}{$k}; $ats .= ' ' if (length($ats)); $ats .= "$k=\"$v\""; } $ats = "none" if (length($ats) == 0); $msg = "$x_name=[$tag], $x_cont=[$cont], "; $msg .= "$x_attr=[$ats]"; prt("[dbg_x12]HASH:$lnn: $msg\n") if ($dbg_x12); return \%h; } sub check_ele_stack($) { my ($res) = @_; my $cnt = scalar @{$res}; if ($cnt) { my $wrap = 4; my $msg = "WARNING: Still $cnt items ON THE STACK! "; my $tcnt = 0; $msg .= "\n" if ($cnt > $wrap); for (my $i = 0; $i < $cnt; $i++) { my $refts = ${$res}[$i]; my $toptag = ${$refts}[0]; my $toplnn = ${$refts}[1]; $msg .= "[".$toptag."]($toplnn) "; $tcnt++; if ($tcnt > $wrap) { $tcnt = 0; $msg .= "\n"; } } prtw("$msg\n"); } } # element types # xml header <?xml ... ?> # comments <!-- ... --> # doctype <!DOCTYPE ... [ <!-- ... --> ]> # elements # simple <element attribs> # closed </element> # complete <element attribs/> sub scan_xml_text($$) { my ($fil,$text) = @_; my $len = length($text); my ($i,$cc,$pretxt,$lnn,$element); my ($xitem,$pc,$ppc,$pppc); my ($eletyp,$eleref,$pele,$plnn); my ($stkcnt); my %xml = (); my $rxml = \%xml; my @elestack = (); my @comstack = (); $i = 0; $lnn = 0; $pretxt = ''; $element = ''; $cc = ''; $pc = ''; $ppc = ''; $pppc = ''; while ($i < $len) { $eletyp = 0; # accumulate text before an element $pretxt = ''; for (; $i < $len; $i++) { $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '<'); $pretxt .= $cc; } # accumulate the element $element = ''; $i++; # bump past '<' char for (; $i < $len; $i++) { $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if (($cc eq '>')||($cc =~ /\s/)); # stop on '>' OR a SPACE, or out of chars $element .= $cc; # accumulate element } next if (length($element) == 0); prt("$lnn: Process element [$element]\n"); # determine element type if ($element =~ /^\?xml/i) { # is xml header $xitem = $element; if ($cc ne '>') { $xitem .= $cc; for (; $i < $len; $i++) { $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '>'); $xitem .= $cc; } } # store head my @cs1 = @comstack; ${$rxml}{$xml_header} = [$pretxt,\@cs1,$xitem]; $pretxt = ''; @comstack = (); prt("$lnn: Done HEADER <$xitem>\n"); $element = ''; } elsif ($element =~ /^!--/) { # comment $xitem = $element; if (!(($cc eq '>')&&($pc eq '-')&&($ppc eq '-'))) { $pc = '*'; # make sure not trapped by <!--> $xitem .= $cc; for (; $i < $len; $i++) { $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if (($cc eq '>')&&($pc eq '-')&&($ppc eq '-')); $xitem .= $cc; } } push(@comstack,$xitem); prt("$lnn: Done COMMENT <$xitem>\n"); $element = ''; } elsif ($element =~ /^!DOCTYPE/) { # doctype - $cc has to be a SPACE $xitem = $element; if ($cc ne '>') { $xitem .= $cc; for (; $i < $len; $i++) { $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '>'); $xitem .= $cc; if ($cc eq '[') { # enter DOCTYPE comment $i++; for (; $i < $len; $i++) { $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); $xitem .= $cc; last if ($cc eq ']'); if (($cc eq '-')&&($pc eq '-')&&($ppc eq '!')&&($pppc eq '<')) { # entered comment in doctype $i++; $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); $xitem .= $cc; $i++; for (; $i < $len; $i++) { $pppc = $ppc; $ppc = $pc; $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); $xitem .= $cc; last if (($cc eq '>')&&($pc eq '-')&&($ppc eq '-')); } } } } } prt("$lnn: Done DOCTYPE <$xitem>\n"); } else { pgm_exit(1,"ERROR: Closed DOCTYPE> - mal-formed XML!\n"); } $element = ''; } # elsif ($element =~ /^\//) { # # begin of a close element - starts with </...> # $xitem = $element; # $element = substr($element,1); # $eletyp = 2; # if ($cc ne '>') { # $xitem .= $cc; # for (; $i < $len; $i++) { # $pc = $cc; # $cc = substr($text,$i,1); # $lnn++ if ($cc eq "\n"); # last if ($cc eq '>'); # $xitem .= $cc; # } # } # # got CLOSED element item # prt("$lnn: Done CLOSE 2 <$xitem>\n"); # } if (length($element)) { # an element, which may be complete ie end in '/>', and may have attributes a="b" $xitem = $element; if ($cc eq '>') { if ($element =~ /^\//) { $xitem = $element; $element = substr($element,1); $eletyp = 2; # open, now closed - so pop } elsif ($pc eq '/') { # open/closed element $eletyp = 3; } else { $eletyp = 1; } } else { $xitem .= $cc; for (; $i < $len; $i++) { $pc = $cc; $cc = substr($text,$i,1); $lnn++ if ($cc eq "\n"); last if ($cc eq '>'); $xitem .= $cc; } if ($element =~ /^\//) { $element = substr($element,1); $eletyp = 2; # open, now closed - so pop } elsif ($pc eq '/') { # open/close element $eletyp = 3; } else { $eletyp = 1; } } if ($eletyp == 1) { push(@elestack,[$element,$lnn]); $stkcnt = scalar @elestack; prt("$lnn: PUSHED [$element] ($stkcnt)\n"); } elsif ($eletyp == 2) { if (@elestack) { $eleref = $elestack[-1]; $pele = ${$eleref}[0]; $plnn = ${$eleref}[1]; if ($element eq $pele) { pop @elestack; $stkcnt = scalar @elestack; prt("$lnn: POPPED [$element] ($stkcnt)\n"); } else { prtw("WARNING:$lnn: Element [$element] NOT last [$pele]! NO POP\n"); } } else { prtw("WARNING:$lnn: Element [$element] NOT ON EMPTY STACK! NO POP\n"); } } } } check_ele_stack(\@elestack); return \%xml; } sub parse_xml_text($$) { my ($fil,$text) = @_; $in_file = $fil; #scan_xml_text($fil,$text); my ($msg); my %xml_hash = (); my ($tag,$len,$i,$cc,$nc,$pc,$pc1,$pc2,$iscom,$tcnt,$ishead,$lnn,$isclose1,$isclose2,$ttxt,$spcnt); my ($i2,$cols,$currtag,$tmptag,$stkcnt,$iscdata,$stktxt); my ($rc,$tmpcont,$attref); my ($key,$val,$ra); my (@tagstack,$savedcnt,$saveref,@pathorder,@comstack,$tcntc); my ($intag,$isdoctype,$refts,$toplnn,$toptag,$curline); $len = length($text); prt("Processing $len chars, from $fil...\n"); $cc = ''; $pc1 = ''; $pc2 = ''; $iscom = 0; $ishead = 0; $lnn = 1; $isclose1 = 0; $isclose2 = 0; $ttxt = ''; $currtag = ''; $cols = 0; $iscdata = 0; @tagstack = (); @pathorder = (); @comstack = (); $tag = ''; # start with CLEAN tag $savedcnt = 0; $intag = 0; $curline = get_first_line($text,$lnn); for ($i = 0; $i < $len; $i++) { $i2 = $i + 1; $cols++; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } $cc = ' '; } if ($cc eq '<') { prtw("WARNING: LOST TAG DATA 3 [$tag]\n") if (length($tag)); $tag = ''; # clear the tag $i++; $tcnt = 0; $iscom = 0; $ishead = 0; $isclose1 = 0; $isclose2 = 0; $iscdata = 0; $spcnt = 0; $intag = 1; # got TAG start prt("[dbg_x20]$lnn:$cols: $cc Proceed to END OF TAG - INNER LOOP\n") if ($dbg_x20); for (; $i < $len; $i++) { $cols++; $i2 = $i + 1; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } $cc = ' '; } # <?xml $ishead = 1 if (($tcnt == 2) && ($nc eq 'l') && ($cc eq 'm') && ($pc eq 'x') && ($pc1 eq '?')); $isdoctype = 1 if (($nc eq 'D') && ($cc eq '!') && ($pc eq '<') && is_doctype(substr($text,$i))); if ($isdoctype) { # stay and EAT DOCTYPE completly $i++; $tag = $cc; $intag = 0; for (; $i < $len; $i++) { $cols++; $i2 = $i + 1; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } } last if ($rc eq '>'); $tag .= $rc; if ($rc eq '[') { # enter BOX $i++; for (; $i < $len; $i++) { $cols++; $i2 = $i + 1; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } } $tag .= $rc; last if ($rc eq ']'); if (($cc eq '-') && ($pc eq '-') && ($pc1 eq '!') && ($pc2 eq '<')) { # enter comment in BOX $i++; $pc = '*'; # make sure no exit on <!--> for (; $i < $len; $i++) { $cols++; $i2 = $i + 1; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } } $tag .= $rc; last if (($rc eq '>')&&($pc eq '-')&&($pc1 eq '-')); } } } } } # out of DOCTYPE $xml_hash{$xml_doctype} = $tag; prt("[dbg_x18] $lnn:$cols: Done DOCTYPE [$tag]\n") if ($dbg_x18); $tag = ''; $intag = 0; $isdoctype = 0; last; # exit INNER tag loop } # <![CDATA[ ... ]]> $iscdata = 1 if (($tcnt == 1) && ($nc eq 'C') && ($cc eq '[') && ($pc eq '!') && ($pc1 eq '<') && is_cdata(substr($text,$i))); if ($iscdata) { #prt("$lnn:$cols: Got CDATA\n"); #prt(substr($text,$i,40)."\n"); #pgm_exit(1,"Temp exit"); prt("[dbg_x10]$lnn: Stay and EAT CDATA completely!\n") if ($dbg_x10); $i++; prtw("WARNING: LOST TAG DATA 1 [$tag]\n") if (length($tag) && ($tag ne '!')); $tag = $pc.$cc; # proceed to END OF CDATA TAG $intag = 0; for (; $i < $len; $i++) { $cols++; $i2 = $i + 1; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } # $cc = ' '; DO NOT CHANGE TO SPACE ON CDATA } # is it '-->', end of comment if (($cc eq '>') && ($pc eq ']') && ($pc1 eq ']')) { last; # reached END OF CDATA } $tag .= $cc; } prt("[dbg_x10]$lnn: Done CDATA completely!\n") if ($dbg_x10); last; # exit TAG inner loop } # <!-- $iscom = 1 if (($tcnt == 1) && ($nc eq '-') && ($cc eq '-') && ($pc eq '!') && ($pc1 eq '<')); if ($iscom) { # stay and EAT comment completely $i++; prtw("WARNING: LOST TAG DATA 2 [$tag]\n") if (length($tag) && ($tag ne '!')); $tag = $pc.$cc; # proceed to END OF TAG for (; $i < $len; $i++) { $cols++; $i2 = $i + 1; $pc2 = $pc1; $pc1 = $pc; $pc = $cc; $rc = substr($text,$i,1); $nc = ($i2 < $len) ? substr($text,$i2,1) : ''; $cc = $rc; if ($cc =~ /\s/) { if ($cc eq "\n") { $lnn++; $cols = 0; $curline = ($i2 < $len) ? get_first_line(substr($text,$i2),$lnn) : ""; } $cc = ' '; } # is it '-->', end of comment if (($cc eq '>') && ($pc eq '-') && ($pc1 eq '-')) { last; # reached END OF COMMENT } $tag .= $rc; } push(@comstack,$tag); $intag = 0; last; # exit TAG inner loop #} elsif (!$iscdata) { } else { # </... $isclose1 = 1 if (($tcnt == 0) && ($cc eq '/')); # <... /> if (($cc eq '/') && ($nc eq '>')) { $isclose2 = 1; # this is a <... /> self closed tag #if ($spcnt && @tagstack && ($tagstack[-1] eq $currtag)) { if ($spcnt) { # if a SPACE count, then this TAG was STACKED if (@tagstack) { $refts = $tagstack[-1]; $toptag = ${$refts}[0]; $toplnn = ${$refts}[1]; } #if (@tagstack && ($tagstack[-1] eq $currtag)) { if (@tagstack && ($toptag eq $currtag)) { $stktxt = ret_stack_text(\@tagstack); #$tmptag = pop @tagstack; $refts = pop @tagstack; $tmptag = ${$refts}[0]; $toplnn = ${$refts}[1]; $stkcnt = scalar @tagstack; if (@tagstack) { #$currtag = $tagstack[-1]; $refts = $tagstack[-1]; $currtag = ${$refts}[0]; } else { $currtag = $stk_empty; # "*EMPTY*"; } $tmpcont = $tag; $tmpcont =~ s/$tmptag\s+//; $savedcnt++; $attref = get_att_ref($tmpcont); $tmpcont = ''; $tcntc = scalar @comstack; prt("[dbg_x19]$lnn: POPPED 2 [$toptag] ($stkcnt) $stktxt\n") if ($dbg_x19); if ($dbg_x17) { prt("[dbg_x17]$lnn: $stktxt - close 2 of [$tmptag] attributes ["); foreach $key (keys %{$attref}) { $val = ${$attref}{$key}; prt("$key=\"$val\" "); } prt("] SAVED $savedcnt ($tcntc) ($stkcnt)\n"); } $saveref = get_save_ref($tmptag,$attref,$tmpcont,$lnn,2); if (!defined $xml_hash{$stktxt}) { $xml_hash{$stktxt} = [ ]; push(@pathorder,$stktxt); # keep (new) PATH order from FILE } $ra = $xml_hash{$stktxt}; my @cs1 = @comstack; push(@{$ra}, [$saveref, \@cs1]); $xml_hash{$stktxt} = $ra; @comstack = (); prt("[dbg_x07] $lnn:$cols: UNStacked 1 [$tmptag], currtag [$currtag] ($stkcnt) $stktxt\n") if ($dbg_x07); $tag = ''; # CLEAR TAG of tag and attributes } else { if (!@tagstack) { pgm_exit(1,"ERROR: $lnn:$cols: curtag [".$currtag."] CLOSE BUT NO STACK?\n"); } else { #pgm_exit(1,"ERROR: $lnn:$cols: curtag [".$currtag."] CLOSE BUT LAST IN STACK IS [".$tagstack[-1]."]?\n"); pgm_exit(1,"ERROR: $lnn:$cols: curtag [".$currtag."] CLOSE BUT LAST IN STACK IS [$toptag]"); } } $i += 2; # IMPORTANT - move past these chars $intag = 0; last; } else { # NO space count, so this is a tag of the form <abc/>, so has NOT been stacked $stktxt = ret_stack_text(\@tagstack); $stkcnt = scalar @tagstack; $attref = get_att_ref(""); # get empty reference $saveref = get_save_ref($tag,$attref,"",$lnn,2); if (!defined $xml_hash{$stktxt}) { $xml_hash{$stktxt} = [ ]; push(@pathorder,$stktxt); # keep (new) PATH order from FILE } $ra = $xml_hash{$stktxt}; my @cs3 = @comstack; push(@{$ra}, [$saveref, \@cs3]); $xml_hash{$stktxt} = $ra; @comstack = (); prt("[dbg_x07] $lnn:$cols: saved [<$tag/>], currtag [$currtag] ($stkcnt) $stktxt\n") if ($dbg_x07); $tag = ''; # CLEAR TAG of tag (of type <tag/>) NO SPACE $i += 2; # IMPORTANT - move past these chars $intag = 0; # done this TAG last; } } elsif ($cc eq '>') { # CLOSE OF TAG if (($spcnt == 0) && !$ishead && !$iscom) { # had no SPACES so far, and NOT head or comment # --------------------------------------------- if ($isclose1) { # had <... /> = CLOSE1 if (@tagstack) { $refts = $tagstack[-1]; $toptag = ${$refts}[0]; $toplnn = ${$refts}[1]; } #if (@tagstack && ($tagstack[-1] eq $currtag)) { if (@tagstack && ($toptag eq $currtag)) { $stktxt = ret_stack_text(\@tagstack); #$tmptag = pop @tagstack; $refts = pop @tagstack; $tmptag = ${$refts}[0]; $toplnn = ${$refts}[1]; $stkcnt = scalar @tagstack; if (@tagstack) { #$currtag = $tagstack[-1]; $refts = $tagstack[-1]; $currtag = ${$refts}[0]; } else { $currtag = $stk_empty; # "*EMPTY*"; } prt("[dbg_x19]$lnn: POPPED 1 [$toptag] ($stkcnt) $stktxt\n") if ($dbg_x19); #if ($exclude_blanks && ((length($ttxt) == 0) || ($ttxt =~ /^\s+$/))) { if ((length($ttxt) == 0) || ($ttxt =~ /^\s+$/)) { prt("[dbg_x13]$lnn: $stktxt - [$tmptag] content [$ttxt] tag [$tag] closed BLANK\n") if ($dbg_x13); } else { $savedcnt++; $tcntc = scalar @comstack; prt("[dbg_x14]$lnn: $stktxt - [$tmptag] content [$ttxt] tag [$tag] closed SAVED2 $savedcnt ($tcntc)\n") if ($dbg_x14); $saveref = get_save_ref($tmptag,$attref,$ttxt,$lnn,1); if (!defined $xml_hash{$stktxt}) { $xml_hash{$stktxt} = [ ]; push(@pathorder,$stktxt); # KEEP (new) PATH ORDER FROM FILE } $ra = $xml_hash{$stktxt}; my @cs2 = @comstack; push(@{$ra}, [$saveref, \@cs2]); # add comment stack $xml_hash{$stktxt} = $ra; @comstack = (); } prt("[dbg_x08] $lnn:$cols: UNStacked 2 [$tmptag], currtag [$currtag] ($stkcnt) $stktxt\n") if ($dbg_x08); $tag = ''; # CLEAR CLOSED TAG $ttxt = ''; # CLEAR CLOSED TEXT } elsif (@tagstack) { #prt("ERROR: $lnn:$cols: curtag [".$currtag."] NE tagstack [".$tagstack[-1]."]!\n"); prt("ERROR: $lnn:$cols: curtag [".$currtag."] NE tagstack [$toptag]!\n"); pgm_exit(1,"Bad TAG 1! file=$fil"); } else { prt("ERROR: $lnn:$cols: curtag [".$currtag."] NOT IN tagstack!\n"); pgm_exit(1,"Bad TAG 2! file=$fil"); } $intag = 0; } elsif (length($tag)) { # CLOSE2 <...> push(@tagstack,[$tag,$lnn]); $stkcnt = scalar @tagstack; $currtag = $tag; $stktxt = ret_stack_text(\@tagstack); prt("[dbg_x11] $lnn: $stktxt - [$currtag] open 1 - no attrs - text [$ttxt] ($stkcnt)\n") if ($dbg_x11); $attref = get_att_ref(""); # returns an EMPTY HASH prt("[dbg_x05] $lnn:$cols: Stacked 1 [$currtag] ($stkcnt) $stktxt\n") if ($dbg_x05); $tag = ''; # clear STACKED tag } else { pgm_exit(1,"ERROR: $lnn:$cols: NOT Close type 1, AND NO TAG LENGTH!\n"); } } else { if ($ishead) { # what to do with the HEAD data - see below #prt("End HEAD [$tag]\n"); #} elsif ($iscom) { # comment now done above # # what to do with COMMENT data - see below # prt("End COMMENT [$tag]\n"); $intag = 0; } else { # had SPACES, and NOT head or comment - then is a tag with attributes $stktxt = ret_stack_text(\@tagstack); $tmpcont = $tag; $tmpcont =~ s/$currtag\s+//; $attref = get_att_ref($tmpcont); if ($dbg_x15) { prt("[dbg_x15]$lnn: $stktxt - [$currtag] atts [$tmpcont] CHECKME [ "); foreach $key (keys %{$attref}) { $val = ${$attref}{$key}; prt("$key = \"$val\" "); } prt("]\n"); } } $tag = '' if (!$ishead); # CLEAR TAG! } last; # exit this INNER TAG loop } elsif ($cc =~ /\s/) { if (($spcnt == 0) && !$ishead && !$isclose1 && !$iscom) { push(@tagstack,[$tag,$lnn]); $stkcnt = scalar @tagstack; $currtag = $tag; $stktxt = ret_stack_text(\@tagstack); prt("[dbg_x16]$lnn: $stktxt - [$currtag] open 2 ($stkcnt)\n") if ($dbg_x16); prt("[dbg_x06] $lnn:$cols: Stacked 2 [$currtag] ($stkcnt) $stktxt\n") if ($dbg_x06); } if ( !($pc =~ /\s/) ) { $tag .= $cc; } $spcnt++; # bump the SPACE counter } else { # NOT '/', '>', '\s'... $tag .= $cc; } } $tcnt++; #last if ($iscom); } # for (; $i < $len; $i++) { # proceed to END OF TAG prt("[dbg_x20]$lnn:$cols: $cc END OF TAG - INNER LOOP tag [$tag]\n") if ($dbg_x20); if ($ishead) { prt("[dbg_x03] $lnn: HEAD:<$tag>\n") if ($dbg_x03); if ($add_header) { if (defined $xml_hash{$xml_header}) { pgm_exit(1,"ERROR: Duplicate HEADER data! [$tag], previous [".$xml_hash{'XML HEADER'}."]\n"); } $xml_hash{$xml_header} = $tag; } $tag = ''; # CLEAR TAG of HEADER DATA - stored in 'XML HEADER' } elsif ($iscom) { prt("[dbg_x02] $lnn: COMMENT:<$tag>\n") if ($dbg_x02); $tag = ''; # CLEAR TAG of COMMENT DATA } elsif ($isclose1) { prt("[dbg_x04] $lnn: CLOSE1:<$tag>\n") if ($dbg_x04); } elsif ($isclose2) { prt("[dbg_x04] $lnn: CLOSE2:<$tag>\n") if ($dbg_x04); } elsif ($iscdata) { prt("[dbg_x10] $lnn: END CDATA currtag [$currtag] [$tag]\n") if ($dbg_x10); $ttxt = $tag; # store the CDATA in tag text ($ttxt) prtw("WARNING:$lnn: tag [$tag] starts with '<'\n") if ($tag =~ /^</); $tag = ''; # CLEAR TAG of CDATA } # done this tag $ttxt = '' if (!$iscdata); next; # back to line processing OUTER LOOP } # got open tag char '<' ############################### # no 'open' yet if ((length($ttxt) == 0)&&($cc eq '<')) { prtw("WARNING:$lnn: Adding $cc to content text!\n"); } #$ttxt .= $cc; $ttxt .= $cc if ($intag); } # outer loop - process text length ##################################### $xml_hash{$xml_pathorder} = \@pathorder; $stkcnt = scalar @tagstack; prt("[dbg_x09] Done $lnn lines, from [$fil]. $len chars...\n") if ($dbg_x09); if ($stkcnt) { $intag = 4; $msg = "WARNING: Still $stkcnt items ON THE STACK! "; $tcnt = 0; $msg .= "\n" if ($stkcnt > $intag); for ($i = 0; $i < $stkcnt; $i++) { #$msg .= "[".$tagstack[$i]."] "; $refts = $tagstack[$i]; $toptag = ${$refts}[0]; $toplnn = ${$refts}[1]; $msg .= "[".$toptag."]($toplnn) "; $tcnt++; if ($tcnt > $intag) { $tcnt = 0; $msg .= "\n"; } } prtw("$msg\n"); } return \%xml_hash; } sub parse_xml_file($) { my ($fil) = @_; if (!open INF, "<$fil") { pgm_exit(1,"ERROR: Unable to open file [$fil]!"); } my @lines = <INF>; close INF; my $text = join("",@lines); my $len = length($text); my $lnn = scalar @lines; prt("Processing $lnn lines, from [$fil]. $len chars...\n"); return parse_xml_text($fil,$text); } sub fix_xml_path($$$$$$$$) { my ($rarr,$ra,$rs,$rm,$out,$nm,$lnn,$rca1) = @_; my ($pcnt,$scnt,$pele,$cnt,$nxt,$ele,$ind,$clos,$tcnt); my ($ccnt,$ctxt); $pcnt = scalar @{$rarr}; $scnt = scalar @{$rs}; $ccnt = scalar @{$rca1}; # comment array # assume stacked <1><2><3> and # new path <1><2><4> # must close <3> and open <4> $pele = ''; $cnt = 0; prt( "[dbg4]:$lnn: path elements [$pcnt], stack elements [$scnt]\n") if ($out & 4); foreach $ele (@{$rarr}) { last if ($cnt >= $scnt); $nxt = ${$rs}[$cnt]; prt( "[dbg2]$cnt: element [$ele], and next 1 [$nxt] prev [$pele]\n") if ($out & 2); if (length($pele) && ($ele ne $nxt)) { while (($scnt > 0) && ($pele ne $nxt)) { $clos = pop @{$rs}; $scnt--; $ind = ' ' x $scnt; prt("$ind</$clos>\n") if ($out); ${$rm} .= "$ind</$clos>\n"; $nxt = '<none>'; if ($scnt) { $tcnt = $scnt - 1; $nxt = ${$rs}[$tcnt]; } prt( "[dbg2]$cnt: dropped [$clos], and next 2 [$nxt] prev [$pele] ($scnt)\n") if ($out & 2); } } $cnt++; $pele = $ele; } $cnt = 0; $scnt = scalar @{$rs}; foreach $ele (@{$rarr}) { # is this element in the stack already? last if ($ele eq $nm); # reached this element if ($cnt < $scnt) { if ($ele eq ${$rs}[$cnt]) { # already done prt( "[dbg2]$cnt: element [$ele] in stack ($scnt)\n") if ($out & 2); } else { prtw("WARNING: Missing element [$ele]!\n"); } } else { $ind = ' ' x $cnt; if ($ccnt) { foreach $ctxt (@{$rca1}) { ${$rm} .= "$ind<$ctxt>\n"; prt("$ind<$ctxt>\n") if ($out); } $ccnt = 0; } ${$rm} .= "$ind<$ele>\n"; prt("$ind<$ele> added\n") if ($out); push(@{$rs},$ele); } $cnt++; } return $cnt; } sub show_rh_kv($$$$$) { my ($path,$ra,$rs,$rm,$out) = @_; my ($k,$v,$rh,$k2); my ($nm,$txt,$attrs,$n2,$v2,$lnn,$typ); my ($cnt,$ele,$rarr,$scnt,$ind,@arr); my ($rh0,$rca1); #$rarr = close_xml_paths($path,$ra,$rs,$rm,$out); $scnt = scalar @{$rs}; @arr = split("/",$path); $rarr = \@arr; foreach $rh (@{$ra}) { $nm = ''; $txt = ''; $attrs = ''; $lnn = ''; $typ = 0; #prt(Dumper($rh)); $rh0 = ${$rh}[0]; $rca1 = ${$rh}[1]; # extact any COMMENT ARRAY (ref) foreach $k (keys %{$rh0}) { $v = ${$rh0}{$k}; if ($k eq $x_attr) { # this is a hash foreach $k2 (keys %{$v}) { $v2 = ${$v}{$k2}; $attrs .= ' ' if (length($attrs)); $attrs .= "$k2=\"$v2\""; } } elsif ($k eq $x_cont) { $txt = $v; } elsif ($k eq $x_name) { $nm = $v; } elsif ($k eq $x_line) { $lnn = $v; } elsif ($k eq $x_type) { $typ = $v; } else { pgm_exit(1,"ERROR: Unknown value in HASH [$k]???\n"); } } $cnt = fix_xml_path($rarr,$ra,$rs,$rm,$out,$nm,$lnn,$rca1); $n2 = (length($attrs)) ? "$nm $attrs" : "$nm"; $ind = ' ' x $cnt; if ($typ == 2) { prt("$ind<$n2 />\n") if ($out); ${$rm} .= "$ind<$n2 />\n"; } else { prt("$ind<$n2>$txt</$nm>\n") if ($out); ${$rm} .= "$ind<$n2>$txt</$nm>\n"; } } } sub show_ref_hash($$$) { my ($rh,$fil,$out) = @_; my ($key,$val,$cnt,$itm,$msg,$scnt,$msg2); my $def_head = "?xml version=\"1.0\"?"; my @stack = (); my $dopo = ($keep_path_order && (defined ${$rh}{$xml_pathorder})) ? 1 : 0; $msg2 = "Show of ref hash... from $fil "; if ($dopo) { $msg2 .= "in path order "; } else { $msg2 .= "in sort order "; } prt("$msg2\n"); $cnt = 0; $key = $xml_header; if (defined ${$rh}{$key}) { $val = ${$rh}{$key}; } else { $val = $def_head; } $msg = "<$val>\n"; prt($msg) if ($out); $msg .= "<!-- $msg2 -->\n"; # if ($show_comments) { # $key = $xml_comments; # if (defined ${$rh}{$key}) { # $val = ${$rh}{$key}; # foreach $itm (@{$val}) { # prt("<$itm>\n") if ($out); # $msg .= "<$itm>\n"; # } # } # } if ($dopo) { my $rpo = ${$rh}{$xml_pathorder}; foreach $key (@{$rpo}) { $val = ${$rh}{$key}; $cnt++; if ($key eq $xml_header) { #prt("$cnt: $key HEADER = $val\n"); #} elsif ($key eq $xml_comments) { #prt("$cnt: $key COMMENTS\n"); } elsif ($key eq $xml_pathorder) { #prt("$cnt: $key PATH ORDER\n"); } else { prt("$cnt:PATH: $key\n") if ($out); show_rh_kv($key,$val,\@stack,\$msg,$out); #prt(Dumper($key)); #prt(Dumper($val)); #prt("$cnt: End other\n"); } } } else { foreach $key (sort keys %{$rh}) { $val = ${$rh}{$key}; $cnt++; if ($key eq $xml_header) { #prt("$cnt: $key HEADER = $val\n"); #} elsif ($key eq $xml_comments) { #prt("$cnt: $key COMMENTS\n"); } elsif ($key eq $xml_pathorder) { #prt("$cnt: $key PATH ORDER\n"); } else { prt("$cnt:PATH: $key\n") if ($out); show_rh_kv($key,$val,\@stack,\$msg,$out); #prt(Dumper($key)); #prt(Dumper($val)); #prt("$cnt: End other\n"); } } } $scnt = scalar @stack; while ($scnt > 0) { $scnt--; $key = $stack[$scnt]; $val = ' ' x $scnt; $msg .= "$val</$key>\n"; prt("$val</$key>\n") if ($out); } prt("Done $cnt keys in ref hash... from $fil\n"); return $msg; } sub get_all_children($$$$) { my ($rh,$pp,$dep,$out) = @_; my %hash = (); my @po = (); my ($key,$val,$cnt,@arr,$acnt,$ele,$fnd); my $rpo = ${$rh}{$xml_pathorder}; my ($i,$j); $cnt = 0; $fnd = 0; foreach $key (@{$rpo}) { $val = ${$rh}{$key}; $cnt++; if ($key eq $xml_header) { #prt("$cnt: $key HEADER = $val\n"); } elsif ($key eq $xml_pathorder) { #prt("$cnt: $key PATH ORDER\n"); } else { prt("$cnt:PATH: $key\n") if ($out & 2); @arr = split("/",$key); $acnt = scalar @arr; for ($i = 0; $i < $acnt; $i++) { $ele = $arr[$i]; if ($ele eq $pp) { # found our path item - check for depth if (($i + $dep + 1) == $acnt) { prt("$cnt:PATH: $key ADDED\n") if ($out); $hash{$key} = $val; push(@po,$key); $fnd++; } last; } } } } if ($fnd) { $hash{$xml_pathorder} = \@po; } return \%hash; } sub get_content_text($) { my ($ra) = shift; my $rtxt = ''; my ($rh,$rh0); foreach $rh (@{$ra}) { $rh0 = ${$rh}[0]; if (defined ${$rh0}{$x_cont}) { $rtxt = ${$rh0}{$x_cont}; return $rtxt if (length($rtxt)); } } return $rtxt; } sub get_element_text($$) { my ($rh,$txt) = @_; my $rtxt = ''; my ($path,@arr,$ele,$val); foreach $path (keys %{$rh}) { @arr = split('/',$path); foreach $ele (@arr) { if ($ele eq $txt) { $val = ${$rh}{$path}; $rtxt = get_content_text($val); return $rtxt if (length($rtxt)); } } } return $rtxt; } sub get_element_hash_for_array($$) { my ($rhc,$ra) = @_; my %h = (); my ($ele); foreach $ele (@{$ra}) { my $txt = get_element_text($rhc,$ele); $h{$ele} = $txt; } return \%h; } sub show_fg_sim_references($) { my ($rh) = @_; my $rc = get_all_children($rh,"sim",1,0); #my $xout = show_ref_hash($rc,"test",0); #write2file($xout,$out_xml2); #prt("XML written to $out_xml2 file...\n"); my @arr = qw(description aero author status flight-model); #my $rah = get_element_hash_for_array($rc,\@arr); my $rah = get_element_hash_for_array($rh,\@arr); my ($key,$val,$min,$len); $min = 0; foreach $key (keys %{$rah}) { $len = length($key); $min = $len if ($len > $min); } foreach $key (keys %{$rah}) { $val = ${$rah}{$key}; $key .= ' ' while (length($key) < $min); prt("$key : $val\n"); } } sub show_fg_sim_references_ok($) { my ($rh) = @_; my $rc = get_all_children($rh,"sim",1,0); my $xout = show_ref_hash($rc,"test",0); write2file($xout,$out_xml2); prt("XML written to $out_xml2 file...\n"); my $txt = get_element_text($rc,'description'); prt("Description: [$txt]\n"); $txt = get_element_text($rc,'aero'); prt("Aero : [$txt]\n"); $txt = get_element_text($rc,'author'); prt("Author : [$txt]\n"); $txt = get_element_text($rc,'status'); prt("Status : [$txt]\n"); # flight-model $txt = get_element_text($rc,'flight-model'); $txt = "jsb (default)" if (length($txt) == 0); prt("FDM : [$txt]\n"); } sub play_with_xml_ref($) { my ($rh) = shift; show_fg_sim_references($rh); } #################################### # ### MAIN ### #my $ref_hash = parse_xml_text("test",$test_xml2); #my $ref_hash = parse_xml_text("test",$test_xml3); #my $ref_hash = scan_xml_text('test',$test_xml3); my $ref_hash = scan_xml_text('test',$test_xml2); #my $ref_hash = parse_xml_file($in_file); prt("\n"); #my $xout = show_ref_hash($ref_hash,$in_file,0); #write2file($xout,$out_xml); #prt("XML written to $out_xml file...\n"); # prt($xout); #play_with_xml_ref($ref_hash); pgm_exit(0,"Normal exit."); #################################### # eof