# Copyright (C) 2004 Jörg Tiedemann ##################################################################### # # # ##################################################################### # $Author: joerg72 $ # $Id: SubTree.pm,v 1.1.1.1 2004/05/03 15:23:04 joerg72 Exp $ # # package Uplug::XML::SubTree; use strict; use XML::Parser; our $DOCROOT='document'; our $SUBTREEROOT='.*'; our $IGNOREWARNINGS=1; sub new{ my $class=shift; my $self={}; bless $self,$class; $self->init(@_); return $self; } sub init{ my $self=shift; my ($SubTreeRoot,$DocRoot,$DocBody)=@_; if (not ref($self->{XmlParser})){ $self->{XmlParser}= new XML::Parser(Handlers => {Start => \&XmlTreeStart, End => \&XmlTreeEnd, Default => \&XmlTreeChar, XMLDecl => \&XmlDecl, # Doctype => \&XmlDoctype },); } $self->{XmlHandle}=$self->{XmlParser}->parse_start; $self->setTags($SubTreeRoot,$DocRoot,$DocBody); $self->{XmlHandle}->{XmlProlog}=''; } sub parser{return $_[0]->{XmlParser};} sub handle{return $_[0]->{XmlHandle};} #---------------------------------------------- # get handles and set handles for XML::Parser sub starthandle{return \&XmlTreeStart;} sub endhandle{return \&XmlTreeEnd;} sub charhandle{return \&XmlTreeChar;} sub declhandle{return \&XmlTreeDecl;} sub setStarthandle{$_[0]->{XmlParser}->setHandlers('Start',$_[1]);} sub setEndhandle{$_[0]->{XmlParser}->setHandlers('End',$_[1]);} sub setCharhandle{$_[0]->{XmlParser}->setHandlers('Default',$_[1]);} sub setDeclhandle{$_[0]->{XmlParser}->setHandlers('XMLDecl',$_[1]);} #--------------------------------------------------------------------------- # set document-specific XML tags for the XML::Parser # and compile regular expressions sub setTags{ my $self=shift; my ($SubTreeRoot,$DocRoot,$DocBody)=@_; if (not $SubTreeRoot){$SubTreeRoot=$SUBTREEROOT;} $self->{XmlHandle}->{SubTreeRoot}=$SubTreeRoot; $self->{XmlHandle}->{DocRootTag}=$DocRoot if ($DocRoot); $self->{XmlHandle}->{DocBodyTag}=$DocBody if ($DocBody); $self->CompileTagREs(); } #-------------------------------------------------- # compile regular expressions for matching XML-tags sub CompileTagREs{ my $self=shift; foreach my $t ('DocRootTag','SubTreeRoot','DocBodyTag'){ $self->{XmlHandle}->{$t.'RE'}=qr/^($self->{XmlHandle}->{$t})$/; } } #----------------------------------------------------- # parse XML-strings and return the next XML-sub-tree # - uses XML::Parser # # next($root) # $root ---> root tag of the XML sub-tree sub parse{ my $self=shift; my $xml=shift; my $root=shift; if (($root) and ($root ne $self->SubTreeRoot)){ $self->setTags($root); } my $header=undef; my $tail=undef; eval { $self->{XmlHandle}->parse_more($xml); }; if ($@){ if (not $IGNOREWARNINGS){ warn $@; print STDERR $_; } $header=$self->{XmlHandle}->{BeforeSubTree}.$_; $self->{XmlHandle}->{SubTreeEnded}=undef; $self->{XmlHandle}=$self->{XmlParser}->parse_start; # re-start my $ParseStr=$self->{XmlHandle}->{XmlProlog}; # XML parsern! eval { $self->{XmlHandle}->parse_more($ParseStr); }; return 2; } $self->{BeforeSubTree}=$self->{XmlHandle}->{BeforeSubTree}; # header $self->{SubTreeRoot}=$self->{XmlHandle}->{SubTreeEnded}; # root-tag if ($self->{XmlHandle}->{SubTreeEnded}){ my $subtree=$self->{XmlHandle}->{SubTreeString}; $self->{XmlHandle}->{BeforeSubTree}=undef; $self->{XmlHandle}->{SubTreeString}=undef; $self->{XmlHandle}->{SubTreeEnded}=undef; return $subtree; } return undef; } sub XmlProlog{ my $self=shift; return $self->{XmlHandle}->{XmlProlog}; } sub SubTreeRoot{ my $self=shift; return $self->{XmlHandle}->{SubTreeRoot}; } sub DocRootTag{ my $self=shift; return $self->{XmlHandle}->{DocRootTag}; } sub DocBodyTag{ my $self=shift; return $self->{XmlHandle}->{DocBodyTag}; } sub DocRoot{ my $self=shift; return $self->{XmlHandle}->{DocRoot}; } sub DocBody{ my $self=shift; return $self->{XmlHandle}->{DocBody}; } sub NewDoc{ my $self=shift; if ($self->{XmlHandle}->{NewDoc}){ $self->{XmlHandle}->{NewDoc}=0; return 1; } return 0; } sub NewBody{ my $self=shift; if ($self->{XmlHandle}->{NewBody}){ $self->{XmlHandle}->{NewBody}=0; return 1; } return 0; } sub header{ my $self=shift; return $self->{BeforeSubTree}; } sub root{ my $self=shift; return $self->{SubTreeRoot}; } #--------------------------------------------------------------------------- #--------------------------------------------------------------------------- # subroutines for the XML-parser # sub XmlTreeStart{ my $p=shift; my $e=shift; #-------------------------------------------------- # document root tags are parsed ... but ignored #-------------------------------------------------- if ((defined $p->{DocRootTagRE}) and ($e=~/$p->{DocRootTagRE}/)){ $p->{BeforeSubTree}=''; $p->{DocRootTag}=$e; $p->{DocRootTagRE}=qr/^($p->{DocRootTag})$/; $p->{NewDoc}=1; %{$p->{DocRoot}}=@_; return; } #-------------------------------------------------- # a new subtree starts! #-------------------------------------------------- if ($e=~/$p->{SubTreeRootRE}/){ if ($p->{SubTreeString}){ $p->{BeforeSubTree}.=$p->{SubTreeString} } $p->{SubTreeStarted}=$1; $p->{SubTreeEnded}=0; $p->{SubTreeString}=$p->original_string; } #-------------------------------------------------- # we are inside a valid subtree! #-------------------------------------------------- elsif($p->{SubTreeStarted}){ $p->{SubTreeString}.=$p->original_string; } #-------------------------------------------------- # ... neither inside nor at the beginning of a new one #-------------------------------------------------- else{ if ((defined $p->{DocBodyTagRE}) and ($e=~/$p->{DocBodyTagRE}/)){ $p->{DocBodyTag}=$e; $p->{DocBodyTagRE}=qr/^($p->{DocBodyTag})$/; $p->{NewBody}=1; %{$p->{DocBody}}=@_; } $p->{BeforeSubTree}.=$p->original_string; } } sub XmlTreeEnd{ my ($p,$e)=@_; #-------------------------------------------------- # the subtree ended! #-------------------------------------------------- if (($e=~/$p->{SubTreeRootRE}/) and ($p->{SubTreeStarted} eq $1)){ $p->{SubTreeStarted}=0; $p->{SubTreeEnded}=$1; # $p->{BeforeSubTree}=~s/\s*$/\n/s; # $p->{BeforeSubTree}=~s/^\s*//s; $p->{SubTreeString}.=$p->original_string; } #-------------------------------------------------- # still inside ... #-------------------------------------------------- elsif($p->{SubTreeStarted}){ $p->{SubTreeString}.=$p->original_string; } #-------------------------------------------------- # neither inside nor at the end #-------------------------------------------------- else{ $p->{BeforeSubTree}.=$p->original_string; } } sub XmlTreeChar{ my ($p,$e)=@_; #-------------------------------------------------- # inside a subtree -> save the string #-------------------------------------------------- if ($p->{SubTreeStarted}){ $p->{SubTreeString}.=$p->original_string; } #-------------------------------------------------- # not inside?! -> save string as header #-------------------------------------------------- else{ $p->{BeforeSubTree}.=$p->original_string; } } sub XmlDecl{ my ($p,$v,$e,$s)=@_; $p->{XmlProlog}=$p->original_string; $p->{XmlEncoding}=$e; $p->{XmlVersion}=$v; } sub XmlDoctype{ my ($p,$name,$sysid,$publid,$internal)=@_; }