# Copyright (C) 2004 Jörg Tiedemann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#####################################################################
#
# Uplug::IO::PlugXML - bitext in PLUG XML
#
#####################################################################
# $Author: joerg72 $
# $Id: PlugXML.pm,v 1.2 2004/05/04 09:57:34 joerg72 Exp $

package Uplug::IO::PlugXML;

use strict;
use warnings;
use vars qw(@ISA $VERSION);
use vars qw(%StreamOptions);
use Uplug::IO::XML;
use Uplug::Data::Lang;
# use Uplug::IO::Any;

$VERSION = '1.0';
@ISA     = qw( Uplug::IO::XML );

# Stream options and their default values.
%StreamOptions = (
    'DocRootTag'       => 'PLUG',
    'DocHeaderTag'     => 'header',
    'DocBodyTag'       => 'corpus',
    'root'             => 'align',
    'InternalEncoding' => 'iso-8859-1',
    'DTDname'          => 'PLUG',
    'DTDsystemID'      => 'plugXML.dtd',
    'DefaultEncoding'  => 'iso-8859-1',
    # 'encoding'       => 'iso-8859-1',
    # 'DataStructure'  => 'complex',
    # 'encoding'       => $Uplug::IO::DEFAULTENCODING,
    # 'DTDpublicID'    => '...'
);
$StreamOptions{DocRoot}{version} = '1.0';

# new()
#
# Constructor. Builds the object through the parent class constructor and
# then hands the object's StreamOptions hash together with this package's
# %StreamOptions defaults to Uplug::IO::AddHash2Hash.
# (presumably the defaults are merged into the object's options; the helper
# is defined outside this file -- verify there)
#
# Returns the new object.
sub new {
    my $class = shift;

    # The class name is passed on as an explicit argument as well; this is
    # kept exactly as the parent constructor has always been called here.
    my $self = $class->SUPER::new($class);

    &Uplug::IO::AddHash2Hash($self->{StreamOptions}, \%StreamOptions);
    return $self;
}

# read($data)
#
# Reads the next unit into $data through the parent class. Before reading,
# $data->{source} and $data->{target} are created as Uplug::Data::Lang
# objects unless they already hold references. After a successful read the
# first 'seg' node found in $data is labelled 'source' and made the root of
# $data->{source}; the second one is labelled 'target' and made the root of
# $data->{target}.
#
# Returns 1 if the parent read succeeded, 0 otherwise.
sub read {
    my $self = shift;
    my ($data) = @_;

    for my $side (qw(source target)) {
        if (not ref($data->{$side})) {
            $data->{$side} = Uplug::Data::Lang->new();
        }
    }

    return 0 unless $self->SUPER::read($data);

    my @seg = $data->findNodes('seg');
    if (defined $seg[0]) {
        $seg[0]->setLabel('source');
        $data->{source}->setRoot($seg[0]);
    }
    if (defined $seg[1]) {
        $seg[1]->setLabel('target');
        $data->{target}->setRoot($seg[1]);
    }
    return 1;
}

# write($data)
#
# Writes $data through the parent class. If $data has no 'seg' nodes, its
# 'source' and 'target' nodes are turned into 'seg' nodes first; a missing
# 'lang' attribute is filled in with 'src' (source) or 'trg' (target).
#
# Returns 0 if 'seg' nodes are absent and either the 'source' or the
# 'target' node cannot be found; otherwise returns whatever the parent
# write() returns.
sub write {
    my $self = shift;
    my ($data) = @_;

    # Called for its side effects only; the return value is ignored.
    $self->Uplug::IO::write($data);

    my @seg = $data->getNodes('seg');
    if (not @seg) {
        my ($source) = $data->getNodes('source');
        my ($target) = $data->getNodes('target');
        return 0 if not ref($source);
        return 0 if not ref($target);

        my $srcLang = $data->attribute($source, 'lang');
        my $trgLang = $data->attribute($target, 'lang');
        if (not $srcLang) { $data->setAttribute($source, 'lang', 'src'); }
        if (not $trgLang) { $data->setAttribute($target, 'lang', 'trg'); }

        $source->setTagName('seg');
        $target->setTagName('seg');
    }
    return $self->SUPER::write($data);
}

# readFromHandle(@args)
#
# Fetches raw text through the parent class (all arguments are passed on
# unchanged) and patches it up:
#
#   * an XML declaration without an encoding gets encoding="iso-8859-1";
#   * every entity reference other than &lt; &gt; &amp; is replaced by
#     " [name] " and the replacement is reported on STDERR.
#
# Returns the patched text, or the parent's result unchanged if it is
# undefined.
sub readFromHandle {
    my $self = shift;
    my $txt  = $self->SUPER::readFromHandle(@_);

    # Nothing to patch if the parent delivered no text.
    return $txt unless defined $txt;

    # NOTE(review): the condition guarding this substitution was garbled in
    # the file (everything between a '<' and a '>' had been lost); only the
    # replacement part ' encoding="iso-8859-1"?>' survived. The test below is
    # a reconstruction of the evident intent -- confirm against the
    # repository history.
    if ($txt =~ /<\?xml(\s[^>]*)\?>/ and $1 !~ /\bencoding\s*=/) {
        $txt =~ s/(<\?xml\s[^>]*?)\s*\?>/$1 encoding="iso-8859-1"?>/;
    }

    # Collect each distinct entity name once, in order of first appearance.
    # NOTE(review): &quot; and &apos; are also predefined in XML but are not
    # exempted here, and numeric references such as &#228; are rewritten as
    # well -- kept as in the original; confirm that this is intended.
    my @ent;
    my %seen;
    while ($txt =~ /\&([^\s\;]+)\;/g) {
        my $name = $1;
        next if $name eq 'lt' or $name eq 'gt' or $name eq 'amp';
        push @ent, $name unless $seen{$name}++;
    }

    for my $name (@ent) {
        print STDERR "replace \&$name\; with \[$name\]!\n";

        # \Q...\E: the entity name is data, not a pattern -- names holding
        # regex metacharacters must not alter (or break) the match.
        $txt =~ s/\&\Q$name\E\;/ [$name] /g;
    }
    return $txt;
}

1;