#/usr/bin/perl
use XML::XPath;
if($#ARGV!=1){print "usage : perl $0 fichier_tag fichier_motif";exit;}
my $tag_file= shift @ARGV;
my $patterns_file = shift @ARGV;
my @patterns;
my $nb_patterns=0;
my $nb_tokens=0;
open(PATTERNSFILE, $patterns_file) or die "can't open $patterns_file: $!\n";
# lecture du fichier contenant les motifs, un motif par ligne (par exemple : NOM ADJ)
while ($ligne = ) {
chomp($ligne);
$nb_patterns = push(@patterns,$ligne);
}
my $xp = XML::XPath->new( filename => $tag_file ) or die "big trouble";
foreach my $pattern (@patterns){
@tokens=split(/ /,$pattern);
my $match_file = "res_extract-".join('_', @tokens).".txt";
open(MATCHFILE, ">$match_file") or die "can't open $match_file: $!\n";
&extract_pattern(@tokens);
close(MATCHFILE);
}
# routine d'extraction d'un motif
sub extract_pattern{
@tokenz=@_;
$first_token=shift @tokenz;
chomp($first_token);
$search_path="//element/data[1][contains(text(),\"$first_token\")]";
foreach my $token (@tokenz){
chomp($token);
$search_path.="/ancestor::element/following-sibling::element[1]/data[1][contains(text(),\"$token\")]";
}
foreach my $noeud ( $xp->find($search_path)->get_nodelist ) {
my @matching_tokens;
$noeud_tmp=$noeud->getParentNode;
$i=0;
foreach (@tokens){
$i++;
$motif=$noeud_tmp->getChildNode(3)->string_value;
$nb_tokens=unshift(@matching_tokens,$motif);
$motif="";
@noeudtmp=$xp->find("./preceding-sibling::element[1]",$noeud_tmp)->get_nodelist;
$noeud_tmp=shift(@noeudtmp);
}
print MATCHFILE join(' ', @matching_tokens)."\n";
}
}