#/usr/bin/perl use XML::XPath; if($#ARGV!=1){print "usage : perl $0 fichier_tag fichier_motif";exit;} my $tag_file= shift @ARGV; my $patterns_file = shift @ARGV; my @patterns; my $nb_patterns=0; my $nb_tokens=0; open(PATTERNSFILE, $patterns_file) or die "can't open $patterns_file: $!\n"; # lecture du fichier contenant les motifs, un motif par ligne (par exemple : NOM ADJ) while ($ligne = ) { chomp($ligne); $nb_patterns = push(@patterns,$ligne); } my $xp = XML::XPath->new( filename => $tag_file ) or die "big trouble"; foreach my $pattern (@patterns){ @tokens=split(/ /,$pattern); my $match_file = "res_extract-".join('_', @tokens).".txt"; open(MATCHFILE, ">$match_file") or die "can't open $match_file: $!\n"; &extract_pattern(@tokens); close(MATCHFILE); } # routine d'extraction d'un motif sub extract_pattern{ @tokenz=@_; $first_token=shift @tokenz; chomp($first_token); $search_path="//element/data[1][contains(text(),\"$first_token\")]"; foreach my $token (@tokenz){ chomp($token); $search_path.="/ancestor::element/following-sibling::element[1]/data[1][contains(text(),\"$token\")]"; } foreach my $noeud ( $xp->find($search_path)->get_nodelist ) { my @matching_tokens; $noeud_tmp=$noeud->getParentNode; $i=0; foreach (@tokens){ $i++; $motif=$noeud_tmp->getChildNode(3)->string_value; $nb_tokens=unshift(@matching_tokens,$motif); $motif=""; @noeudtmp=$xp->find("./preceding-sibling::element[1]",$noeud_tmp)->get_nodelist; $noeud_tmp=shift(@noeudtmp); } print MATCHFILE join(' ', @matching_tokens)."\n"; } }