#!/usr/bin/perl
#
# Extracts, from a tagged text, every run of consecutive words whose
# categories match a sequence of patterns.
#
# Arguments:
#   1. pattern file : one pattern per line; each line is a sequence of
#                     regular expressions separated by tabs or spaces.
#   2. tagged text  : one token per line, fields separated by tabs or
#                     spaces. Field 1 is the word, field 2 its category;
#                     any further field is ignored.
#   3. output folder: created if it does not exist.
#
# For each pattern line, one file is written in the output folder, named
#   <text basename without "tagged-" and ".txt">-<pattern elements joined by "-">.txt
# containing one matching word sequence per line (each word preceded by a
# space).
use strict;
use warnings;
use Unicode::String qw(utf8);
use utf8;
use File::Basename;

# All three arguments are needed: the output folder is used unconditionally
# further down.
if ( @ARGV < 3 ) {
    die utf8("Usage: $0 <fichier de patrons> <texte étiquetés> <dossier de sortie>\n\n");
}

my $fpatrons  = $ARGV[0];
my $ftexte    = $ARGV[1];
my $outfolder = $ARGV[2];
$outfolder =~ s/[\/]$//;    # drop one trailing slash so paths join cleanly

# The pattern lines echoed below are decoded character strings; give STDOUT
# an encoding layer so they are emitted as UTF-8.
binmode( STDOUT, ':encoding(UTF-8)' );

my @patterns = ();
my @cats     = ();
my @words    = ();

# Stem used to build the output file names.
my $nahm = basename($ftexte);
$nahm =~ s/tagged-//;
$nahm =~ s/\.txt$//;

if ( !-e $outfolder ) {
    mkdir($outfolder)
        or die("Problème avec la création du répertoire de $outfolder : $!");
}

# Load the pattern lines.
open( my $pat_fh, "<:encoding(UTF-8)", $fpatrons )
    or die "Impossible d'ouvrir $fpatrons : $!";
while ( my $ligne = <$pat_fh> ) {
    push( @patterns, $ligne );
}
close($pat_fh);

# Load the tagged text into two parallel arrays (word / category).
open( my $txt_fh, "<:encoding(UTF-8)", $ftexte )
    or die "Impossible d'ouvrir $ftexte : $!";
while ( my $ligne = <$txt_fh> ) {
    $ligne =~ s/\r?\n\z//;    # strip LF or CRLF line ending
    my ( $mot, $cat ) = split( /[\t ]+/, $ligne );

    # Blank or one-field lines keep their position with empty values, so a
    # match can never silently run across them and no undef reaches the
    # regex match below.
    push( @words, defined $mot ? $mot : "" );
    push( @cats,  defined $cat ? $cat : "" );
}
close($txt_fh);

foreach my $pattern (@patterns) {
    my $output = "";

    # Normalise the line: remove the line ending and surrounding blanks.
    # Leading blanks would otherwise produce an empty first element, and an
    # empty regex in Perl means "reuse the last successful pattern".
    $pattern =~ s/\r?\n\z//;
    $pattern =~ s/\A[\t ]+//;
    $pattern =~ s/[\t ]+\z//;

    my @pat = split( /[\t ]+/, $pattern );
    print "$pattern\n";

    my $lenpat = @pat;
    my $lencat = @cats;

    if ( $lenpat != 0 ) {

        # Compile each element once instead of once per window position.
        my @regexes = map { qr/$_/ } @pat;

        # Slide a window of $lenpat categories over the text; the range is
        # empty when the text is shorter than the pattern.
        foreach my $index ( 0 .. ( $lencat - $lenpat ) ) {
            my $pwte = "";
            my $i    = 0;
            while ( $i < $lenpat && $cats[ $i + $index ] =~ $regexes[$i] ) {
                $pwte .= " " . $words[ $i + $index ];
                $i++;
            }

            # Every element matched: keep the word sequence.
            if ( $i == $lenpat ) {
                $output .= $pwte . "\n";
            }
        }
    }

    my $name = $outfolder . "/" . $nahm . "-" . join( "-", @pat ) . ".txt";

    # A pattern may contain characters that are invalid in a file name;
    # report the failure and carry on with the remaining patterns.
    my $out_fh;
    if ( !open( $out_fh, ">:encoding(UTF-8)", $name ) ) {
        warn "Impossible d'ouvrir $name : $!\n";
        next;
    }
    print {$out_fh} $output;
    close($out_fh)
        or warn "Erreur lors de la fermeture de $name : $!\n";
}