Le Script d'extraction des patrons

#!/usr/bin/perl

use strict;
use warnings;
use Unicode::String qw(utf8);
use utf8;
use File::Basename;

# ---------------------------------------------------------------------------
# Command-line handling and output-directory setup.
#
# Expected arguments, in order:
#   1. pattern file
#   2. tagged text file
#   3. output folder (created when it does not exist yet)
#
# Declares the file-scoped variables used by the rest of the script:
# $fpatrons, $ftexte, $outfolder, $nahm, @patterns, @cats and @words.
# ---------------------------------------------------------------------------

# Messages and progress output may contain accented characters. They are
# decoded character strings (this file is under "use utf8"), so let the
# output layers encode them as UTF-8.
binmode(STDOUT, ":encoding(UTF-8)");
binmode(STDERR, ":encoding(UTF-8)");

# All three arguments are mandatory: $ARGV[2] is read unconditionally below.
if ( @ARGV < 3 )
{
	die "Usage: $0 <fichier de patrons> <texte étiquetés> <dossier de sortie>\n\n";
}

my $fpatrons = $ARGV[0];
my $ftexte = $ARGV[1];
my $outfolder = $ARGV[2];

# Drop trailing slashes so that the "/" added when building output paths is
# not doubled. A path made only of slashes (e.g. "/") is left untouched
# instead of being reduced to an empty string.
$outfolder =~ s{(?<=[^/])/+\z}{};

my @patterns =  ();
my @cats = ();
my @words = ();

# Prefix of the output file names: base name of the tagged text without the
# first "tagged-" occurrence and without a trailing ".txt".
my $nahm = basename($ftexte);
$nahm =~ s/tagged-//;
$nahm =~ s/\.txt$//;

# Test for a directory rather than mere existence: if the path exists but is
# a regular file, mkdir fails and the problem is reported right away.
if (! -d $outfolder)
	{
		mkdir($outfolder) or die ("Problème avec la création du répertoire de $outfolder : $!\n");
	}

# ---------------------------------------------------------------------------
# Load the two input files (both read as UTF-8).
#
# @patterns receives the lines of the pattern file verbatim (line endings
# included). @cats and @words receive, for every line of the tagged text,
# the second and the first blank/tab-separated field respectively, so that
# $cats[$k] and $words[$k] always describe the same line.
# ---------------------------------------------------------------------------

open(my $pat_fh, "<:encoding(UTF-8)", $fpatrons)
	or die "Impossible d'ouvrir le fichier de patrons $fpatrons : $!\n";

while (my $ligne = <$pat_fh>)
	{
		push(@patterns, $ligne);
	}

close($pat_fh);

open(my $texte_fh, "<:encoding(UTF-8)", $ftexte)
	or die "Impossible d'ouvrir le texte étiqueté $ftexte : $!\n";

# Read the line first and split afterwards: splitting the result of the read
# directly would split undef at end of file and emit a warning.
while (my $ligne = <$texte_fh>)
{
	# Remove the line ending (LF or CRLF) so it never sticks to the last field.
	$ligne =~ s/\r?\n\z//;

	# Only the first two fields are used; any further field is ignored.
	my ($mot, $cat) = split(/[\t ]+/, $ligne);

	# A line with fewer than two fields still occupies one position, keeping
	# the two arrays aligned; missing fields become empty strings, not undef.
	push(@cats, defined($cat) ? $cat : "");
	push(@words, defined($mot) ? $mot : "");
}

close($texte_fh);

# ---------------------------------------------------------------------------
# Pattern extraction.
#
# Each entry of @patterns is a sequence of blank/tab-separated regular
# expressions. For every position in @cats where consecutive categories
# match those expressions one by one, the corresponding entries of @words
# are collected on one output line, each word preceded by a single space.
#
# The result for a pattern is written (as UTF-8) to
#   <outfolder>/<nahm>-<pattern elements joined with "-">.txt
# and the pattern itself is echoed on standard output.
# ---------------------------------------------------------------------------
foreach my $pattern (@patterns)
{
	my $output = "";

	# Strip the line ending, including a CR left by CRLF files, so that it
	# does not end up in the last expression or in the output file name.
	$pattern =~ s/\r?\n\z//;

	# Leading blanks make split() return an empty first element. An empty
	# regex does not mean "match anything" in Perl (it re-runs the last
	# successfully matched pattern), so empty elements are discarded.
	my @pat = grep { length($_) } split(/[\t ]+/, $pattern);
	print "$pattern\n";

	# Compile every element once per pattern instead of once per position.
	my @regexes = map { qr/$_/ } @pat;

	# Last start index at which the whole pattern still fits in @cats;
	# negative (empty range below) when the pattern is longer than the text.
	my $last_start = @cats - @pat;

	if(@pat != 0)
	{
		START: foreach my $index (0..$last_start)
		{
			foreach my $i (0..$#pat)
			{
				# A missing category is compared as an empty string.
				my $cat = $cats[$index + $i];
				$cat = "" unless defined($cat);
				next START unless $cat =~ $regexes[$i];
			}

			# Every element matched: keep the words of this window.
			$output .= join("", map { " ".$_ } @words[$index..($index + $#pat)])."\n";
		}
	}

	# NOTE(review): pattern elements go into the file name unchanged;
	# characters such as "/" make the open below fail — confirm whether
	# they should be sanitised.
	my $name = $outfolder."/".$nahm."-".join("-",@pat).".txt";

	# Report the failure and move on to the next pattern rather than
	# printing to a handle that was never opened.
	my $out_fh;
	if (!open($out_fh, ">:encoding(UTF-8)", $name))
	{
		warn "Impossible d'ouvrir $name en écriture : $!\n";
		next;
	}
	print {$out_fh} $output;

	# Buffered write errors only surface when the handle is closed.
	close($out_fh) or warn "Erreur lors de la fermeture de $name : $!\n";
}