bao3()

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#

def python():

Patrons


def patrons_maker(liste) :
	"""Build the pattern dictionary from a flat list of POS tags.

	Patterns are separated from one another by a "/" item, e.g.
	["NOM", "ADJ", "/", "VER", "DET", "NOM"].

	Args:
		liste: iterable of POS tags and "/" separators.

	Returns:
		A dict mapping each pattern written as a space-separated string
		("NOM ADJ") to the list of its tags (["NOM", "ADJ"]), in order of
		first appearance. Empty patterns (empty input, leading, trailing or
		doubled "/") are ignored instead of producing a "" entry.
	"""
	patrons = {}
	courant = []
	# The extra "/" flushes the last pattern through the same code path as
	# the others.
	for pos in [*liste, "/"]:
		if pos != "/":
			courant.append(pos)
			continue
		patron = " ".join(f"{tag}" for tag in courant)
		courant = []
		if patron:
			# Splitting the key (rather than reusing the tag list) keeps the
			# value consistent with the key even if a tag contains a space.
			patrons[patron] = patron.split(" ")
	return patrons


def patrons_finder(fichier,liste,parser):
	"""Count the word sequences of a tagged file that match POS patterns.

	The file is read line by line; every line that carries a token adds its
	POS tag and its form to a sliding window, and each pattern is compared
	with the most recent tags of that window.

	Args:
		fichier: path of the annotated file, read as UTF-8.
		liste: flat list of POS tags with "/" between patterns, in the format
			accepted by patrons_maker.
		parser: "ud" for lines made of tab-separated fields, "tt" for lines
			made of <data>...</data> elements.

	Returns:
		A dict with one entry per pattern; each value is a dict mapping the
		matched forms (joined by spaces) to their number of occurrences.
		Returns False, after printing a message, when parser is neither "ud"
		nor "tt".
	"""
	from collections import deque

	patrons = patrons_maker(liste)

	# Field layout of a token line for each supported parser.
	if parser == "ud":
		# One match per field ended by a tab or a newline; group 1 is the field.
		reg = re.compile("(.*?)(\t|\n)")
		pos_p, forme_p = 2, 0
	elif parser == "tt":
		# One match per <data> element; the single group is its content.
		reg = re.compile("<data.*?>(.*?)</data>")
		pos_p, forme_p = 0, 2
	else:
		print('Je ne connais pas ce parser')
		return False

	# Final result: one counting dictionary per pattern.
	dico_patrons = {patron: {} for patron in patrons}
	len_patrons = {patron: len(tags) for patron, tags in patrons.items()}
	nb_champs = max(pos_p, forme_p) + 1

	# Only the last `fenetre` tokens can take part in a match, so the buffers
	# never need to hold more than the longest pattern.
	fenetre = max(len_patrons.values(), default=0)
	pos = deque(maxlen=fenetre)
	forme = deque(maxlen=fenetre)

	with open(fichier, "r", encoding="UTF-8") as f:
		for ligne in f:
			# findall yields tuples when the regex has several groups ("ud")
			# and plain strings when it has one ("tt"); normalise to strings.
			champs = [m[0] if isinstance(m, tuple) else m for m in reg.findall(ligne)]
			if len(champs) < nb_champs:
				# Not a token line (blank, comment, structural markup):
				# nothing new entered the window, so checking the patterns
				# again would count the previous match a second time.
				continue

			# The split keeps the main category of tags such as "DET:art".
			pos.append(champs[pos_p].split(":")[0])
			forme.append(champs[forme_p])

			derniers_pos = list(pos)
			dernieres_formes = list(forme)
			for patron, tags in patrons.items():
				t = len_patrons[patron]
				# A window shorter than the pattern gives a shorter slice and
				# therefore cannot compare equal.
				if derniers_pos[-t:] == tags:
					buf_forme = " ".join(dernieres_formes[-t:])
					dico_patrons[patron][buf_forme] = dico_patrons[patron].get(buf_forme, 0) + 1
	return dico_patrons


def bao3(fichier, liste, parser):
	"""Extract the patterns from a tagged file and write the result.

	Args:
		fichier: path of the annotated file.
		liste: flat list of POS tags with "/" between patterns.
		parser: name of the parser that produced the file ("ud" or "tt").

	Returns:
		True once the result has been handed to ecriture, False when
		patrons_finder rejected the parser name.
	"""
	dic = patrons_finder(fichier, liste, parser)
	# patrons_finder reports an unknown parser by returning False; do not
	# pass that value on as if it were a result dictionary.
	if dic is False:
		return False
	ecriture(dic, parser)
	return True

if __name__ == "__main__":
	# Expected command line: script, annotated file, parser name, then the
	# patterns as separate arguments with "/" between two patterns.
	if len(sys.argv) < 4:
		sys.exit(f"Usage: python3 {sys.argv[0]} FICHIER PARSER PATRON [/ PATRON ...]")

	fichier = sys.argv[1]
	parser = sys.argv[2]
	liste_patrons = sys.argv[3:]

	# Reflect a failed run in the exit status.
	if not bao3(fichier, liste_patrons, parser):
		sys.exit(1)


python3 bao3.py analyse_BAO2.xml tt LISTE_PATRONS
python3 bao3.py analyse_BAO2.txt ud LISTE_PATRONS

LISTE_PATRONS : PATRON / PATRON / ...

***********

return script, résultat

Dépendance


# Excerpt: collects (governor lemma, object lemma) pairs for the "obj" relation.
# NOTE(review): the enclosing definition is not part of this excerpt; `fic`,
# `Path` and `re` must be provided by the surrounding code.
# The document is walked sentence by sentence, with a per-sentence buffer
# mapping each token position to its lemma.
sent_buf = {} 
	# (object lemma, governor position) tuples of the current sentence
	obj_buf = []
	# the pairs are kept in a set so that none of them is repeated
	couples = set()
	for line in Path(fic).read_text().split("\n"):
		if line.startswith("<item>"):
			# read every annotation level stored on the line; the unpacking
			# below requires exactly ten <a> fields
			fields = re.findall("<a>([^<]+)</a>", line)
			idx, word, lemma, tag, _, _, head, rel, _, _ = fields
			# buffer entry: key = position of the word in the sentence, value = its lemma
			sent_buf[idx] = lemma
			# only the obj relation is of interest
			if rel == 'obj':
				# remember the lemma of the current object and the position of its governor
				obj_buf.append((lemma, head))
		if line == "</p>":
		# once the sentence is over, every buffered pair is added to the set
			for obj_lemma, head in obj_buf:
				#print(sent_buf[head], "--[obj]-->", obj_lemma)
				couples.add((f"{sent_buf[head]}", f"{obj_lemma}"))
			# reset both buffers for the next sentence
			obj_buf = []
			sent_buf = {}
	# output formatting for padagraph
	print("@V: #src, %N")
	for src, tgt in couples:
		print(f"{src},{tgt}")

python3 bao3_rel.py sortieudpipe-slurp_3208-2021.xml

***********

return script, résultat

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#

sub perl {

Patrons


# TreeTagger branch: consume @LISTE line by line and count every sequence of
# forms whose tags follow @PATRON.
# NOTE(review): excerpt only; the closing brace of this `if` is not visible here.
if ($parser=~/tt/) {
	while (my $ligne=shift @LISTE) {
		
		# test whether the line held in $ligne matches the first tag of the pattern, $PATRON[0]
		my $terme="";
		
		if ($ligne=~/<element><data type="type">$PATRON[0].*<\/data><data type="lemma">[^<]+?<\/data><data type="string">([^<]+?)<\/data><\/element>/) {
		$terme=$terme.$1;
			my $longueur=1;
			my $indice=1;
			# then read as many following lines as the pattern has tags and test each tag in turn;
			# the current line was removed by shift, so $LISTE[$indice-1] is the line $indice positions after it
			# NOTE(review): the bound check `$indice <= $#PATRON` is evaluated after the match,
			# so $PATRON[$indice] is interpolated once past the end of @PATRON
			while (($LISTE[$indice-1]=~/<element><data type="type">($PATRON[$indice]).*<\/data><data type="lemma">[^<]+?<\/data><data type="string">([^<]+?)<\/data><\/element>/) and ($indice <= $#PATRON)) {
				$indice++;
				$terme.=" ".$2;
				$longueur++;
			}
			# the term is counted only when every tag of the pattern matched
			if ($longueur == $#PATRON + 1) {
				$dicoPatron{$terme}++;
				$nbTerme++;
		}
	}	
}


while (($LISTE[$indice-1]=~/.*?\t(.*?)\t.*?\t$PATRON[$indice].*/) and ($indice <= $#PATRON))
	open my $fileResu,">:encoding(UTF-8)","perl.txt";

	print $fileResu "$nbTerme éléments trouvés\n";
	foreach my $patron (sort {$dicoPatron{$b} <=> $dicoPatron{$a} } keys %dicoPatron) {
		print $fileResu "$dicoPatron{$patron}\t$patron\n";
	}
close($fileResu);</code></pre>
		
		<pre><code class="language-shell">
			
perl bao3.pl corpus-titre-description.txt ud PATRON

perl bao3.pl corpus-titre-description.xml tt PATRON

return $script, $résultat

Dépendance


<!-- Pour rappel, le format du fichier -->
<item><a>1</a><a>Football</a><a>football</a><a>NOUN</a><a>_</a><a>Gender=Masc|Number=Sing</a><a>7</a><a>nsubj</a><a>_</a><a>SpacesAfter=

for (my $i=0;$i<=$#LIGNES;$i++)
		
		if ($LIGNES[$i]=~/<item><a>([^<]+)<\/a><a>([^<]+)<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>([^<]+)<\/a><a>[^<]*$relation[^<]*<\/a><a>[^<]+<\/a><a>[^<]+<\/a><\/item>/i) {
				my $posDep=$1;
				my $posGouv=$3;
				my $formeDep=$2;
		#on différencie le cas où le gouverneur se situe avant où après le dépendant, pour ne pas avoir à parcourir toute la phrase
		if ($posDep > $posGouv) {
					for (my $k=0;$k<$i;$k++) {
						if ($LIGNES[$k]=~/<item><a>$posGouv<\/a><a>([^<]+)<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><a>[^<]+<\/a><\/item>/) {
							my $formeGouv=$1;
							$dicoRelation{"$formeGouv $formeDep"}++;</code></pre>
							
		
		</code></pre>for (my $k=$i+1;$k<=$#LIGNES;$k++)

perl bao3_rel.pl sortieudpipe-slurp_3208-2021.xml obj

return $script, $résultat

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#

<langage nom="XSLT">

Patrons


<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs"
    version="1.0">
    <!--
        Lists, as plain text, the word sequences of a tagged corpus that follow
        six morphosyntactic patterns. The stylesheet reads corpus/element
        nodes: data[1] is compared with the POS tags and data[3] is printed.
        Each pattern has its own mode on the same "corpus" match; every
        template prints a heading and then one matching sequence per line.

        Headings and separators use xsl:text (the original wrote the unknown
        literal element "xsl-text", which only worked because the text output
        method drops element tags). Line breaks are written as &#10; so the
        emitted whitespace is explicit; the characters produced are the same
        as before, including the spaces kept on the line after some headings.
    -->
    <xsl:output method="text"/>

    <!-- Entry point: run every pattern in turn over the corpus. -->
    <xsl:template match="/">
        <xsl:apply-templates select="corpus" mode="ADJ-NOM"/>
        <xsl:apply-templates select="corpus" mode="NOM-ADJ"/>
        <xsl:apply-templates select="corpus" mode="NOM-ADJ-NUM"/>
        <xsl:apply-templates select="corpus" mode="NOM-PREP-NOM-PREP"/>
        <xsl:apply-templates select="corpus" mode="VERBE-DET-NOM"/>
        <xsl:apply-templates select="corpus" mode="VERBE-PREP-NOM"/>
    </xsl:template>

    <!-- ADJ immediately followed by NOUN. -->
    <xsl:template match="corpus" mode="ADJ-NOM">
        <xsl:text>ADJ-NOM&#10;&#10;</xsl:text>
        <xsl:for-each select="element">
            <xsl:if test="data[1]='ADJ' and following-sibling::element[1]/data[1]='NOUN'">
                <xsl:value-of select="data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[1]/data[3]"/>
                <xsl:text> &#10;</xsl:text>
            </xsl:if>
        </xsl:for-each>
    </xsl:template>

    <!-- NOUN immediately followed by ADJ. -->
    <xsl:template match="corpus" mode="NOM-ADJ">
        <xsl:text>NOM-ADJ&#10;        &#10;</xsl:text>
        <xsl:for-each select="element">
            <xsl:if test="data[1]='NOUN' and following-sibling::element[1]/data[1]='ADJ'">
                <xsl:value-of select="data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[1]/data[3]"/>
                <xsl:text> &#10;</xsl:text>
            </xsl:if>
        </xsl:for-each>
    </xsl:template>

    <!-- NOUN, ADJ, NUM in a row. -->
    <xsl:template match="corpus" mode="NOM-ADJ-NUM">
        <xsl:text>NOM-ADJ-NUM&#10;        &#10;</xsl:text>
        <xsl:for-each select="element">
            <xsl:if test="data[1]='NOUN' and following-sibling::element[1]/data[1]='ADJ' and following-sibling::element[2]/data[1]='NUM'">
                <xsl:value-of select="data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[1]/data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[2]/data[3]"/>
                <xsl:text> &#10;</xsl:text>
            </xsl:if>
        </xsl:for-each>
    </xsl:template>

    <!-- NOUN, ADP, NOUN, ADP in a row. -->
    <xsl:template match="corpus" mode="NOM-PREP-NOM-PREP">
        <xsl:text>NOM-PREP-NOM-PREP&#10;        &#10;</xsl:text>
        <xsl:for-each select="element">
            <xsl:if test="data[1]='NOUN' and following-sibling::element[1]/data[1]='ADP' and following-sibling::element[2]/data[1]='NOUN' and following-sibling::element[3]/data[1]='ADP'">
                <xsl:value-of select="data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[1]/data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[2]/data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[3]/data[3]"/>
                <xsl:text> &#10;</xsl:text>
            </xsl:if>
        </xsl:for-each>
    </xsl:template>

    <!-- VERB, DET, NOUN in a row. -->
    <xsl:template match="corpus" mode="VERBE-DET-NOM">
        <xsl:text>VERBE-DET-NOM&#10;        &#10;</xsl:text>
        <xsl:for-each select="element">
            <xsl:if test="data[1]='VERB' and following-sibling::element[1]/data[1]='DET' and following-sibling::element[2]/data[1]='NOUN'">
                <xsl:value-of select="data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[1]/data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[2]/data[3]"/>
                <xsl:text> &#10;</xsl:text>
            </xsl:if>
        </xsl:for-each>
    </xsl:template>

    <!-- VERB, ADP, NOUN in a row. -->
    <xsl:template match="corpus" mode="VERBE-PREP-NOM">
        <xsl:text>VERBE-PREP-NOM&#10;        &#10;</xsl:text>
        <xsl:for-each select="element">
            <xsl:if test="data[1]='VERB' and following-sibling::element[1]/data[1]='ADP' and following-sibling::element[2]/data[1]='NOUN'">
                <xsl:value-of select="data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[1]/data[3]"/>
                <xsl:text> </xsl:text>
                <xsl:value-of select="following-sibling::element[2]/data[3]"/>
                <xsl:text> &#10;</xsl:text>
            </xsl:if>
        </xsl:for-each>
    </xsl:template>

</xsl:stylesheet>

<xsl:value-of select="script"/>
<xsl:value-of select="résultat"/>


	

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

<!--
	Prints, one pair per line, "governor dependent" for every item of a
	sentence (p) whose relation field a[8] contains the requested relation.
	Fields read on each item: a[1] position, a[2] printed form, a[7] position
	of the governor, a[8] relation.
-->

<!-- Relation to extract; "obj" unless the caller supplies another value. -->
<xsl:param  name="Relation">obj</xsl:param>

<xsl:output method="text" encoding="utf-8"/>

<!-- Entry point: handle every sentence of the document. -->
<xsl:template match="/">
<xsl:apply-templates select=".//p"/>
</xsl:template>


<xsl:template match="p">
<xsl:for-each select="item">

	<xsl:if test="contains(./a[8]/text(),$Relation)">
		<xsl:variable name="p1" select="./a[2]/text()"/>
		<xsl:variable name="positionCible" select="./a[7]/text()"/>
		<xsl:variable name="positionSource" select="./a[1]/text()"/>
		<!-- Look for the governor only on the side of the sentence where it lies. -->
		<xsl:choose>
			<!-- "<" must be written &lt; inside an attribute value, otherwise the stylesheet is not well-formed XML. -->
			<xsl:when test="$positionCible &lt; $positionSource">
				<xsl:variable name="p2" select="preceding-sibling::item[a[1]=$positionCible]/a[2]/text()"/>
				<xsl:value-of select="$p2"/>
				<xsl:text> </xsl:text>
				<xsl:value-of select="$p1"/>
				<xsl:text>&#10;</xsl:text>
			</xsl:when>
			<xsl:otherwise>
				<xsl:variable name="p2" select="following-sibling::item[a[1]=$positionCible]/a[2]/text()"/>
				<xsl:value-of select="$p2"/>
				<xsl:text> </xsl:text>
				<xsl:value-of select="$p1"/>
				<xsl:text>&#10;</xsl:text>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:if>

</xsl:for-each>
</xsl:template>


</xsl:stylesheet>

<xsl:value-of select="script"/>
<xsl:value-of select="résultat"/>

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#

<langage nom="XQuery">

<extraction objet="patrons morphosyntaxiques"> {

	
	
declare variable $count as xs:integer := 0;
(: NOTE(review): $count is not referenced anywhere in the visible query. :)
(: For every item of every p in the collection, build the string of forms
   (a[2]) when the item and its following siblings carry one of four tag
   sequences in a[4]; the strings are then grouped and counted.
   Presumably a[4] is the POS tag and a[2] the word form; confirm against the corpus. :)
	for $art in collection("TALISMANE-3208-2018")//p

	for $elt in $art/item
	let $conc2 :=
		(: NC P NC P NC :)
		if (($elt/a[4]/text()="NC") and ($elt/following-sibling::item[1]/a[4]/text()="P") and ($elt/following-sibling::item[2]/a[4]/text()="NC") and ($elt/following-sibling::item[3]/a[4]/text()="P") and ($elt/following-sibling::item[4]/a[4]/text()="NC")) then (
		concat($elt/a[2]/text()," ",$elt/following-sibling::item[1]/a[2]/text()," ",$elt/following-sibling::item[2]/a[2]/text()," ",$elt/following-sibling::item[3]/a[2]/text()," ",$elt/following-sibling::item[4]/a[2]/text())
		)
		(: NC ADJ :)
		else if (($elt/a[4]/text()="NC") and ($elt/following-sibling::item[1]/a[4]/text()="ADJ")) then (
			concat($elt/a[2]/text()," ",$elt/following-sibling::item[1]/a[2]/text())
		)
		(: ADJ NC :)
		else if (($elt/a[4]/text()="ADJ") and ($elt/following-sibling::item[1]/a[4]/text()="NC")) then (
			concat($elt/a[2]/text()," ",$elt/following-sibling::item[1]/a[2]/text())
		)
		(: V DET NC :)
		else if (($elt/a[4]/text()="V") and ($elt/following-sibling::item[1]/a[4]/text()="DET") and ($elt/following-sibling::item[2]/a[4]/text()="NC")) then (
			concat($elt/a[2]/text()," ",$elt/following-sibling::item[1]/a[2]/text()," ",$elt/following-sibling::item[2]/a[2]/text())
		)
		(: no pattern starts here: a lone line break serves as the "no match" marker :)
		else (
		"
"
		)
	(: drop the items marked as "no match" :)
	where $conc2 != "
"
	(: one result per distinct string, most frequent first, as "string<TAB>count" :)
	group by $g:= $conc2
	order by count($conc2) descending 
	return string-join(($g,count($conc2)), "	")

return concat($script,$résultat)

}
</extraction>

<extraction objet="relations de dépendance"> {

	
(: For every item whose a[8] contains 'obj', pair the form (a[2]) of the item
   found at the position given in a[7] with the form of the item itself, then
   group identical pairs and count them. :)
for $item in collection("udpipe3208-2020")//item
	where contains($item/a[8]/text(),'obj')
	let $depforme:=$item/a[2]/text()
	let $positionSource:=$item/a[1]
	let $positionCible:=$item/a[7]
	(: search only the side of the sentence on which the target position lies :)
	let $noeudC:=
		if (number($positionCible) < number($positionSource)) then (
			$item/preceding-sibling::item[number(a[1])=number($positionCible)]/a[2]/text()
		)
		else (
			$item/following-sibling::item[number(a[1])=number($positionCible)]/a[2]/text()
		)
	let $preresu:= string-join(($noeudC,$depforme)," ")
	(: one result per distinct pair, most frequent first, as "pair<TAB>count" :)
	group by $g:=$preresu
	order by count($preresu) descending
	return string-join(($g,count($preresu)),"	")

return concat($script,$résultat)

}
</extraction>

def ProjetEncadre2(

nom="Extraction de patrons morphosyntaxiques et relations de dépendance"

def bao3():

def python():

sub perl {

<langage nom="XSLT">

<langage nom="XQuery">

nom="Extraction de patrons morphosyntaxiques et relations de dépendance" def bao3():

def python():

sub perl {

<langage nom="XSLT">

<langage nom="XQuery">

nom="Extraction de patrons morphosyntaxiques et relations de dépendance"

def bao3():