from collections import deque
from typing import List
import re
import sys

# Usage: python code.py corpus.xml TAG [TAG ...]

# One corpus line is matched against this pattern; group 1 is used as the
# surface form and group 2 as the tag.
# NOTE(review): the pattern looks like it lost its XML tag literals somewhere
# upstream (it is only runs of `[^<]+?`) -- confirm against the corpus format.
# Raw string so that `\w` is not treated as an (invalid) string escape.
_TOKEN_RE = re.compile(
    r'[^<]+?([^<]+?)[^<]+?([^<]+?)(:\w+)?[^<]+?[^<]+?[^<]+?[^<]+?[^<]+?[^<]+?'
)

# (tag, form) pair that fills window slots not holding a real token.
_PLACEHOLDER = ("---", "---")


def extract(corpus_file: str, patron: List[str]) -> None:
    """Print every run of consecutive corpus lines whose tags equal *patron*.

    The file is read line by line while keeping a sliding window of the last
    ``len(patron)`` ``(tag, form)`` pairs. After each line, if the window's
    tags equal ``patron`` element by element, one line is printed made of
    ``form/tag `` for every window entry (each entry followed by a space).

    A line that does not match the token pattern resets the whole window to
    placeholder entries, so a printed run never spans such a line.

    Args:
        corpus_file: Path of the corpus file, opened as UTF-8 text.
        patron: Sequence of tags to look for on consecutive lines. When it is
            empty there is nothing to search for and the function returns
            without reading the file.
    """
    if not patron:
        # A zero-width window cannot be slid (the previous implementation
        # raised IndexError here on the first line read).
        return

    size = len(patron)
    # maxlen makes append() drop the oldest entry automatically.
    window = deque([_PLACEHOLDER] * size, maxlen=size)

    with open(corpus_file, encoding='utf-8') as corpus:
        for line in corpus:
            match = _TOKEN_RE.match(line)
            if match:
                window.append((match.group(2), match.group(1)))
            else:
                # Overwrite every slot with the placeholder.
                window.extend([_PLACEHOLDER] * size)

            if all(tag == wanted for (tag, _), wanted in zip(window, patron)):
                print("".join(f"{forme}/{tag} " for tag, forme in window))


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit(f"usage: python {sys.argv[0]} corpus.xml TAG [TAG ...]")
    corpus_file = sys.argv[1]
    patron = sys.argv[2:]
    extract(corpus_file, patron)