Projettal 2018

        1	#!/bin/bash
        2	# MODE D'EMPLOI DU PROGRAMME :
        3	# bash programme_tableau.sh NOM_DOSSIER_URL NOM_FICHIER_HTML "motif1|motif2"
        4	# le programme prend 3 arguments :
        5	# - le premier est le nom du dossier contenant les fichiers d'URLs, c.-à-d. l'INPUT : $1 par la suite
        6	# - le second est le fichier TABLEAU au format HTML : $2 par la suite
        7	# - le 3e est le motif dans les langues différentes choisies: $3
        8	# les 3 sont fournis dans la ligne de commande
        9	# bash ./PROGRAMMES/programme_tableau.sh ./URLS/ ./TABLEAUX/mariage.html "結婚|婚姻|mariage"
        10	#-------------------------------------------------------------------------------
        11	
        # Copy the script's command-line arguments into named globals.
        # Inside the functions below, $2 and $3 designate the function's own
        # arguments, so the script-level values are read through these names.
        #   deuxieme  : script argument 2 (the HTML table file)
        #   troisieme : script argument 3 (the search pattern)
        deuxieme="${2}"
        troisieme="${3}"
        17	
        18	#********************************* FONCTIONS ***********************************
        19	
        # Start the HTML report: (re)create the file named by $deuxieme and
        # write the <html> tag, the document head and the opening <body> tag.
        # Globals:   deuxieme (read) - path of the HTML output file
        # Arguments: none
        ecriture_entete_html () {
            # The path is quoted so that a file name containing spaces or glob
            # characters no longer fails with "ambiguous redirect".
            # One truncating redirection replaces the former ">" then ">>" series.
            printf '%s\n' \
                '<html>' \
                '<head><title>PREMIERE PAGE</title>' \
                '    <meta charset="UTF-8" /></head>' \
                '<body>' > "$deuxieme"
        }
        # Close the HTML report by appending </body> and </html> to $deuxieme.
        # Globals:   deuxieme (read) - path of the HTML output file
        # Arguments: none
        ecriture_fin_html () {
            # Quoted target: a path with spaces must not split the redirection.
            printf '%s\n' '</body>' '</html>' >> "$deuxieme"
        }
        30	# FONCTIONS POUR L'ECRITURE DES LIGNES DU TABLEAU
        # Append the opening of one result table to $deuxieme: the <table> tag,
        # a caption carrying the table number and the 13-column header row.
        # Globals:   deuxieme (read)        - path of the HTML output file
        #            compteurtableau (read) - number shown in the caption
        # Arguments: none
        ecriture_tableau_debut () {
            # Every output line is one printf argument, so the leading spaces
            # written to the file are explicit. The target path is quoted to
            # survive spaces in the file name.
            printf '%s\n' \
                '<table align="center" border="1">' \
                "<caption align=\"center\">TABLEAU N°${compteurtableau}</caption>" \
                '            <thead><tr bgcolor="pink">' \
                '                <th>N°</th>' \
                '                <th>Code Http</th>' \
                '                <th>URL</th>' \
                '                <th>Page Aspirée</th>' \
                '                <th>Encodage Entrée</th>' \
                '                <th>Encodage Sortie</th>' \
                '                <th>Dump</th>' \
                '                <th>Contexte</th>' \
                '                <th>Contexte HTML</th>' \
                '                <th>Fq Motif</th>' \
                '                <th>Index</th>' \
                '                <th>Bigramme</th>' \
                '                <th>Trigramme</th>' \
                '            </tr></thead>' >> "$deuxieme"
        }
        50	# ecriture lignes tableau
        # Append one complete table row for a URL that was processed successfully.
        # Globals:   deuxieme, compteur, compteurtableau, code_sortie, ligne,
        #            nbmotif (all read)
        # Arguments: $1 - input encoding shown in the row
        #            $2 - output encoding shown in the row
        #            $3 - language directory (fr|zh|jp)
        #            $4 - optional dump suffix (-utf8), may be empty
        ecriture_tableau_normal () {
            local entree=$1 sortie=$2 langue=$3 suffixe=$4
            local id="${compteurtableau}-${compteur}"
            # Constant format for the cells holding a link; data is always
            # passed as printf arguments, never inside the format.
            local lien='            <td><a target="_blank" href="%s">%s</a></td>\n'
            {
                printf '<tr>\n'
                printf '            <td>%s</td>\n' "$compteur" "$code_sortie"
                printf "$lien" "$ligne" "Lien n°${compteur}"
                printf "$lien" "../PAGES-ASPIREES/${langue}/${id}.html" "Page aspirée n° ${compteur}"
                printf '            <td>%s</td>\n' "$entree" "$sortie"
                printf "$lien" "../DUMP-TEXT/${langue}/${id}${suffixe}.txt" "DUMP  n° ${compteur}"
                printf "$lien" "../CONTEXTES/${langue}/${id}-contexte.txt" "CT ${id}"
                printf "$lien" "../CONTEXTES/${langue}/${id}.html" "CTh ${id}"
                printf '            <td>%s</td>\n' "$nbmotif"
                printf "$lien" "../DUMP-TEXT/${langue}/index-${id}.txt" "Ind ${id}"
                printf "$lien" "../DUMP-TEXT/${langue}/bigramme-${id}.txt" "Bigr ${id}"
                printf "$lien" "../DUMP-TEXT/${langue}/trigramme-${id}.txt" "Trigr ${id}"
                printf '        </tr>\n'
            } >> "$deuxieme"   # quoted: the path may contain spaces
        }
        # Append the table row used when the page encoding could not be handled:
        # counter, HTTP code, link, the two encodings found, and "-" elsewhere.
        # Globals:   deuxieme, compteur, code_sortie, ligne, ENCODAGE,
        #            ENCODAGEFILE (all read)
        # Arguments: none
        ecriture_tableau_pb_encodage () {
            local i
            {
                printf '<tr>\n'
                printf '            <td>%s</td>\n' "$compteur" "$code_sortie"
                printf '            <td><a target="_blank" href="%s">Lien n°%s</a></td>\n' "$ligne" "$compteur"
                printf '            <td>-</td>\n'
                printf '            <td>%s</td>\n' "$ENCODAGE"
                printf '            <td>NON CONFORME<br>(%s)</td>\n' "$ENCODAGEFILE"
                # the seven remaining columns carry no result
                for i in 1 2 3 4 5 6 7; do
                    printf '            <td>-</td>\n'
                done
                printf '        </tr>\n'
            } >> "$deuxieme"   # quoted: the path may contain spaces
        }
        # Append the table row used when the HTTP request did not return 200:
        # counter, HTTP code in bold red, link, and "-" in every other column.
        # Globals:   deuxieme, compteur, code_sortie, ligne (all read)
        # Arguments: none
        ecriture_tableau_noResult () {
            local i
            {
                printf '<tr>\n'
                printf '            <td>%s</td>\n' "$compteur"
                # <font> is now closed; it used to stay open until the end of the cell
                printf '            <td><font color="red"><b>%s</b></font></td>\n' "$code_sortie"
                printf '            <td><a target="_blank" href="%s">Lien n°%s</a></td>\n' "$ligne" "$compteur"
                # the ten remaining columns carry no result
                for i in 1 2 3 4 5 6 7 8 9 10; do
                    printf '            <td>-</td>\n'
                done
                printf '        </tr>\n'
            } >> "$deuxieme"   # quoted: the path may contain spaces
        }
        # Close the current table in $deuxieme and add a pink horizontal rule
        # that visually separates it from the next table.
        # Globals:   deuxieme (read) - path of the HTML output file
        # Arguments: none
        ecriture_tableau_fin () {
            # Quoted target: a path with spaces must not split the redirection.
            printf '%s\n' '</table>' '<hr color="pink"/>' >> "$deuxieme"
        }
        114	
        115	# FONCTIONS POUR LES MESSAGES EN SORTIE
        # Progress message: the encoding declared by the page ($ENCODAGE) is
        # usable as is. Prints the message followed by an empty line.
        msg_initial_OK () {
            printf '%b\n' "ENCODAGE initial <${ENCODAGE}> OK : on passe au traitement \n"
        }
        # Progress message: the URL answered correctly but its declared
        # encoding is not UTF-8. Prints the message followed by an empty line.
        msg_traiter_non_UTF8 () {
            printf '%b\n' "==> il faut traiter les URLs OK qui ne sont pas a priori en UTF8\n"
        }
        # Progress message: no encoding was declared, the one detected by `file`
        # ($ENCODAGEFILE) is reported. Each sentence is followed by an empty line.
        msg_apres_extraction () {
            printf '%b\n' \
                "ENCODAGE initial vide. ENCODAGE extrait via file : <${ENCODAGEFILE}> \n" \
                "Il faut désormais s'assurer que cet encodage peut être OK ou pas... \n"
        }
        # Progress message: the encoding detected by `file` ($ENCODAGEFILE) is
        # usable as is. Prints the message followed by an empty line.
        msg_second_OK () {
            printf '%b\n' "ENCODAGE secondaire <${ENCODAGEFILE}> OK : on passe au traitement \n"
        }
        # Problem message: no encoding could be determined. The message is
        # followed by the position line printed by repere (called without argument).
        msg_encodage_vide () {
            printf '%b\n' "PB....ENCODAGE VIDE "
            repere
        }
        # Progress message: the encoding detected by `file` ($ENCODAGEFILE) is
        # listed by iconv. Prints the message followed by an empty line.
        msg_second_connu_de_iconv () {
            printf '%b\n' "ENCODAGE secondaire <${ENCODAGEFILE}> OK, connu de iconv : on passe au traitement \n"
        }
        # Problem message: the encoding is not listed by iconv, the URL is
        # given up. The message is followed by the position line from repere.
        msg_abandon () {
            printf '%b\n' "PB....ENCODAGE PAS CONNU DE ICONV -- ABANDON "
            repere
        }
        # Progress message: the encoding declared by the page ($ENCODAGE) is
        # listed by iconv. Prints the message followed by an empty line.
        msg_initial_connu_de_iconv () {
            printf '%b\n' "ENCODAGE initial <${ENCODAGE}> OK, connu de iconv : on passe au traitement \n"
        }
        # Progress message: the declared encoding is unusable, `file` will be
        # run on the downloaded page. Each sentence is followed by an empty line.
        msg_non_pertinent () {
            printf '%b\n' \
                "L'encodage initial n'est pas un encodage pertinent...\n" \
                "Utilisation de file pour extraire le charset dans la page aspirée...\n"
        }
        # Progress message: the declared encoding is not listed by iconv; the
        # one detected by `file` ($ENCODAGEFILE) is reported instead.
        # Each sentence is followed by an empty line.
        msg_initial_non_connu_de_iconv () {
            printf '%b\n' \
                "ENCODAGE initial non connu de iconv. ENCODAGE extrait via file : ${ENCODAGEFILE} \n" \
                "Il faut désormais s'assurer que cet encodage peut être OK ou pas... \n"
        }
        # Problem message: `file` did not yield a usable encoding. The message
        # is followed by the position line printed by repere.
        msg_non_trouve () {
            printf '%b\n' "PB....ENCODAGE NON TROUVE PAR FILE (VIDE OU NON UTF-8) "
            repere
        }
        # Generic problem message, followed by the position line printed by repere.
        msg_pb () {
            printf '%b\n' "PB...."
            repere
        }
        155	
        156	# FONCTIONS POUR LE TRAITEMENT DES PAGES
        # Download the current URL and extract the charset declared in a <meta>
        # tag. The result is stored upper-cased in the global ENCODAGE, which
        # is left empty when no declaration is found.
        # Globals:   ligne (read) - URL being processed
        #            ENCODAGE (written)
        # Arguments: none
        recherche_encodage () {
            # Only the FIRST token after "charset=" is kept. Deleting every
            # newline used to glue all tokens together, so that
            # charset="utf-8" content="x" produced "UTF-8CONTENT".
            # grep -E replaces the obsolescent egrep, and the hyphen in the
            # last pattern no longer carries a stray backslash.
            ENCODAGE=$(curl -sL "$ligne" \
                | grep -E -o "meta.+charset *= *[^>]+" \
                | grep -E -o "charset *= *[^>]+" \
                | cut -f2 -d"=" \
                | grep -E -o "(\w+|-)+" \
                | head -n 1 \
                | tr 'a-z' 'A-Z' \
                | tr -d '\r\n')
        }
        # Print a position line "table::url number::http code::$1::url" followed
        # by an empty line, to follow the run on the terminal.
        # Globals:   compteurtableau, compteur, code_sortie, ligne (all read)
        # Arguments: $1 - optional extra field (callers pass the encoding)
        repere () {
            # %s prints the values verbatim; echo -e used to interpret
            # backslash sequences that may occur inside the URL.
            printf '%s::%s::%s::%s::%s\n\n' \
                "$compteurtableau" "$compteur" "$code_sortie" "$1" "$ligne"
        }
        # Download the current URL into
        # ./PAGES-ASPIREES/<lang>/<table>-<url number>.html.
        # Globals:   ligne, compteurtableau, compteur (all read)
        # Arguments: $1 - language directory (fr|zh|jp)
        aspiration () {
            # URL and path are quoted: characters such as ? * [ in a URL would
            # otherwise be subject to word splitting and pathname expansion.
            curl -sL -o "./PAGES-ASPIREES/$1/${compteurtableau}-${compteur}.html" "$ligne"
        }
        # Write the text dump of the current URL (lynx -dump -nolist) to
        # ./DUMP-TEXT/<lang>/<table>-<url number>.txt.
        # Globals:   ligne, compteurtableau, compteur (all read)
        # Arguments: $1 - language directory (fr|zh|jp)
        dump_simple () {
            # Quoted URL and target so that neither is split or globbed.
            lynx -dump -nolist "$ligne" > "./DUMP-TEXT/$1/${compteurtableau}-${compteur}.txt"
        }
        # Dump a page that is not in UTF-8: lynx produces the dump in the given
        # encoding, iconv converts it to UTF-8 into a "-utf8" file, and `file`
        # reports the encoding of that converted file.
        # Globals:   ligne, compteurtableau, compteur (read)
        #            ENCODAGESORTIE (written) - upper-cased charset of the
        #            converted dump as reported by `file -i`
        # Arguments: $1 - language directory (fr|zh|jp)
        #            $2 - source encoding of the page
        dump_lv2 () {
            local brut="./DUMP-TEXT/$1/${compteurtableau}-${compteur}.txt"
            local converti="./DUMP-TEXT/$1/${compteurtableau}-${compteur}-utf8.txt"

            # All expansions are quoted so the URL, the encoding and the paths
            # each stay a single argument.
            lynx --assume-charset="$2" --display-charset="$2" -dump -nolist "$ligne" > "$brut"
            iconv -f "$2" -t utf-8 "$brut" > "$converti"
            ENCODAGESORTIE=$(file -i "$converti" | cut -d"=" -f2 | tr 'a-z' 'A-Z')
        }
        # Pattern-related outputs for the current dump:
        #   1. lines matching the pattern, stored in ./CONTEXTES-CONCAT/<lang>/
        #      (run through segment.sh for zh and chasen for jp); for fr the
        #      dump itself is copied to ./DUMP-CONCAT/fr
        #   2. number of pattern occurrences, stored in nbmotif
        #   3. HTML context produced by the minigrep perl script, moved to
        #      ./CONTEXTES/<lang>/
        # Globals:   troisieme, compteurtableau, compteur (read)
        #            nbmotif (written)
        # Arguments: $1 - language directory (fr|zh|jp)
        #            $2 - optional dump suffix (-utf8), may be empty
        contextmotif () {
            local dump="./DUMP-TEXT/$1/${compteurtableau}-${compteur}$2.txt"
            local concat="./CONTEXTES-CONCAT/$1/${compteurtableau}-${compteur}"

            # 1. matching lines ("--" protects a pattern starting with "-")
            grep -E -i -- "$troisieme" "$dump" > "${concat}.txt"
            case $1 in
                zh)
                    bash ../../../stanford-segmenter/segment.sh pku "${concat}.txt" UTF-8 0 > "${concat}-sg.txt"
                    # Only the segmented file must be concatenated later, so
                    # the unsegmented one just written is removed by name. The
                    # former glob *[^-sg].txt was a character class on the last
                    # letter, not a test on the "-sg" suffix.
                    rm -f -- "${concat}.txt"
                    ;;
                jp)
                    chasen -j "${concat}.txt" | cut -f1 > "${concat}-sg.txt"
                    rm -f -- "${concat}.txt"
                    ;;
                fr)
                    # French needs no segmentation: the dump is reused as is
                    cp -- "$dump" ./DUMP-CONCAT/fr
                    ;;
            esac

            # 2. occurrences: grep -c counts matching LINES even with -o, so
            # the matches printed by -o are counted with wc instead
            nbmotif=$(grep -E -o -i -- "$troisieme" "$dump" | wc -l | tr -d ' ')

            # 3. HTML context; the perl script writes resultat-extraction.html
            # in the current directory
            perl ./minigrep/minigrepmultilingue.pl "utf-8" "$dump" "./minigrep/parametre-motif-$1.txt"
            mv resultat-extraction.html "./CONTEXTES/$1/${compteurtableau}-${compteur}.html"
        }
        # Ask `file -i` for the charset of the downloaded page and store it,
        # upper-cased, in ENCODAGEFILE.
        # Globals:   compteurtableau, compteur (read); ENCODAGEFILE (written)
        # Arguments: $1 - language directory (fr|zh|jp)
        extraction_encodage2 () {
            local page="./PAGES-ASPIREES/$1/${compteurtableau}-${compteur}.html"
            # quoted path: it must reach `file` as one argument
            ENCODAGEFILE=$(file -i "$page" | cut -d"=" -f2 | tr 'a-z' 'A-Z')
        }
        # Build the frequency index of the French dump: one line per distinct
        # word with its count, most frequent first, written to
        # ./DUMP-TEXT/fr/index-<table>-<url number>.txt.
        # Globals:   compteurtableau, compteur (read)
        # Arguments: $1 - optional dump suffix (-utf8), may be empty
        indexfr () {
            local id="${compteurtableau}-${compteur}"
            # sort -rn compares the counts numerically; a plain sort -r only
            # orders them correctly thanks to uniq's padding and the locale.
            # grep -E replaces the obsolescent egrep.
            grep -E -o "\w+" "./DUMP-TEXT/fr/${id}$1.txt" \
                | sort | uniq -c | sort -rn > "./DUMP-TEXT/fr/index-${id}.txt"
        }
        # Chinese dump: segment it with the Stanford segmenter script, build
        # the frequency index of the segmented text (most frequent first), and
        # copy the segmented file to ./DUMP-CONCAT/zh.
        # Globals:   compteurtableau, compteur (read)
        # Arguments: $1 - optional dump suffix (-utf8), may be empty
        indexzh () {
            local id="${compteurtableau}-${compteur}"
            local segmente="./DUMP-TEXT/zh/${id}-sg.txt"

            bash ../../../stanford-segmenter/segment.sh pku "./DUMP-TEXT/zh/${id}$1.txt" UTF-8 0 > "$segmente"
            # numeric sort on the counts; grep -E replaces the obsolescent egrep
            grep -E -o "\w+" "$segmente" | sort | uniq -c | sort -rn > "./DUMP-TEXT/zh/index-${id}.txt"

            # the segmented dump is the one concatenated later
            cp -- "$segmente" ./DUMP-CONCAT/zh
        }
        # Japanese dump: segment it with chasen (first column of its output),
        # build the frequency index of the segmented text (most frequent
        # first), and copy the segmented file to ./DUMP-CONCAT/jp.
        # Globals:   compteurtableau, compteur (read)
        # Arguments: $1 - optional dump suffix (-utf8), may be empty
        indexjp () {
            local id="${compteurtableau}-${compteur}"
            local segmente="./DUMP-TEXT/jp/${id}-sg.txt"

            chasen -j "./DUMP-TEXT/jp/${id}$1.txt" | cut -f1 > "$segmente"
            # numeric sort on the counts; grep -E replaces the obsolescent egrep
            grep -E -o "\w+" "$segmente" | sort | uniq -c | sort -rn > "./DUMP-TEXT/jp/index-${id}.txt"

            # the segmented dump is the one concatenated later
            cp -- "$segmente" ./DUMP-CONCAT/jp
        }
        # Build the bigram and trigram frequency lists of a dump, most frequent
        # first, into ./DUMP-TEXT/<lang>/bigramme-<id>.txt and trigramme-<id>.txt.
        # The word list is pasted next to itself shifted by one (and two) words.
        # As before, the last rows are incomplete n-grams with empty fields.
        # Globals:   compteurtableau, compteur (read)
        # Arguments: $1 - language directory (fr|zh|jp)
        #            $2 - optional dump suffix (-utf8|-sg), may be empty
        # Returns:   1 if the scratch directory cannot be created
        ngram () {
            local id="${compteurtableau}-${compteur}"
            local entree="./DUMP-TEXT/$1/${id}$2.txt"
            local tmp

            # Private scratch directory instead of bi1.txt..bi4.txt, which were
            # left behind in the working directory after every URL.
            tmp=$(mktemp -d) || return 1

            # one word per line, then the same list shifted by one and by two
            grep -E -o "\w+" "$entree" > "$tmp/m1"
            tail -n +2 "$tmp/m1" > "$tmp/m2"
            tail -n +3 "$tmp/m1" > "$tmp/m3"

            paste "$tmp/m1" "$tmp/m2" \
                | sort | uniq -c | sort -rn > "./DUMP-TEXT/$1/bigramme-${id}.txt"
            paste "$tmp/m1" "$tmp/m2" "$tmp/m3" \
                | sort | uniq -c | sort -rn > "./DUMP-TEXT/$1/trigramme-${id}.txt"

            rm -rf -- "$tmp"
        }
        # Extract every line matching the pattern, with one line of context
        # before and after, into ./CONTEXTES/<lang>/<table>-<url number>-contexte.txt.
        # Globals:   troisieme, compteurtableau, compteur (read)
        # Arguments: $1 - language directory (fr|zh|jp)
        #            $2 - optional dump suffix (-utf8), may be empty
        contextextract () {
            local id="${compteurtableau}-${compteur}"
            # -i makes the search case-insensitive, like the occurrence count
            # in contextmotif; otherwise a counted match such as a capitalised
            # word had no context. "--" protects a pattern starting with "-".
            grep -E -i -C 1 -- "$troisieme" "./DUMP-TEXT/$1/${id}$2.txt" \
                > "./CONTEXTES/$1/${id}-contexte.txt"
        }
        # Append every file of ./CONTEXTES-CONCAT/<lang>/ to
        # ./CONCAT/<lang>/contexte-concat-<lang>.txt and every file of
        # ./DUMP-CONCAT/<lang>/ to ./CONCAT/<lang>/dump-concat-<lang>.txt.
        # Each file is wrapped in <fichier="name"> ... </fichier> markers.
        # The targets are appended to, never truncated.
        # Arguments: $1 - language directory (fr|zh|jp)
        concatenation () {
            # "fichier" is local so the loops no longer overwrite the global
            # of the same name used by the main loop.
            local genre dossier cible fichier

            for genre in contexte dump; do
                case $genre in
                    contexte) dossier="./CONTEXTES-CONCAT/$1" ;;
                    dump)     dossier="./DUMP-CONCAT/$1" ;;
                esac
                cible="./CONCAT/$1/${genre}-concat-$1.txt"

                # A glob replaces the parsing of ls; *.txt* keeps the former
                # filter (any name containing ".txt").
                for fichier in "$dossier"/*.txt*; do
                    # skips the literal pattern when nothing matches
                    [[ -f $fichier ]] || continue
                    {
                        printf '<fichier="%s">\n\n' "${fichier##*/}"
                        cat -- "$fichier"
                        printf '\n</fichier>\n\n'
                    } >> "$cible"
                done
            done
        }
        # Run a command with the language deduced from the name of the URL
        # file being processed ($fichier ending in zh.txt, fr.txt or jp.txt).
        # Nothing is run for any other file name.
        # Globals:   fichier (read)
        # Arguments: $1 - command to run, possibly followed by its first
        #                 arguments in the same string (split on purpose)
        #            $2 - optional extra argument placed after the language
        selection_langue_fichier () {
            local langue
            case $fichier in
                *zh.txt) langue=zh ;;
                *fr.txt) langue=fr ;;
                *jp.txt) langue=jp ;;
                *) return 0 ;;
            esac
            # $1 and $2 stay unquoted: callers pack "command arg arg" in $1
            # and may leave $2 empty
            $1 $langue $2
        }
        291	# FONCTIONS QUI UTILISENT LES FONCTIONS CI-DESSUS
        292	# LV1
        # Full processing chain for a French page already in UTF-8:
        # dump, pattern contexts, index, n-grams, context extraction.
        # The language code selects the output directories in each step.
        preparation_simplefr () {
            local langue=fr
            dump_simple "$langue"
            contextmotif "$langue"
            indexfr
            ngram "$langue"
            contextextract "$langue"
        }
        # Full processing chain for a Chinese page already in UTF-8:
        # dump, pattern contexts, index, n-grams, context extraction.
        # The n-grams are computed on the segmented (-sg) dump.
        preparation_simplezh () {
            local langue=zh
            dump_simple "$langue"
            contextmotif "$langue"
            indexzh
            ngram "$langue" -sg
            contextextract "$langue"
        }
        # Full processing chain for a Japanese page already in UTF-8:
        # dump, pattern contexts, index, n-grams, context extraction.
        # The n-grams are computed on the segmented (-sg) dump.
        preparation_simplejp () {
            local langue=jp
            dump_simple "$langue"
            contextmotif "$langue"
            indexjp
            ngram "$langue" -sg
            contextextract "$langue"
        }
        # Full processing chain for a French page that must be converted to
        # UTF-8: converting dump, then contexts, index, n-grams and context
        # extraction, all on the "-utf8" dump.
        # Arguments: $1 - source encoding of the page
        preparation_lv2fr () {
            # quoted: an encoding string containing a space must stay one argument
            dump_lv2 fr "$1"
            contextmotif fr -utf8
            indexfr -utf8
            ngram fr -utf8
            contextextract fr -utf8
        }
        # Full processing chain for a Chinese page that must be converted to
        # UTF-8: converting dump, then contexts and index on the "-utf8" dump,
        # n-grams on the segmented (-sg) dump, and context extraction.
        # Arguments: $1 - source encoding of the page
        preparation_lv2zh () {
            # quoted: an encoding string containing a space must stay one argument
            dump_lv2 zh "$1"
            contextmotif zh -utf8
            indexzh -utf8
            ngram zh -sg
            contextextract zh -utf8
        }
        # Full processing chain for a Japanese page that must be converted to
        # UTF-8: converting dump, then contexts and index on the "-utf8" dump,
        # n-grams on the segmented (-sg) dump, and context extraction.
        # Arguments: $1 - source encoding of the page
        preparation_lv2jp () {
            # quoted: an encoding string containing a space must stay one argument
            dump_lv2 jp "$1"
            contextmotif jp -utf8
            indexjp -utf8
            ngram jp -sg
            contextextract jp -utf8
        }
        345	# LV2
        # Run the UTF-8 processing chain matching the language of the URL file
        # being processed ($fichier ending in zh.txt, fr.txt or jp.txt).
        # Nothing is run for any other file name.
        # Globals:   fichier (read)
        selection_preparation_simple () {
            local sans_extension
            case $fichier in
                *zh.txt | *fr.txt | *jp.txt)
                    # the two letters before ".txt" are the language code
                    sans_extension=${fichier%.txt}
                    "preparation_simple${sans_extension: -2}"
                    ;;
            esac
        }
        # Run the converting processing chain matching the language of the URL
        # file being processed ($fichier ending in zh.txt, fr.txt or jp.txt).
        # Nothing is run for any other file name.
        # Globals:   fichier (read)
        # Arguments: $1 - source encoding of the page, forwarded as is
        selection_preparation_lv2 () {
            # "$1" is quoted so the encoding reaches the chain as one argument
            case $fichier in
                *zh.txt) preparation_lv2zh "$1" ;;
                *fr.txt) preparation_lv2fr "$1" ;;
                *jp.txt) preparation_lv2jp "$1" ;;
            esac
        }
        369	# LV3
        # Page whose declared encoding is directly usable: download it, run the
        # UTF-8 chain, then write the table row with $ENCODAGE as both the
        # input and the output encoding.
        # Globals:   ENCODAGE (read)
        traitement_simple_A () {
            selection_langue_fichier aspiration
            selection_preparation_simple

            # Row writer and its first two arguments travel as one string;
            # selection_langue_fichier splits it and appends the language.
            local ecriture_ligne="ecriture_tableau_normal $ENCODAGE $ENCODAGE"
            selection_langue_fichier "$ecriture_ligne"
        }
        # Second attempt at finding the encoding: download the page, then run
        # the `file`-based detection on the downloaded copy. Both steps go
        # through selection_langue_fichier to get the language directory.
        recherche_encodage_lv2 ()  {
            local etape
            for etape in aspiration extraction_encodage2; do
                selection_langue_fichier "$etape"
            done
        }
        # Page already downloaded whose detected encoding is directly usable:
        # run the UTF-8 chain, then write the table row with $ENCODAGE as the
        # input encoding and $ENCODAGEFILE as the output encoding.
        # Globals:   ENCODAGE, ENCODAGEFILE (read)
        traitement_simple_B () {
            selection_preparation_simple

            # Row writer and its first two arguments travel as one string;
            # selection_langue_fichier splits it and appends the language.
            local ecriture_ligne="ecriture_tableau_normal $ENCODAGE $ENCODAGEFILE"
            selection_langue_fichier "$ecriture_ligne"
        }
        # Page already downloaded whose detected encoding ($ENCODAGEFILE) is
        # known to iconv: run the converting chain, then write the table row
        # with the detected encoding and the encoding of the converted dump.
        # Globals:   ENCODAGEFILE (read); ENCODAGESORTIE (read after the chain
        #            has run)
        traitement_lv2_A () {
            selection_preparation_lv2 "$ENCODAGEFILE"

            # The row arguments travel as one space-separated string, so an
            # empty value would shift every following argument. "-" is
            # substituted for an empty encoding to keep the columns aligned.
            selection_langue_fichier "ecriture_tableau_normal ${ENCODAGEFILE:--} ${ENCODAGESORTIE:--}" -utf8
        }
        # Page whose declared encoding ($ENCODAGE) is known to iconv: download
        # it, run the converting chain, then write the table row with the
        # declared encoding and the encoding of the converted dump.
        # Globals:   ENCODAGE (read); ENCODAGESORTIE (read after the chain
        #            has run)
        traitement_lv2_B () {
            selection_langue_fichier aspiration
            selection_preparation_lv2 "$ENCODAGE"

            # The row arguments travel as one space-separated string, so an
            # empty value would shift every following argument. "-" is
            # substituted for an empty encoding to keep the columns aligned.
            selection_langue_fichier "ecriture_tableau_normal ${ENCODAGE:--} ${ENCODAGESORTIE:--}" -utf8
        }
        400	# LV4
        # Decide what to do with a detected encoding ($ENCODAGEFILE) that is
        # not UTF-8:
        #   - empty            -> problem message and "encoding problem" row
        #   - listed by iconv  -> converting chain (page already downloaded)
        #   - anything else    -> abandon message and "encoding problem" row
        # Globals:   ENCODAGEFILE (read); reponse (written) - matching lines
        #            of `iconv -l`, empty when there is none
        condition_non_utf8 () {
            # The emptiness test comes first: grep used to be started with an
            # empty, unquoted pattern and printed a usage error.
            if [[ -z $ENCODAGEFILE ]]; then
                reponse=""
                msg_encodage_vide
                ecriture_tableau_pb_encodage
                return
            fi

            # -F: the name is a literal string; "--" protects a leading "-"
            reponse=$(iconv -l | grep -F -- "$ENCODAGEFILE")
            if [[ -n $reponse ]]; then
                msg_second_connu_de_iconv
                # page already downloaded
                traitement_lv2_A
            else
                msg_abandon
                ecriture_tableau_pb_encodage
            fi
        }
        # First branching on the encoding declared by the page: UTF-8 goes
        # straight to the simple processing, anything else (including an empty
        # value) goes to the non-UTF-8 analysis.
        # Globals:   ENCODAGE (read)
        condition_utf8 () {
            if [[ $ENCODAGE == "UTF-8" ]]; then
                msg_initial_OK
                traitement_simple_A
            else
                condition_non_utf8_lv2
            fi
        }
        # Branching on the encoding detected by `file`: UTF-8 goes to the
        # simple processing of the already downloaded page; otherwise the
        # fallback commands given by the caller are run in order.
        # Globals:   ENCODAGEFILE (read)
        # Arguments: $1 - first fallback command (msg_non_trouve|condition_non_utf8)
        #            $2 - optional second fallback command
        #                 (ecriture_tableau_pb_encodage)
        condition_utf8_lv2 () {
            if [[ $ENCODAGEFILE == "UTF-8" ]]; then
                msg_second_OK
                traitement_simple_B
            else
                # deliberately unquoted: an absent $2 expands to no command
                $1
                $2
            fi
        }
        # The page declares an encoding ($ENCODAGE) that is neither empty nor
        # UTF-8:
        #   - listed by iconv -> download and converting chain
        #   - otherwise       -> detect the encoding with `file` on the
        #                        downloaded page; UTF-8 is processed, anything
        #                        else gives the "encoding problem" row
        # Globals:   ENCODAGE (read); reponse (written) - matching lines of
        #            `iconv -l`, empty when there is none
        condition_non_vide_non_utf8 () {
            # Quoted literal pattern: -F avoids regex interpretation and "--"
            # stops a value starting with "-" from being read as a grep option.
            reponse=$(iconv -l | grep -F -- "$ENCODAGE")
            if [[ -n $reponse ]]; then
                msg_initial_connu_de_iconv
                traitement_lv2_B
            else
                msg_non_pertinent
                # look for the encoding by running `file` on the downloaded page
                recherche_encodage_lv2
                msg_initial_non_connu_de_iconv

                condition_utf8_lv2 msg_non_trouve ecriture_tableau_pb_encodage
            fi
        }
        # Analysis of a declared encoding that is not UTF-8.
        #   - non-empty -> handled by condition_non_vide_non_utf8
        #   - empty     -> ENCODAGE becomes "-", the encoding is detected with
        #                  `file` on the downloaded page, then evaluated
        # Globals:   ENCODAGE (read, set to "-" when empty)
        condition_non_utf8_lv2 () {
            msg_traiter_non_UTF8

            if [[ -n $ENCODAGE ]]; then
                condition_non_vide_non_utf8
                return
            fi

            # the dash keeps the columns of the table row aligned
            ENCODAGE="-"
            # look for the encoding by running `file` on the downloaded page
            recherche_encodage_lv2
            msg_apres_extraction
            condition_utf8_lv2 condition_non_utf8
        }
        470	# LV5
        # Process one URL: get its HTTP status, then either analyse the page
        # (status 200) or write the "no result" row. In both cases the URL
        # counter is incremented and a separator line is printed.
        # Globals:   ligne (read) - URL being processed
        #            code_sortie (written) - HTTP status returned by curl
        #            ENCODAGE (read after recherche_encodage has set it)
        #            compteur (incremented)
        traitement_global () {
            # Only the status code is needed here: the body is discarded
            # instead of being left in tmp.txt, which nothing read or removed.
            # The URL is quoted to prevent word splitting and globbing.
            code_sortie=$(curl -s -L -o /dev/null -w "%{http_code}" "$ligne")

            if [[ $code_sortie == 200 ]]; then
                # URL OK: find the encoding declared by the page
                recherche_encodage

                # show the current position on the terminal
                repere "$ENCODAGE"

                condition_utf8
            else
                # unusable URL: HTTP connection problem
                msg_pb
                ecriture_tableau_noResult
            fi

            compteur=$((compteur + 1))
            printf '%s\n\n' "_____________________________________________________________________"
        }
        493	#**************************** PROGRAMME PRINCIPAL ******************************
        494	
        # Phase 0: the three arguments documented in the usage are required
        # (URL directory, HTML output file, pattern).
        if (( $# < 3 )); then
            printf 'Usage: bash %s NOM_DOSSIER_URL NOM_FICHIER_HTML "motif1|motif2"\n' "$0" >&2
            exit 2
        fi

        # Phase 1: write the header of the HTML file
        ecriture_entete_html
        #-------------------------------------------------------------------------------
        # compteurtableau numbers the tables (one table per URL file)
        compteurtableau=1
        # Phase 2: process every URL file of the input directory.
        # A glob replaces the parsing of `ls`, so names are never split.
        for chemin_urls in "$1"/*; do
            [[ -f $chemin_urls ]] || continue
            # the functions expect the bare file name in the global "fichier"
            fichier=${chemin_urls##*/}

            # table title and column names
            ecriture_tableau_debut

            # Phase 3: process every URL of the current file, one row each.
            # The URLs are loaded first, split on whitespace as before but
            # without pathname expansion (URLs often contain ? or *), and
            # without keeping the file open on stdin while the tools run.
            urls=()
            while read -r -a mots || (( ${#mots[@]} )); do
                urls+=("${mots[@]}")
            done < "$chemin_urls"

            # compteur numbers the URLs and names the output files
            compteur=1
            for ligne in "${urls[@]}"; do
                # tolerate files saved with Windows line endings
                ligne=${ligne%$'\r'}
                [[ -n $ligne ]] || continue
                traitement_global
            done
            #-----------------------------------------------------------------------------------------
            # concatenate the context and dump files for the analysis in minigrep
            selection_langue_fichier concatenation
            #----------------------
            ecriture_tableau_fin
            compteurtableau=$((compteurtableau + 1))
        done
        #-------------------------------------------------------------------------------
        # Phase 4: write the end of the HTML file
        ecriture_fin_html
        #-------------------------------------------------------------------------------

        # done
        exit