Spaces:

temp-late
/

rhyme-ai

Runtime error

App Files Files Community

Camille committed on Mar 14, 2022

Commit

43b8437

•

1 Parent(s): 8eb8892

find_rhyme_french

Browse files

Files changed (4) hide show

app.py +1 -1
rhyme_with_ai/noms-lexique.org.txt +0 -0
rhyme_with_ai/rhyme.py +7 -0
rhyme_with_ai/utils.py +64 -1

app.py CHANGED Viewed

@@ -25,7 +25,7 @@ elif LANGUAGE == "french":
     MODEL_PATH = "camembert-base"
     ITER_FACTOR = 5
 else:
-    raise NotImplementedError(f"Unsupported language ({LANGUAGE}) expected 'english' or 'dutch'.")
 def main():
     st.markdown(

     MODEL_PATH = "camembert-base"
     ITER_FACTOR = 5
 else:
+    raise NotImplementedError(f"Unsupported language ({LANGUAGE}) expected 'english','dutch' or 'french.")
 def main():
     st.markdown(

rhyme_with_ai/noms-lexique.org.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

rhyme_with_ai/rhyme.py CHANGED Viewed

@@ -6,6 +6,9 @@ import requests
 from gazpacho import Soup, get
 from rhyme_with_ai.utils import find_last_word
 def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
@@ -23,6 +26,10 @@ def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> L
        return query_datamuse_api(last_word, n_rhymes)
     elif language == "dutch":
         return mick_rijmwoordenboek(last_word, n_rhymes)
     else:
         raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")

 from gazpacho import Soup, get
 from rhyme_with_ai.utils import find_last_word
+from rhyme_with_ai.utils import find_rhyme_french
+from rhyme_with_ai.utils import extract
+from rhyme_with_ai.utils import mk_dico
 def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
        return query_datamuse_api(last_word, n_rhymes)
     elif language == "dutch":
         return mick_rijmwoordenboek(last_word, n_rhymes)
+    elif language == "french":
+        lexique = extract('noms-lexique.org.txt')
+        dico_3 = mk_dico(lexique, 3)
+        return find_rhyme_french(last_word, dico_3, lexique, n_rhymes)
     else:
         raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")

rhyme_with_ai/utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import itertools
 import string
 def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
@@ -46,4 +47,66 @@ def pairwise(iterable):
 def sanitize(s):
     """Remove punctuation from a string."""
-    return s.translate(str.maketrans("", "", string.punctuation))

 import itertools
 import string
+import random
 def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
 def sanitize(s):
     """Remove punctuation from a string."""
+    return s.translate(str.maketrans("", "", string.punctuation))
def extract(filename, encoding="utf-8"):
    """Read (spelling, phonetic) pairs from a tab-separated lexicon file.

    The first line of the file is treated as a header and skipped. For every
    following line, the first two tab-separated fields are kept.

    Args:
        filename: path of the TSV file to read.
        encoding: text encoding used to open the file. Defaults to UTF-8 so the
            result does not depend on the platform locale; pass another value
            if the lexicon is stored differently.

    Returns:
        A list of (ortho, phon) tuples in file order.
    """
    words = []
    with open(filename, "r", encoding=encoding) as f:
        next(f, None)  # skip the header line (no error on an empty file)
        for line in f:
            # Strip the line terminator first: with exactly two columns it
            # would otherwise stay glued to the phonetic field.
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2:
                # Blank or malformed row: nothing usable, skip it.
                continue
            words.append((fields[0], fields[1]))
    return words
def mk_dico(lexique, n):
    """Build a rhyme dictionary keyed by the last n phonetic characters.

    Args:
        lexique: iterable of (spelling, phonetic) pairs.
        n: length of the phonetic suffix used as key; must be at least 1.

    Returns:
        A dict mapping each suffix to the list of spellings whose
        transcription ends with it, in lexicon order. Entries whose
        transcription is shorter than n are left out.

    Raises:
        ValueError: if n is smaller than 1.
    """
    if n < 1:
        # s[-0:] is the whole string and a negative n slices from the front,
        # so such values would silently produce meaningless keys.
        raise ValueError(f"n must be >= 1, got {n}")
    dico = {}
    for item in lexique:
        phon = item[1]
        if len(phon) >= n:
            dico.setdefault(phon[-n:], []).append(item[0])
    return dico
def ortho2phon(word, words_list):
    """Look up the phonetic form of a spelling in a list of pairs.

    Args:
        word: the spelling to search for.
        words_list: sequence of (spelling, phonetic) entries.

    Returns:
        The phonetic form of the first entry whose spelling equals word
        (so for homographs with different pronunciations the earliest entry
        wins), or "" when word is not in the list.
    """
    matches = (entry[1] for entry in words_list if word == entry[0])
    return next(matches, "")
def find_rhyme_french(word, dico, lexique, n=3):
    """Return a random lexicon word whose last n phonemes match those of word.

    Args:
        word: spelling of the word to rhyme with.
        dico: rhyme dictionary mapping a phonetic suffix to a list of
            spellings (the shape built by mk_dico).
        lexique: list of (spelling, phonetic) pairs used to find the
            transcription of word.
        n: number of trailing phonetic characters that must match.

    Returns:
        One spelling picked at random, or None when word is not in lexique,
        when its suffix is not a key of dico, or when no word other than
        word itself shares that suffix.
    """
    # 1. Find the phonetic transcription of the word.
    phon = ortho2phon(word, lexique)
    if not phon:
        return None
    # 2. Collect the words sharing the final phonemes. Build a new filtered
    #    list instead of removing from dico's list: that list is shared
    #    between calls and must stay intact.
    candidates = [w for w in dico.get(phon[-n:], ()) if w != word]
    if not candidates:
        # Only the word itself (or nothing) has this ending.
        return None
    # 3. Draw one word at random.
    return random.choice(candidates)