ZabanZad_PoC

Sleeping

App Files Files Community

barghavani committed on Dec 7, 2023

Commit

2098a12

1 Parent(s): c07d473

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -252

app.py CHANGED Viewed

@@ -1,271 +1,134 @@
-import sys
-import os
-# By using XTTS you agree to CPML license https://coqui.ai/cpml
-os.environ["COQUI_TOS_AGREED"] = "1"
-import gradio as gr
-from TTS.api import TTS
-model_names = TTS().list_models()
-m = model_names[0]
-print(model_names)
-tts = TTS(m, gpu=False)
-tts.to("cpu") # no GPU or Amd
-#tts.to("cuda") # cuda only
-def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
-    if agree == True:
-        if language == "fa":
-            tts_init("saillab/xtts_v2_fa_revision1")
-        else:
-            tts_init(m)
-        if use_mic == True:
-            if mic_file_path is not None:
-                speaker_wav=mic_file_path
-            else:
-                gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
-                return (
-                    None,
-                    None,
-                )
-        else:
-            speaker_wav=audio_file_pth
-        if len(prompt)<2:
-            gr.Warning("Please give a longer prompt text")
-            return (
-                    None,
-                    None,
-                )
-        if len(prompt)>10000:
-            gr.Warning("Text length limited to 10000 characters for this demo, please try shorter text")
-            return (
-                    None,
-                    None,
-                )
-        try:
-            if language == "fr":
-                if m.find("your") != -1:
-                    language = "fr-fr"
-            if m.find("/fr/") != -1:
-                language = None
-            tts.tts_to_file(
-                text=prompt,
-                file_path="output.wav",
-                speaker_wav=speaker_wav,
-                language=language
-            )
-        except RuntimeError as e :
-            if "device-assert" in str(e):
-                # cannot do anything on cuda device side error, need tor estart
-                gr.Warning("Unhandled Exception encounter, please retry in a minute")
-                print("Cuda device-assert Runtime encountered need restart")
-                sys.exit("Exit due to cuda device-assert")
-            else:
-                raise e
-        return (
-            gr.make_waveform(
-                audio="output.wav",
-            ),
-            "output.wav",
-        )
-    else:
-        gr.Warning("Please accept the Terms & Condition!")
-        return (
-                None,
-                None,
-            )
-title = "XTTS Glz's remake (Fonctional Text-2-Speech)"
-description = """
-<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
-<br/>
-XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
-<br/>
-This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
-<br/>
-Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
-<br/>
-<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
-<br/>
-<a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
-<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-</p>
-"""
-article = """
-<div style='margin:20px auto;'>
-<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
-</div>
-"""
-examples = [
-    [
-        "Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
-        "en",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "اگر بدنبال یادگیری فارسی هستید و در زبان زاد ما را دنبال کنید",
-        "fa",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
-        "fr",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Als ich sechs war, sah ich einmal ein wunderbares Bild",
-        "de",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Cuando tenía seis años, vi una vez una imagen magnífica",
-        "es",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
-        "pt",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
-        "pl",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
-        "it",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
-        "tr",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Когда мне было шесть лет, я увидел однажды удивительную картинку",
-        "ru",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
-        "nl",
-        "examples/male.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
-        "cs",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
-    [
-        "当我还只有六岁的时候， 看到了一副精彩的插画",
-        "zh-cn",
-        "examples/female.wav",
-        None,
-        False,
-        True,
-    ],
 ]
-gr.Interface(
-    fn=predict,
     inputs=[
         gr.Textbox(
-            label="Text Prompt",
-            info="One or two sentences at a time is better",
-            value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
-        ),
-        gr.Dropdown(
-            label="Language",
-            info="Select an output language for the synthesised speech",
-            choices=[
-                "en",
-                "es",
-                "fr",
-                "de",
-                "it",
-                "pt",
-                "pl",
-                "tr",
-                "ru",
-                "nl",
-                "cs",
-                "ar",
-                "zh-cn",
-                "fa",
-            ],
-            max_choices=1,
-            value="en",
         ),
-        gr.Audio(
-            label="Reference Audio",
-            info="Click on the ✎ button to upload your own target speaker audio",
-            type="filepath",
-            value="examples/female.wav",
         ),
-        gr.Audio(source="microphone",
-                 type="filepath",
-                 info="Use your microphone to record audio",
-                 label="Use Microphone for Reference"),
-        gr.Checkbox(label="Check to use Microphone as Reference",
-                    value=False,
-                    info="Notice: Microphone input may not work properly under traffic",),
-        gr.Checkbox(
-            label="Agree",
-            value=True,
-            info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
-        ),
-    ],
-    outputs=[
-        gr.Video(label="Waveform Visual"),
-        gr.Audio(label="Synthesised Audio"),
     ],
-    title=title,
     description=description,
     article=article,
-    examples=examples,
-).queue().launch(debug=True)

+# Adapted from the Hugging Face Space "Kamtera/Persian-tts-CoquiTTS"
+# (file app.py, commit b9d7413, "Upload app.py with huggingface_hub").
+# NOTE: the web-page text that was pasted in together with the code (site
+# navigation, like count, file size, raw/history/blame links) is not Python
+# and prevented this module from being parsed, so only this provenance
+# comment is kept.
+import tempfile ,os
+from TTS.config import load_config
+import gradio as gr
+from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
+# Names offered in the UI; each one is also the local directory that holds
+# that model's downloaded files.
+MODEL_NAMES=[
+    "xtts-farsi",
+]
+# Longer input text is truncated to this many characters before synthesis.
+MAX_TXT_LEN = 800
+# NOTE(review): these two paths are not referenced anywhere in the visible
+# code; kept in case something else reads them.
+model_path = os.getcwd() + "/best_model.pth"
+config_path = os.getcwd() + "/config.json"
+from TTS.utils.download import download_url
+# One entry per model:
+#   [local directory, checkpoint file name, config file name, base URL]
+# The base URL must end with "/resolve/main/" because the file name is
+# appended to it directly; without it the concatenation does not form a valid
+# Hugging Face Hub file URL.
+modelInfo=[
+    ["xtts-farsi","best_model_31680.pth","config.json","https://huggingface.co/saillab/xtts_v2_fa_revision1/resolve/main/"],
 ]
+# Fetch every model at import time so tts() can load it from local disk.
+for directory, checkpoint_name, config_name, base_url in modelInfo:
+    # exist_ok avoids the separate exists() check and its race.
+    os.makedirs(directory, exist_ok=True)
+    print("|> Downloading: ",directory)
+    # Files are stored under fixed local names regardless of their remote name.
+    download_url(
+        base_url+checkpoint_name,directory,"best_model.pth"
+    )
+    download_url(
+        base_url+config_name,directory,"config.json"
+    )
+# Loaded synthesizers keyed by model name, so each checkpoint is read from
+# disk once per process instead of on every request.
+_SYNTHESIZERS = {}
+def tts(text: str,model_name: str):
+    """Synthesize ``text`` with the model stored in directory ``model_name``.
+
+    Args:
+        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
+            dropped (a notice is printed).
+        model_name: Directory containing ``best_model.pth`` and
+            ``config.json`` for the model to use.
+
+    Returns:
+        Path of a ``.wav`` temporary file holding the generated audio. The
+        file is created with ``delete=False`` and is not removed here.
+
+    Raises:
+        NameError: If the model's checkpoint or config file is missing.
+    """
+    if len(text) > MAX_TXT_LEN:
+        text = text[:MAX_TXT_LEN]
+        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
+    print(text)
+    checkpoint_file = model_name+"/best_model.pth"
+    config_file = model_name+"/config.json"
+    # A constructor call never yields None, so the "model not found" case has
+    # to be detected from the files themselves.
+    if not (os.path.isfile(checkpoint_file) and os.path.isfile(config_file)):
+        raise NameError("model not found")
+    # synthesize (reusing an already loaded model when there is one)
+    synthesizer = _SYNTHESIZERS.get(model_name)
+    if synthesizer is None:
+        synthesizer = Synthesizer(checkpoint_file, config_file)
+        _SYNTHESIZERS[model_name] = synthesizer
+    wavs = synthesizer.tts(text)
+    # return output
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        synthesizer.save_wav(wavs, fp)
+        return fp.name
+description="""
+This is a demo of persian text to speech model.
+**Github : https://github.com/karim23657/Persian-tts-coqui  **
+Models can be found here:  <br>
+|Model|Dataset|
+|----|------|
+|[vits female (best)](https://huggingface.co/Kamtera/persian-tts-female-vits)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
+|[vits male1 (best)](https://huggingface.co/Kamtera/persian-tts-male1-vits)|[persian-tts-dataset-male](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-male)|
+|[vits female1](https://huggingface.co/Kamtera/persian-tts-female1-vits)|[ParsiGoo](https://github.com/karim23657/ParsiGoo)|
+|[vits male](https://huggingface.co/Kamtera/persian-tts-male-vits)|[persian-tts-dataset](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset)|
+|[glowtts female](https://huggingface.co/Kamtera/persian-tts-female-glow_tts)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
+|[glowtts male](https://huggingface.co/Kamtera/persian-tts-male-glow_tts)|[persian-tts-dataset](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset)|
+|[tacotron2 female](https://huggingface.co/Kamtera/persian-tts-female-tacotron2)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
+"""
+article= ""  # no footer text is shown below the interface
+examples=[  # each row is [text, model_name], in the same order as tts() parameters
+    ["و خداوند شما را با ارسال روح در جسم زندگانی و حیات بخشید","xtts-farsi"],
+    ["تاجر تو چه تجارت می کنی ، تو را چه که چه تجارت می کنم؟","xtts-farsi"],
+]
+iface = gr.Interface(  # inputs are passed positionally to tts(text, model_name)
+    fn=tts,
     inputs=[
         gr.Textbox(
+            label="Text",
+            value="زندگی فقط یک بار است؛ از آن به خوبی استفاده کن",
         ),
+        gr.Radio(
+            label="Pick a TTS Model ",
+            choices=MODEL_NAMES,
+            value="xtts-farsi",
         ),
     ],
+    outputs=gr.Audio(label="Output",type='filepath'),  # tts() returns the path of a .wav file
+    examples=examples,
+    title="🗣️ Persian tts 🗣️",
     description=description,
     article=article,
+    live=False
+)
+iface.launch(share=False)