Spaces: ixxan/uyghur-speech-models

Running

Irpan committed on Dec 25, 2024

Commit

c492cbb

1 Parent(s): 448bf1b

asr

Files changed (3) hide show

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ mms_transcribe = gr.Interface(
         gr.Dropdown(
             choices=[model for model in asr.models_info],
             label="Select a Model",
-            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
             interactive=True
         ),
     ],
@@ -37,7 +37,7 @@ mms_synthesize = gr.Interface(
         gr.Dropdown(
             choices=[model for model in tts.models_info],
             label="Select a Model",
-            value="Meta-MMS",
             interactive=True
         )
     ],

         gr.Dropdown(
             choices=[model for model in asr.models_info],
             label="Select a Model",
+            value="Ixxan-FineTuned-MMS",
             interactive=True
         ),
     ],
         gr.Dropdown(
             choices=[model for model in tts.models_info],
             label="Select a Model",
+            value="Ixxan-FineTuned-MMS",
             interactive=True
         )
     ],

asr.py CHANGED Viewed

@@ -25,19 +25,13 @@ models_info = {
         "ctc_model": True,
         "arabic_script": True
     },
-    "ixxan/whisper-small-thugy20": {
-        "processor": AutoProcessor.from_pretrained("ixxan/whisper-small-thugy20"),
-        "model": AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-thugy20"),
-        "ctc_model": False,
-        "arabic_script": False
-    },
-    "ixxan/whisper-small-uyghur-common-voice": {
         "processor": AutoProcessor.from_pretrained("ixxan/whisper-small-uyghur-common-voice"),
         "model": AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-uyghur-common-voice"),
         "ctc_model": False,
         "arabic_script": False
     },
-    "ixxan/wav2vec2-large-mms-1b-uyghur-latin": {
         "processor": Wav2Vec2Processor.from_pretrained("ixxan/wav2vec2-large-mms-1b-uyghur-latin", target_lang='uig-script_latin'),
         "model": Wav2Vec2ForCTC.from_pretrained("ixxan/wav2vec2-large-mms-1b-uyghur-latin", target_lang='uig-script_latin'),
         "ctc_model": True,

         "ctc_model": True,
         "arabic_script": True
     },
+    "Ixxan-FineTuned-Whisper": {
         "processor": AutoProcessor.from_pretrained("ixxan/whisper-small-uyghur-common-voice"),
         "model": AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-uyghur-common-voice"),
         "ctc_model": False,
         "arabic_script": False
     },
+    "Ixxan-FineTuned-MMS": {
         "processor": Wav2Vec2Processor.from_pretrained("ixxan/wav2vec2-large-mms-1b-uyghur-latin", target_lang='uig-script_latin'),
         "model": Wav2Vec2ForCTC.from_pretrained("ixxan/wav2vec2-large-mms-1b-uyghur-latin", target_lang='uig-script_latin'),
         "ctc_model": True,

tts.py CHANGED Viewed

@@ -10,12 +10,17 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load processor and model
 models_info = {
     "Meta-MMS": {
         "processor": AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic"),
         "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
         "arabic_script": True
     },
-    "IS2AI-TurkicTTS": None
 }
 vocoder_checkpoint="parallelwavegan_male2_checkpoint/checkpoint-400000steps.pkl" ### specify vocoder path

 # Load processor and model
 models_info = {
+    "IS2AI-TurkicTTS": None,
     "Meta-MMS": {
         "processor": AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic"),
         "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
         "arabic_script": True
     },
+    "Ixxan-FineTuned-MMS": {
+        "processor": AutoTokenizer.from_pretrained("ixxan/mms-tts-uig-script_arabic-UQSpeech"),
+        "model": VitsModel.from_pretrained("ixxan/mms-tts-uig-script_arabic-UQSpeech"),
+        "arabic_script": True
+    }
 }
 vocoder_checkpoint="parallelwavegan_male2_checkpoint/checkpoint-400000steps.pkl" ### specify vocoder path