# File size: 972 Bytes
# Revision: be530f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan


# Hugging Face Hub ids of the checkpoints this app loads.
_TTS_CHECKPOINT = "Beehzod/speecht5_finetuned_uz_customData2"
_VOCODER_CHECKPOINT = "microsoft/speecht5_hifigan"

# The processor and the text-to-speech model come from the same checkpoint id;
# the vocoder is loaded from a separate one.
processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)
vocoder = SpeechT5HifiGan.from_pretrained(_VOCODER_CHECKPOINT)

# All-zero speaker embedding of shape (1, 512), reused for every synthesis call.
speaker_embeddings = torch.zeros(1, 512)

def text_to_speech(text):
    """Synthesize speech for *text* and return the path of the WAV file.

    The text is tokenized with the module-level ``processor``, converted to a
    waveform by ``model.generate_speech`` (using the module-level
    ``speaker_embeddings`` and ``vocoder``), and written to disk with
    ``soundfile``.

    Every call writes to its own temporary file. A single fixed file name
    would be shared by all requests, so two overlapping requests could
    overwrite each other's audio before it is served.

    Args:
        text: The text to synthesize.

    Returns:
        Path to a newly created ``.wav`` file, written with a sample rate of
        16000.
    """
    inputs = processor(text=text, return_tensors="pt")
    speech = model.generate_speech(
        inputs["input_ids"], speaker_embeddings, vocoder=vocoder
    )

    # Reserve a unique file name and close the handle right away so that
    # soundfile can open the path itself. delete=False keeps the file on disk
    # after the handle is closed, so the caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # NOTE(review): 16000 is presumably the vocoder's output rate — confirm.
    sf.write(output_path, speech.numpy(), 16000)
    return output_path


# Wire text_to_speech into a Gradio UI: text input, audio output.
interface = gr.Interface(
    text_to_speech,
    "text",
    "audio",
    title="Uzbek Text-to-Speech Generator",
    description="Enter Uzbek text and generate speech using the finetuned SpeechT5 model.",
)

# Start serving the app.
interface.launch()