Create app.py
app.py
ADDED
@@ -0,0 +1,32 @@
import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
import torch

# Initialize the text-to-speech pipeline
synthesiser = pipeline("text-to-speech", "umarigan/speecht5_tts_tr_v1.0")

# Load the speaker embedding dataset
embeddings_dataset = load_dataset("umarigan/turkish_voice_dataset_embedded", split="train")

# Define the speech generation function
def generate_speech(text, speaker_id):
    speaker_embedding = torch.tensor(embeddings_dataset[speaker_id]["speaker_embeddings"]).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Save the generated audio to a file
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])

    # Return the audio file path to Gradio
    return "speech.wav"

# Define the Gradio interface
inputs = [
    gr.Textbox(label="Enter Text", placeholder="Bir berber bir berbere gel beraber bir berber kuralım demiş"),
    gr.Number(label="Speaker ID", value=736, precision=0)
]

outputs = gr.Audio(label="Generated Speech")

gr.Interface(fn=generate_speech, inputs=inputs, outputs=outputs, title="Turkish Text-to-Speech").launch()
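Note: app.py on its own does not install its Python dependencies on Spaces; those normally go in a requirements.txt next to this file (gradio itself is provided by the Space's SDK setting). A minimal sketch inferred from the imports above, not part of this commit, with the sentencepiece entry assumed because the SpeechT5 tokenizer commonly needs it:

requirements.txt (sketch):
transformers
datasets
soundfile
torch
sentencepiece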