umarigan committed on
Commit
1144e23
·
verified ·
1 Parent(s): 081f0af

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -0
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from datasets import load_dataset
4
+ import soundfile as sf
5
+ import torch
6
+
7
# Build the text-to-speech pipeline once, at import time, so every request
# served by the app reuses the same loaded model.
synthesiser = pipeline(
    task="text-to-speech",
    model="umarigan/speecht5_tts_tr_v1.0",
)

# Fetch the "train" split of the dataset whose rows are read later for their
# "speaker_embeddings" entry.
embeddings_dataset = load_dataset(
    "umarigan/turkish_voice_dataset_embedded",
    split="train",
)
12
+
13
+ # Define the speech generation function
14
def generate_speech(text, speaker_id):
    """Synthesise ``text`` with the chosen speaker and return a WAV file path.

    Args:
        text: Sentence to speak; passed unchanged to the ``synthesiser``
            pipeline.
        speaker_id: Row index into ``embeddings_dataset``. That row's
            ``"speaker_embeddings"`` entry is forwarded to the model.
            Anything ``int()`` accepts is allowed, so a float coming from
            the UI works too.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the audio.

    Raises:
        gr.Error: If ``text`` is blank, or ``speaker_id`` is missing,
            non-numeric, or not a valid row index of ``embeddings_dataset``.
    """
    import tempfile

    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesise.")

    # The number widget can hand over None (cleared field) or a float.
    try:
        row_index = int(speaker_id)
    except (TypeError, ValueError, OverflowError) as err:
        raise gr.Error("Speaker ID must be a whole number.") from err

    # Let the dataset do its own bounds check and translate the failure into
    # a message the UI can display.
    try:
        row = embeddings_dataset[row_index]
    except IndexError as err:
        raise gr.Error(
            f"Speaker ID {row_index} is out of range; the dataset has "
            f"{len(embeddings_dataset)} entries."
        ) from err

    # unsqueeze(0) adds a leading dimension of size 1 to the stored vector.
    speaker_embedding = torch.tensor(row["speaker_embeddings"]).unsqueeze(0)
    speech = synthesiser(
        text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )

    # Write to a unique file per call: a single fixed "speech.wav" would be
    # overwritten when two requests are handled at the same time.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name
    sf.write(wav_path, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the audio file path to Gradio
    return wav_path
23
+
24
# Input widgets, in the order generate_speech receives them: the text to
# speak, then the speaker index (pre-filled with 736, precision=0).
text_input = gr.Textbox(
    label="Enter Text",
    placeholder="Bir berber bir berbere gel beraber bir berber kuralım demiş",
)
speaker_input = gr.Number(label="Speaker ID", value=736, precision=0)
inputs = [text_input, speaker_input]

# Audio widget that receives whatever generate_speech returns.
outputs = gr.Audio(label="Generated Speech")

# Assemble the interface first, then start serving it.
demo = gr.Interface(
    fn=generate_speech,
    inputs=inputs,
    outputs=outputs,
    title="Turkish Text-to-Speech",
)
demo.launch()