File size: 2,109 Bytes
1144e23
 
 
 
 
 
 
 
 
 
 
 
 
4c33080
082bb30
4c33080
1144e23
 
 
 
 
 
 
 
 
 
082bb30
1144e23
 
082bb30
1144e23
082bb30
 
 
 
 
14c06f6
082bb30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
import torch

# Build the text-to-speech pipeline once at import time so that every
# request handled by the app reuses the same loaded model.
synthesiser = pipeline(
    task="text-to-speech",
    model="umarigan/speecht5_tts_tr_v1.0",
)

# Fetch the "train" split of the dataset that stores the speaker embeddings
# used to condition the generated voice.
embeddings_dataset = load_dataset(
    path="umarigan/turkish_voice_dataset_embedded",
    split="train",
)

# Define the speech generation function
def generate_speech(text):
    """Synthesise speech for ``text`` and return the path of the WAV file.

    Args:
        text: The text to convert to speech, as entered in the Gradio textbox.

    Returns:
        Path to a newly created WAV file containing the generated audio.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    import tempfile

    # Reject blank input up front so the user gets a clear message in the UI
    # instead of the model being run on an empty prompt.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesise.")

    # Use a pre-defined speaker embedding from the dataset. unsqueeze(0) adds
    # a leading dimension (presumably the batch axis the model expects).
    speaker_embedding = torch.tensor(
        embeddings_dataset[768]["speaker_embeddings"]
    ).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Write each result to its own temporary file. A fixed name such as
    # "speech.wav" is shared by every request, so concurrent users could
    # overwrite one another's audio before Gradio has served it.
    # NOTE: delete=False means the file is not removed automatically; the
    # handle is closed first so the path can be reopened for writing.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the audio file path to Gradio
    return output_path

# Define the Gradio interface: a single multi-line textbox as input and an
# audio player as output.
inputs = [
    gr.Textbox(label="📝 Enter Text", placeholder="Bir berber bir berbere gel beraber bir berber kuralım demiş", lines=3),
]

outputs = gr.Audio(label="🎤 Generated Speech")

# Additional elements to include information and style. `description` is
# shown under the title and `footer` is passed as the interface's `article`.
title = "🎙️ Turkish Text-to-Speech with Fine-Tuned TTS Model"
description = """
Welcome to the **Turkish Text-to-Speech** app! 🌟 This model is a fine-tuned version of Microsoft's SpeechT5, trained on a large Turkish dataset with over 20k audio samples. 
It helps generate natural-sounding speech from text input in **Turkish**! 🇹🇷

**Use Cases**: 
- Easily generate **custom speech datasets**.
- Automate **text-to-speech pipelines** for various applications with low cost and efficiency. 💡

Check out the model on [Hugging Face](https://huggingface.co/umarigan/speecht5_tts_tr_v1.0)
"""

footer = """
💻 Connect with me on [X](https://x.com/Umar26338572e) 🐦
"""

# Create the Gradio app interface.
# NOTE: no `theme` argument is passed. "compact" is not one of Gradio's
# built-in themes in current releases; an unknown name is looked up as a Hub
# theme, fails, emits a warning and falls back to the default theme. Omitting
# it keeps the same effective look without the failed lookup.
demo = gr.Interface(
    fn=generate_speech,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=footer,
)

# Start the web server for the app.
demo.launch()