"""Gradio demo app for a fine-tuned Turkish SpeechT5 text-to-speech model."""

import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import pipeline

# Initialize the text-to-speech pipeline
synthesiser = pipeline("text-to-speech", "umarigan/speecht5_tts_tr_v1.0")

# Load the speaker embedding dataset
embeddings_dataset = load_dataset(
    "umarigan/turkish_voice_dataset_embedded", split="train"
)

# Row of the embedding dataset used as the (single) output voice.
SPEAKER_INDEX = 768

# The voice never changes between requests, so build the embedding tensor once
# at start-up instead of re-reading the dataset on every call.
SPEAKER_EMBEDDING = torch.tensor(
    embeddings_dataset[SPEAKER_INDEX]["speaker_embeddings"]
).unsqueeze(0)


def generate_speech(text: str) -> str:
    """Synthesise Turkish speech for *text* and return the path of a WAV file.

    Args:
        text: The text to be spoken.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of feeding the model
        # an empty prompt.
        raise gr.Error("Please enter some text to synthesize.")

    speech = synthesiser(
        text, forward_params={"speaker_embeddings": SPEAKER_EMBEDDING}
    )

    # Write each result to its own temporary file. A fixed "speech.wav" would
    # be shared by all requests, so concurrent users could overwrite (or be
    # served) each other's audio. The handle is closed before soundfile
    # reopens the path; the file itself is intentionally kept (delete=False)
    # because Gradio reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        output_path = tmp_file.name
    sf.write(output_path, speech["audio"], samplerate=speech["sampling_rate"])

    # Return the audio file path to Gradio
    return output_path


# Define the Gradio interface
inputs = [
    gr.Textbox(
        label="📝 Enter Text",
        placeholder="Bir berber bir berbere gel beraber bir berber kuralım demiş",
        lines=3,
    ),
]
outputs = gr.Audio(label="🎤 Generated Speech")

# Additional elements to include information and style
title = "🎙️ Turkish Text-to-Speech with Fine-Tuned TTS Model"
description = """
Welcome to the **Turkish Text-to-Speech** app! 🌟 This model is a fine-tuned version of Microsoft's SpeechT5, trained on a large Turkish dataset with over 20k audio samples.
It helps generate natural-sounding speech from text input in **Turkish**! 🇹🇷

**Use Cases**:
- Easily generate **custom speech datasets**.
- Automate **text-to-speech pipelines** for various applications with low cost and efficiency.

💡 Check out the model on [Hugging Face](https://huggingface.co/umarigan/speecht5_tts_tr_v1.0)
"""
footer = """
💻 Connect with me on [X](https://x.com/Umar26338572e) 🐦
"""

# Create the Gradio app interface
demo = gr.Interface(
    fn=generate_speech,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=footer,
    # Choose a theme that matches the colorful aesthetic.
    # NOTE(review): "compact" looks like a legacy theme name - confirm the
    # installed Gradio version still recognises it.
    theme="compact",
)
demo.launch()