DHEIVER committed on
Commit
2870f08
1 Parent(s): 857cad7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -37
app.py CHANGED
@@ -1,46 +1,23 @@
1
- from transformers import pipeline
2
- from datasets import load_dataset
3
- import soundfile as sf
4
- import torch
5
  import gradio as gr
6
- import os
 
7
 
8
- # Text-to-speech pipeline
9
- synthesiser = pipeline("text-to-speech", "microsoft/speecht5_tts")
10
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
11
 
12
- def synthesize_speech(text, speaker_id):
13
- try:
14
- if not text.strip():
15
- return "Please enter valid text."
16
 
17
- speaker_embedding = torch.tensor(embeddings_dataset[speaker_id]["xvector"]).unsqueeze(0)
18
- speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
19
-
20
- # Saving as WAV file
21
- wav_file_path = "speech.wav"
22
- sf.write(wav_file_path, speech["audio"], samplerate=speech["sampling_rate"])
23
-
24
- # Converting to MP3
25
- mp3_file_path = "speech.mp3"
26
- os.system(f"ffmpeg -i {wav_file_path} -acodec libmp3lame {mp3_file_path}")
27
-
28
- return mp3_file_path
29
- except Exception as e:
30
- return f"An error occurred: {str(e)}"
31
-
32
-
33
- # Improved Interface
34
  iface = gr.Interface(
35
- fn=synthesize_speech,
36
- inputs=[
37
- gr.Textbox(label="Enter Text", placeholder="Type here to practice English pronunciation..."),
38
- gr.Slider(minimum=0, maximum=len(embeddings_dataset)-1, label="Select Speaker Voice")
 
39
  ],
40
- outputs=gr.Audio(label="Pronunciation Audio", type="filepath"),
41
- title="English Pronunciation Helper",
42
- description="This tool helps you practice English pronunciation. Type any text in English, and hear how it's pronounced. You can also download the audio in MP3 format."
43
  )
44
 
45
-
46
  iface.launch()
 
 
 
 
 
1
  import gradio as gr
2
+ from bark import SAMPLE_RATE, generate_audio, preload_models
3
+ from IPython.display import Audio
4
 
5
+ # Preload models if needed
6
+ preload_models()
 
7
 
8
def create_audio(text):
    """Generate speech for *text* with Bark and return it for the Gradio outputs.

    Args:
        text: The prompt typed into the input textbox.

    Returns:
        A 2-tuple, one element per output component of the interface:
        ``(SAMPLE_RATE, audio_array)`` for the audio player, and
        ``SAMPLE_RATE`` on its own for the sample-rate textbox.
    """
    audio_array = generate_audio(text)
    # A numpy-type Gradio Audio output expects a (sample_rate, samples) pair;
    # handing it the bare array leaves the component without a sample rate.
    return (SAMPLE_RATE, audio_array), SAMPLE_RATE
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
# Build the web UI: one multi-line text input, and two outputs that receive
# the two values returned by create_audio (audio player, sample-rate textbox).
# Components come from the top-level gradio namespace; the gr.inputs /
# gr.outputs namespaces are deprecated and absent from newer Gradio releases.
iface = gr.Interface(
    fn=create_audio,
    inputs=gr.Textbox(lines=5, placeholder="Digite seu texto aqui..."),
    outputs=[
        gr.Audio(type="numpy", label="Audio Gerado"),
        gr.Textbox(label="Taxa de Amostragem"),
    ],
    title="Gerador de Áudio com IA",
    description="Digite um texto para gerar áudio usando IA.",
)

iface.launch()