hivecorp committed on
Commit
81b3ec7
·
verified ·
1 Parent(s): 05862c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -32
app.py CHANGED
@@ -1,46 +1,77 @@
1
  import gradio as gr
2
- import requests
3
- import uuid # For generating unique temporary file names
 
 
 
4
 
5
- # Define available speakers (update with real speaker options from the Kokoro-TTS model)
6
- AVAILABLE_SPEAKERS = ["Speaker 1", "Speaker 2", "Speaker 3"]
 
 
7
 
8
- # Function to interact with Kokoro-TTS API and generate speech
 
 
 
9
  def generate_tts(text, speaker):
10
- # Kokoro-TTS Space endpoint
11
- url = "https://hexgrad-kokoro-tts.hf.space/api/predict"
12
- payload = {
13
- "data": [text, speaker] # Send text and speaker selection to API
14
- }
15
- response = requests.post(url, json=payload)
16
-
17
- if response.status_code == 200:
18
- output = response.json()
19
- audio_data = output["data"][0] # Get the generated audio binary
20
- temp_filename = f"output_{uuid.uuid4().hex}.wav" # Generate a unique temporary name
21
- with open(temp_filename, "wb") as f:
22
- f.write(audio_data.encode('latin1')) # Decode and save the binary data as a WAV file
23
- return temp_filename, temp_filename
 
 
 
 
 
 
 
 
24
  else:
25
- return None, "Error: Unable to generate TTS"
 
 
 
 
26
 
27
- # Gradio Interface
28
- with gr.Blocks() as app:
29
- gr.Markdown("## Long Text-to-Speech Generator with Kokoro-TTS")
30
 
31
  with gr.Row():
32
- input_text = gr.Textbox(label="Enter your text", placeholder="Type or paste your text here...", lines=10)
33
- speaker_dropdown = gr.Dropdown(choices=AVAILABLE_SPEAKERS, label="Select Speaker")
34
 
35
- with gr.Row():
36
- generate_button = gr.Button("Generate Speech")
37
 
38
  with gr.Row():
39
- audio_output = gr.Audio(label="Generated Speech", type="file", interactive=False)
40
- download_button = gr.File(label="Download Audio", file_types=[".wav", ".mp3"])
 
 
 
 
 
 
 
 
 
 
41
 
42
- # Link the generate button to the TTS generation function
43
- generate_button.click(fn=generate_tts, inputs=[input_text, speaker_dropdown], outputs=[audio_output, download_button])
 
 
44
 
45
  # Launch the app
46
- app.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
4
+ import soundfile as sf
5
+ import os
6
+ import time
7
 
8
+ # Load the Kokoro-TTS model and processor
9
+ model_name = "hexgrad/Kokoro-TTS"
10
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
11
+ processor = AutoProcessor.from_pretrained(model_name)
12
 
13
+ # Define available speakers (update this based on the model's capabilities)
14
+ speakers = ["Speaker 1", "Speaker 2", "Speaker 3"] # Replace with actual speaker names
15
+
16
# Function to generate TTS
def generate_tts(text, speaker, samplerate=22050):
    """Synthesize ``text`` with the module-level model and save it as a WAV file.

    Args:
        text: Text to synthesize. Empty, whitespace-only or ``None`` input is
            rejected before the model is touched.
        speaker: Speaker identifier, forwarded to the processor as ``speaker=``.
        samplerate: Sample rate handed to ``sf.write``. Defaults to 22050.
            NOTE(review): confirm this matches the model's real output rate.

    Returns:
        On success, the name of the ``.wav`` file written to the current
        working directory. On failure, a human-readable error message
        (callers tell the two apart themselves).
    """
    import uuid  # local import: only needed for collision-free file names

    if not text or not text.strip():
        return "Error: input text is empty."

    try:
        # Preprocess input text
        inputs = processor(text, return_tensors="pt", speaker=speaker)

        # Generate speech without tracking gradients
        with torch.no_grad():
            speech = model.generate(**inputs)

        # Move to CPU and drop singleton dims before handing to soundfile.
        # NOTE(review): assumes generate() returns one waveform tensor,
        # possibly with a leading batch dimension of 1 -- confirm.
        waveform = speech.detach().cpu().squeeze().numpy()

        # A timestamp alone has one-second resolution, so two requests in the
        # same second would overwrite each other; add a random suffix.
        output_file = f"output_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
        sf.write(output_file, waveform, samplerate=samplerate)

        return output_file
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return str(e)
34
+
35
+ # Gradio interface
36
def tts_app(text, speaker):
    """Run ``generate_tts`` and shape its result for the Gradio outputs.

    Args:
        text: Text to synthesize.
        speaker: Speaker identifier selected in the UI.

    Returns:
        A ``(audio_path, status)`` tuple. On success ``audio_path`` is the
        generated ``.wav`` file and ``status`` is ``"Generated: <path>"``.
        On failure ``audio_path`` is ``None`` and ``status`` carries the
        error message returned by ``generate_tts``.
    """
    result = generate_tts(text, speaker)

    # generate_tts returns either a file name or an error message, both as
    # strings. An error message can itself end in ".wav" (e.g. a file-not-found
    # error), so also require that the file really exists.
    if result.endswith(".wav") and os.path.isfile(result):
        return result, f"Generated: {result}"
    return None, result
42
+
43
+ # Auto-naming system for downloads
44
def get_download_name():
    """Return a fresh file name of the form ``tts_output_<seconds>_<hex>.wav``.

    The Unix timestamp keeps names roughly sortable; the random suffix keeps
    two calls within the same second from returning the same name.
    """
    import uuid  # local import: the module does not import uuid at top level

    return f"tts_output_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
46
 
47
+ # Create the Gradio app
48
def _generate_and_remember(text, speaker):
    """Call ``tts_app`` and also return the audio path so it can be kept in state."""
    audio_path, status = tts_app(text, speaker)
    return audio_path, status, audio_path


def _prepare_download(audio_path):
    """Copy the last generated audio to an auto-named file for the download widget.

    Returns ``None`` (which clears the file widget) when nothing has been
    generated yet or the generated file no longer exists.
    """
    import shutil  # local import: only this handler needs it

    if not audio_path or not os.path.isfile(audio_path):
        return None
    download_path = get_download_name()
    shutil.copyfile(audio_path, download_path)
    return download_path


with gr.Blocks() as demo:
    gr.Markdown("# Kokoro-TTS v1.9: Long Input TTS Generation")

    # Path of the most recently generated audio file for this session.
    last_audio_path = gr.State(value=None)

    with gr.Row():
        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here...", lines=10)
        speaker_dropdown = gr.Dropdown(label="Select Speaker", choices=speakers, value=speakers[0])

    generate_button = gr.Button("Generate TTS")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio")
        status_output = gr.Textbox(label="Status", placeholder="Generation status will appear here...")

    download_button = gr.Button("Download Audio")
    download_output = gr.File(label="Download Generated Audio")

    # Link functions to interface
    generate_button.click(
        fn=_generate_and_remember,
        inputs=[text_input, speaker_dropdown],
        outputs=[audio_output, status_output, last_audio_path],
    )

    # The download widget must receive a path that exists on disk; a freshly
    # invented file name on its own points at nothing.
    download_button.click(
        fn=_prepare_download,
        inputs=last_audio_path,
        outputs=download_output,
    )

# Launch the app
demo.launch()