Spaces:

hivecorp
/

orb-audio

Runtime error

App Files Files Community

hivecorp committed on Jan 4

Commit

81b3ec7

verified ·

1 Parent(s): 05862c2

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -32

app.py CHANGED Viewed

@@ -1,46 +1,77 @@
 import gradio as gr
-import requests
-import uuid  # For generating unique temporary file names
-# Define available speakers (update with real speaker options from the Kokoro-TTS model)
-AVAILABLE_SPEAKERS = ["Speaker 1", "Speaker 2", "Speaker 3"]
-# Function to interact with Kokoro-TTS API and generate speech
 def generate_tts(text, speaker):
-    # Kokoro-TTS Space endpoint
-    url = "https://hexgrad-kokoro-tts.hf.space/api/predict"
-    payload = {
-        "data": [text, speaker]  # Send text and speaker selection to API
-    }
-    response = requests.post(url, json=payload)
-    if response.status_code == 200:
-        output = response.json()
-        audio_data = output["data"][0]  # Get the generated audio binary
-        temp_filename = f"output_{uuid.uuid4().hex}.wav"  # Generate a unique temporary name
-        with open(temp_filename, "wb") as f:
-            f.write(audio_data.encode('latin1'))  # Decode and save the binary data as a WAV file
-        return temp_filename, temp_filename
     else:
-        return None, "Error: Unable to generate TTS"
-# Gradio Interface
-with gr.Blocks() as app:
-    gr.Markdown("## Long Text-to-Speech Generator with Kokoro-TTS")
     with gr.Row():
-        input_text = gr.Textbox(label="Enter your text", placeholder="Type or paste your text here...", lines=10)
-        speaker_dropdown = gr.Dropdown(choices=AVAILABLE_SPEAKERS, label="Select Speaker")
-    with gr.Row():
-        generate_button = gr.Button("Generate Speech")
     with gr.Row():
-        audio_output = gr.Audio(label="Generated Speech", type="file", interactive=False)
-        download_button = gr.File(label="Download Audio", file_types=[".wav", ".mp3"])
-    # Link the generate button to the TTS generation function
-    generate_button.click(fn=generate_tts, inputs=[input_text, speaker_dropdown], outputs=[audio_output, download_button])
 # Launch the app
-app.launch()

 import gradio as gr
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
+import soundfile as sf
+import os
+import time
+# Load the model and processor once at import time so every request reuses them. NOTE(review): AutoModelForSpeechSeq2Seq is normally used for speech-to-text checkpoints; confirm "hexgrad/Kokoro-TTS" really loads through it
+model_name = "hexgrad/Kokoro-TTS"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
+processor = AutoProcessor.from_pretrained(model_name)
+# Speaker labels offered in the UI dropdown and forwarded to the processor as `speaker`
+speakers = ["Speaker 1", "Speaker 2", "Speaker 3"]  # Placeholder labels; replace with the model's actual speaker names
+# Function to generate TTS
 def generate_tts(text, speaker):
+    try:
+        # Preprocess input text
+        inputs = processor(text, return_tensors="pt", speaker=speaker)
+        # Generate speech
+        with torch.no_grad():
+            speech = model.generate(**inputs)
+        # Save the output as a temporary file with an auto-generated name
+        timestamp = int(time.time())
+        output_file = f"output_{timestamp}.wav"
+        sf.write(output_file, speech.numpy(), samplerate=22050)  # Adjust samplerate if needed
+        return output_file
+    except Exception as e:
+        return str(e)
+# Gradio interface
+def tts_app(text, speaker):
+    output_file = generate_tts(text, speaker)
+    if output_file.endswith(".wav"):
+        return output_file, f"Generated: {output_file}"
     else:
+        return None, output_file
+# Auto-naming system for downloads
+def get_download_name():
+    return f"tts_output_{int(time.time())}.wav"
+# Build the UI: text box and speaker dropdown as inputs; audio player, status box and download file as outputs
+with gr.Blocks() as demo:
+    gr.Markdown("# Kokoro-TTS v1.9: Long Input TTS Generation")
     with gr.Row():
+        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here...", lines=10)
+        speaker_dropdown = gr.Dropdown(label="Select Speaker", choices=speakers, value=speakers[0])
+    generate_button = gr.Button("Generate TTS")
     with gr.Row():
+        audio_output = gr.Audio(label="Generated Audio")
+        status_output = gr.Textbox(label="Status", placeholder="Generation status will appear here...")
+    download_button = gr.Button("Download Audio")
+    download_output = gr.File(label="Download Generated Audio")
+    # Wire the buttons: "Generate TTS" runs tts_app; "Download Audio" fills the File output from get_download_name
+    generate_button.click(
+        fn=tts_app,
+        inputs=[text_input, speaker_dropdown],
+        outputs=[audio_output, status_output]
+    )
+    download_button.click(
+        fn=get_download_name,
+        outputs=download_output
+    )
 # Start the Gradio server (runs at import time; there is no __main__ guard)
+demo.launch()