Update app.py
Browse files
app.py
CHANGED
@@ -17,15 +17,9 @@ def download_model():
|
|
17 |
|
18 |
def initialize_models():
|
19 |
"""Initialize the OuteTTS and Faster-Whisper models"""
|
20 |
-
# Download and initialize GGUF model
|
21 |
model_path = download_model()
|
22 |
-
tts_interface = InterfaceGGUF(
|
23 |
-
model_path,
|
24 |
-
n_ctx=2048, # Reduced context size
|
25 |
-
n_batch=512, # Reduced batch size
|
26 |
-
n_threads=4, # Adjust based on CPU
|
27 |
-
verbose=False, # Reduce logging
|
28 |
-
)
|
29 |
|
30 |
# Initialize Whisper
|
31 |
asr_model = WhisperModel("tiny",
|
@@ -55,8 +49,8 @@ def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.
|
|
55 |
gr.Info(f"Using reference text: {reference_text}")
|
56 |
|
57 |
# Limit text lengths to prevent context overflow
|
58 |
-
reference_text = reference_text[:2000] #
|
59 |
-
text_to_speak = text_to_speak[:300] #
|
60 |
|
61 |
# Create speaker from reference audio
|
62 |
speaker = TTS_INTERFACE.create_speaker(
|
@@ -70,7 +64,7 @@ def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.
|
|
70 |
speaker=speaker,
|
71 |
temperature=temperature,
|
72 |
repetition_penalty=repetition_penalty,
|
73 |
-
max_lenght=
|
74 |
)
|
75 |
|
76 |
# Save to temporary file and return path
|
@@ -82,7 +76,6 @@ Reference text: {reference_text[:300]}...
|
|
82 |
|
83 |
except Exception as e:
|
84 |
return None, f"Error: {str(e)}"
|
85 |
-
|
86 |
# Create Gradio interface
|
87 |
with gr.Blocks(title="Voice Cloning with OuteTTS (GGUF)") as demo:
|
88 |
gr.Markdown("# 🎙️ Voice Cloning with OuteTTS (GGUF)")
|
|
|
17 |
|
18 |
def initialize_models():
|
19 |
"""Initialize the OuteTTS and Faster-Whisper models"""
|
20 |
+
# Download and initialize GGUF model
|
21 |
model_path = download_model()
|
22 |
+
tts_interface = InterfaceGGUF(model_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Initialize Whisper
|
25 |
asr_model = WhisperModel("tiny",
|
|
|
49 |
gr.Info(f"Using reference text: {reference_text}")
|
50 |
|
51 |
# Limit text lengths to prevent context overflow
|
52 |
+
reference_text = reference_text[:2000] # Limit reference text
|
53 |
+
text_to_speak = text_to_speak[:300] # Limit output text
|
54 |
|
55 |
# Create speaker from reference audio
|
56 |
speaker = TTS_INTERFACE.create_speaker(
|
|
|
64 |
speaker=speaker,
|
65 |
temperature=temperature,
|
66 |
repetition_penalty=repetition_penalty,
|
67 |
+
max_lenght=2048 # Note: Using original typo from docs ('lenght')
|
68 |
)
|
69 |
|
70 |
# Save to temporary file and return path
|
|
|
76 |
|
77 |
except Exception as e:
|
78 |
return None, f"Error: {str(e)}"
|
|
|
79 |
# Create Gradio interface
|
80 |
with gr.Blocks(title="Voice Cloning with OuteTTS (GGUF)") as demo:
|
81 |
gr.Markdown("# 🎙️ Voice Cloning with OuteTTS (GGUF)")
|