Roger committed on
Commit
a24e4a5
·
1 Parent(s): ef65b12
Files changed (2) hide show
  1. app.py +129 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import torch
4
+ import os
5
+ import time
6
+ from scipy.io import wavfile
7
+
8
+ # Explicitly import Bark components
9
+ from bark import generate_audio, SAMPLE_RATE
10
+ from bark.generation import preload_models, load_model, generate_text_semantic
11
+
12
class VoiceCloningApp:
    """Backend for the Gradio UI: stores reference recordings and runs Bark.

    Constructing an instance creates a ``working_files`` directory next to
    this module and preloads the Bark models. Both the saved reference
    recordings and the generated speech are written into that directory as
    WAV files.

    NOTE(review): the saved reference recording is never read back;
    ``generate_speech`` always calls Bark with ``history_prompt=None``, so no
    voice conditioning is applied by this class as written.
    """

    def __init__(self):
        """Create the working directory and preload the Bark models.

        Raises:
            RuntimeError: If ``preload_models`` raises; the original
                exception is chained as the cause.
        """
        # Create working directory
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.working_dir = os.path.join(self.base_dir, "working_files")
        os.makedirs(self.working_dir, exist_ok=True)

        # Explicit model loading with error handling
        try:
            print("Attempting to load Bark models...")
            preload_models()
            print("Bark models loaded successfully.")
        except Exception as e:
            print(f"Error loading Bark models: {e}")
            # Chain the cause so the underlying traceback is not lost.
            raise RuntimeError(f"Could not load Bark models: {e}") from e

    def process_reference_audio(self, audio_data):
        """Peak-normalize a recording and save it as a WAV file.

        Args:
            audio_data: ``None`` or a ``(sample_rate, samples)`` pair, where
                ``samples`` is array-like.

        Returns:
            A status string for display: a prompt when ``audio_data`` is
            ``None``, a success message once the file is written, or an
            ``"Error processing audio: ..."`` message on any failure.
        """
        if audio_data is None:
            return "Please provide an audio input"

        try:
            # Unpack audio data
            sample_rate, audio_array = audio_data

            # Convert to float BEFORE taking abs(): on an int16 array,
            # abs(-32768) overflows and would corrupt the peak estimate.
            samples = np.asarray(audio_array, dtype=np.float32)
            if samples.size == 0:
                return "Error processing audio: audio input is empty"

            # Normalize to a peak of 1.0; a silent clip (peak 0) is saved
            # unchanged instead of being turned into NaNs by 0/0.
            peak = float(np.max(np.abs(samples)))
            if peak > 0:
                samples = samples / peak

            # Save reference audio
            filename = f"reference_{int(time.time())}.wav"
            filepath = os.path.join(self.working_dir, filename)
            wavfile.write(filepath, sample_rate, samples)

            return "✅ Audio captured successfully!"

        except Exception as e:
            return f"Error processing audio: {str(e)}"

    def generate_speech(self, text):
        """Synthesize ``text`` with Bark and save the result as a WAV file.

        Args:
            text: The text to speak.

        Returns:
            A ``(filepath, error_message)`` pair. On success ``filepath`` is
            the path of the written WAV file and ``error_message`` is
            ``None``; for blank input or on failure ``filepath`` is ``None``
            and ``error_message`` describes the problem.
        """
        if not text or not text.strip():
            return None, "Please enter some text to speak"

        try:
            # generate_audio() expects raw text and has no ``temp`` keyword,
            # so it cannot consume semantic tokens; semantic_to_waveform() is
            # the Bark entry point for that. Imported locally so this method
            # does not depend on the module-level import list.
            from bark import semantic_to_waveform

            print(f"Generating speech for text: {text}")

            # Stage 1: text -> semantic tokens
            semantic_tokens = generate_text_semantic(
                text,
                history_prompt=None,
                temp=0.7,
                min_eos_p=0.05,
            )

            # Stage 2: semantic tokens -> waveform
            audio_array = semantic_to_waveform(
                semantic_tokens,
                history_prompt=None,
                temp=0.7,
            )

            # Save generated audio
            filename = f"generated_speech_{int(time.time())}.wav"
            filepath = os.path.join(self.working_dir, filename)
            wavfile.write(filepath, SAMPLE_RATE, audio_array)

            return filepath, None

        except Exception as e:
            print(f"Speech generation error: {e}")
            return None, f"Error generating speech: {str(e)}"
84
+
85
def create_interface():
    """Build the Gradio Blocks UI and wire it to a ``VoiceCloningApp``.

    The page has a title and one row with two columns: the first takes a
    reference recording and shows a processing status, the second takes text
    and shows the generated audio alongside an error box.

    Returns:
        The constructed ``gr.Blocks`` object; the caller launches it.
    """
    backend = VoiceCloningApp()

    # No theme argument is passed to Blocks (the original note says this is
    # to stay on the most basic theme and avoid font issues).
    with gr.Blocks() as demo:
        gr.Markdown("# 🎙️ Voice Cloning App")

        with gr.Row():
            # Column 1: reference voice capture
            with gr.Column():
                gr.Markdown("## 1. Capture Reference Voice")
                ref_input = gr.Audio(sources=["microphone", "upload"], type="numpy")
                ref_button = gr.Button("Process Reference Voice")
                ref_status = gr.Textbox(label="Processing Result")

            # Column 2: text-to-speech
            with gr.Column():
                gr.Markdown("## 2. Generate Speech")
                prompt_box = gr.Textbox(label="Enter Text to Speak")
                speak_button = gr.Button("Generate Speech")
                speech_player = gr.Audio(label="Generated Speech")
                error_box = gr.Textbox(label="Errors", visible=True)

        # Event wiring: (handler, inputs, outputs)
        ref_button.click(backend.process_reference_audio, ref_input, ref_status)
        speak_button.click(
            backend.generate_speech,
            prompt_box,
            [speech_player, error_box],
        )

    return demo
120
+
121
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all network
    # interfaces at port 7860 with debug output and in-UI error display
    # enabled and no public share link.
    launch_options = {
        "share": False,
        "debug": True,
        "show_error": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    interface = create_interface()
    interface.launch(**launch_options)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=3.50.2
2
+ numpy
3
+ scipy
4
+ soundfile
5
+ torch
6
+ transformers
7
+ git+https://github.com/suno-ai/bark.git