ygauravyy committed 39f7a33 (verified) · 1 parent: 699a7a1

Update app.py

Files changed (1): app.py (+43 −60)
app.py CHANGED
@@ -10,10 +10,16 @@ from api import BaseSpeakerTTS, ToneColorConverter
 import langid
 import traceback
 from dotenv import load_dotenv
+from fastapi import FastAPI, UploadFile, Form
+from fastapi.responses import JSONResponse
+from gradio.routes import mount_gradio_app
 
 # Load environment variables
 load_dotenv()
 
+# Initialize FastAPI app
+app = FastAPI()
+
 # Function to download and extract checkpoints
 def download_and_extract_checkpoints():
     zip_url = "https://huggingface.co/camenduru/OpenVoice/resolve/main/checkpoints_1226.zip"
@@ -40,10 +46,6 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
 if not openai.api_key:
     raise ValueError("Please set the OPENAI_API_KEY environment variable.")
 
-parser = argparse.ArgumentParser()
-parser.add_argument("--share", action='store_true', default=False, help="make link public")
-args = parser.parse_args()
-
 # Define paths to checkpoints
 en_ckpt_base = 'checkpoints/base_speakers/EN'
 zh_ckpt_base = 'checkpoints/base_speakers/ZH'
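
With the argparse block removed, all runtime configuration is environment-driven via load_dotenv(). An illustrative .env (values hypothetical, not part of this commit):

    OPENAI_API_KEY=sk-...
    PORT=7860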
@@ -82,6 +84,7 @@ except Exception as e:
 # Supported languages
 supported_languages = ['zh', 'en']
 
+# Predict function (shared between FastAPI and Gradio)
 def predict(audio_file_pth, agree):
     text_hint = ''
     synthesized_audio_path = None
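
Since predict() is now shared between the Gradio callback and the FastAPI route, it can also be exercised directly. A minimal sketch (the sample path is hypothetical):

    # predict() returns (info_text, audio_path); audio_path is None on failure.
    info, audio_path = predict("samples/hello.wav", agree=True)
    print(info, audio_path)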
@@ -95,7 +98,7 @@ def predict(audio_file_pth, agree):
     if audio_file_pth is not None:
         speaker_wav = audio_file_pth
     else:
-        text_hint += "[ERROR] Please record your voice using the Microphone.\n"
+        text_hint += "[ERROR] Please provide an audio file.\n"
         return (text_hint, None)
 
     # Transcribe audio to text using OpenAI Whisper
@@ -121,7 +124,7 @@ def predict(audio_file_pth, agree):
     print(f"Detected language: {language_predicted}")
 
     if language_predicted not in supported_languages:
-        text_hint += f"[ERROR] The detected language '{language_predicted}' is not supported. Supported languages are: {supported_languages}\n"
+        text_hint += f"[ERROR] The detected language '{language_predicted}' is not supported.\n"
         return (text_hint, None)
 
     # Select TTS model based on language
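
The language gate above relies on langid's top-1 prediction. A standalone sketch of the underlying call:

    import langid

    lang, score = langid.classify("Hello there!")  # e.g. ('en', -54.4)
    print(lang in ['zh', 'en'])  # mirrors the supported_languages check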
@@ -134,97 +137,77 @@ def predict(audio_file_pth, agree):
     language = 'English'
     speaker_style = 'default'
 
-    # Generate response using OpenAI GPT-4
     # Generate response using OpenAI GPT-4
     try:
         response = openai.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
-                {"role": "system", "content": "You are Mickey Mouse, a friendly and cheerful character who responds to children's queries in a simple and engaging manner. Please keep your response up to 200 characters."},
+                {"role": "system", "content": "You are Mickey Mouse, a friendly character."},
                 {"role": "user", "content": input_text}
-            ],
-            max_tokens=200,
-            n=1,
-            stop=None,
-            temperature=0.7,
+            ]
         )
-        # Correctly access the response content
         reply_text = response.choices[0].message.content.strip()
         print(f"GPT-4 Reply: {reply_text}")
     except Exception as e:
-        text_hint += f"[ERROR] Failed to get response from OpenAI GPT-4: {str(e)}\n"
+        text_hint += f"[ERROR] GPT-4 response failed: {str(e)}\n"
         return (text_hint, None)
 
     # Synthesize reply text to audio
     try:
         src_path = os.path.join(output_dir, 'tmp_reply.wav')
-
         tts_model.tts(reply_text, src_path, speaker=speaker_style, language=language)
-        print(f"Audio synthesized and saved to {src_path}")
 
         save_path = os.path.join(output_dir, 'output_reply.wav')
-
         tone_color_converter.convert(
             audio_src_path=src_path,
             src_se=en_source_default_se if language == 'English' else zh_source_se,
             tgt_se=target_se,
-            output_path=save_path,
-            message="@MickeyMouse"
+            output_path=save_path
         )
-        print(f"Tone color conversion completed and saved to {save_path}")
 
-        text_hint += "Response generated successfully.\n"
+        text_hint += "Response generated successfully."
         synthesized_audio_path = save_path
 
     except Exception as e:
-        text_hint += f"[ERROR] Failed to synthesize audio: {str(e)}\n"
-        traceback.print_exc()
+        text_hint += f"[ERROR] Synthesis failed: {str(e)}\n"
        return (text_hint, None)
 
     return (text_hint, synthesized_audio_path)
 
+
+# FastAPI endpoint for prediction
+@app.post("/predict")
+async def predict_endpoint(file: UploadFile, agree: bool = Form(...)):
+    # Save uploaded file
+    temp_file_path = f"temp_{file.filename}"
+    with open(temp_file_path, "wb") as temp_file:
+        temp_file.write(await file.read())
+
+    # Call predict
+    info, audio_path = predict(temp_file_path, agree)
+    os.remove(temp_file_path)
+
+    if audio_path:
+        return JSONResponse({"info": info, "audio": audio_path})
+    else:
+        return JSONResponse({"info": info}, status_code=400)
+
+
+# Gradio UI
 with gr.Blocks(analytics_enabled=False) as demo:
     gr.Markdown("# Mickey Mouse Voice Assistant")
 
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(
-                source="microphone",
-                type="filepath",
-                label="Record Your Voice",
-                info="Click the microphone button to record your voice."
-            )
-            tos_checkbox = gr.Checkbox(
-                label="Agree to Terms & Conditions",
-                value=False,
-                info="I agree to the terms of service."
-            )
+            audio_input = gr.Audio(source="microphone", type="filepath", label="Record Your Voice")
+            tos_checkbox = gr.Checkbox(label="Agree to Terms & Conditions", value=False)
             submit_button = gr.Button("Send")
 
         with gr.Column():
-            info_output = gr.Textbox(
-                label="Info",
-                interactive=False,
-                lines=4,
-            )
-            audio_output = gr.Audio(
-                label="Mickey's Response",
-                interactive=False,
-                autoplay=True,
-            )
+            info_output = gr.Textbox(label="Info", interactive=False, lines=4)
+            audio_output = gr.Audio(label="Mickey's Response", interactive=False, autoplay=True)
 
-    submit_button.click(
-        predict,
-        inputs=[audio_input, tos_checkbox],
-        outputs=[info_output, audio_output]
-    )
+    submit_button.click(predict, inputs=[audio_input, tos_checkbox], outputs=[info_output, audio_output])
 
-# Launch the Gradio app
-demo.queue()
-demo.launch(
-    server_name="0.0.0.0",
-    server_port=int(os.environ.get("PORT", 7860)),
-    debug=True,
-    show_api=True,
-    share=False
-)
+# Mount Gradio app to FastAPI
+mount_gradio_app(app, demo, path="/")
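
Because this commit drops demo.queue()/demo.launch() in favor of mounting Gradio onto FastAPI, the merged app now needs an ASGI server. A minimal sketch, assuming the module is named app and uvicorn is available (neither is pinned by this commit):

    import os
    import uvicorn

    if __name__ == "__main__":
        # Stands in for the old demo.launch(server_name="0.0.0.0", server_port=PORT)
        uvicorn.run("app:app", host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))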
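
One way to exercise the new /predict route, assuming a server reachable at localhost:7860 and a local recording sample.wav (both assumptions, not part of the commit):

    import requests

    with open("sample.wav", "rb") as f:
        resp = requests.post(
            "http://localhost:7860/predict",
            files={"file": ("sample.wav", f, "audio/wav")},
            data={"agree": "true"},
        )
    print(resp.status_code, resp.json())

Note that the endpoint returns the server-side audio path, not the audio bytes, so a remote client would still need the file served separately.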