Update app.py
app.py (CHANGED)
@@ -10,10 +10,16 @@ from api import BaseSpeakerTTS, ToneColorConverter
 import langid
 import traceback
 from dotenv import load_dotenv
+from fastapi import FastAPI, UploadFile, Form
+from fastapi.responses import JSONResponse
+from gradio.routes import mount_gradio_app
 
 # Load environment variables
 load_dotenv()
 
+# Initialize FastAPI app
+app = FastAPI()
+
 # Function to download and extract checkpoints
 def download_and_extract_checkpoints():
     zip_url = "https://huggingface.co/camenduru/OpenVoice/resolve/main/checkpoints_1226.zip"
@@ -40,10 +46,6 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
 if not openai.api_key:
     raise ValueError("Please set the OPENAI_API_KEY environment variable.")
 
-parser = argparse.ArgumentParser()
-parser.add_argument("--share", action='store_true', default=False, help="make link public")
-args = parser.parse_args()
-
 # Define paths to checkpoints
 en_ckpt_base = 'checkpoints/base_speakers/EN'
 zh_ckpt_base = 'checkpoints/base_speakers/ZH'
@@ -82,6 +84,7 @@ except Exception as e:
 # Supported languages
 supported_languages = ['zh', 'en']
 
+# Predict function (shared between FastAPI and Gradio)
 def predict(audio_file_pth, agree):
     text_hint = ''
     synthesized_audio_path = None
@@ -95,7 +98,7 @@ def predict(audio_file_pth, agree):
     if audio_file_pth is not None:
         speaker_wav = audio_file_pth
     else:
-        text_hint += "[ERROR] Please …
+        text_hint += "[ERROR] Please provide an audio file.\n"
         return (text_hint, None)
 
     # Transcribe audio to text using OpenAI Whisper
@@ -121,7 +124,7 @@ def predict(audio_file_pth, agree):
     print(f"Detected language: {language_predicted}")
 
     if language_predicted not in supported_languages:
-        text_hint += f"[ERROR] The detected language '{language_predicted}' is not supported …
+        text_hint += f"[ERROR] The detected language '{language_predicted}' is not supported.\n"
         return (text_hint, None)
 
     # Select TTS model based on language
@@ -134,97 +137,77 @@ def predict(audio_file_pth, agree):
         language = 'English'
         speaker_style = 'default'
 
-    # Generate response using OpenAI GPT-4
     # Generate response using OpenAI GPT-4
     try:
         response = openai.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
-                {"role": "system", "content": "You are Mickey Mouse, a friendly …
+                {"role": "system", "content": "You are Mickey Mouse, a friendly character."},
                 {"role": "user", "content": input_text}
-            ],
-            max_tokens=200,
-            n=1,
-            stop=None,
-            temperature=0.7,
+            ]
         )
-
-        reply_text = response.choices[0].message.content.strip()
+        reply_text = response['choices'][0]['message']['content'].strip()
         print(f"GPT-4 Reply: {reply_text}")
     except Exception as e:
-        text_hint += f"[ERROR] …
+        text_hint += f"[ERROR] GPT-4 response failed: {str(e)}\n"
         return (text_hint, None)
 
     # Synthesize reply text to audio
     try:
         src_path = os.path.join(output_dir, 'tmp_reply.wav')
-
         tts_model.tts(reply_text, src_path, speaker=speaker_style, language=language)
-        print(f"Audio synthesized and saved to {src_path}")
 
         save_path = os.path.join(output_dir, 'output_reply.wav')
-
         tone_color_converter.convert(
             audio_src_path=src_path,
             src_se=en_source_default_se if language == 'English' else zh_source_se,
             tgt_se=target_se,
-            output_path=save_path,
-            message="@MickeyMouse"
+            output_path=save_path
         )
-        print(f"Tone color conversion completed and saved to {save_path}")
 
-        text_hint += "Response generated successfully …
+        text_hint += "Response generated successfully."
         synthesized_audio_path = save_path
 
     except Exception as e:
-        text_hint += f"[ERROR] …
-        traceback.print_exc()
+        text_hint += f"[ERROR] Synthesis failed: {str(e)}\n"
         return (text_hint, None)
 
     return (text_hint, synthesized_audio_path)
 
+
+# FastAPI endpoint for prediction
+@app.post("/predict")
+async def predict_endpoint(file: UploadFile, agree: bool = Form(...)):
+    # Save uploaded file
+    temp_file_path = f"temp_{file.filename}"
+    with open(temp_file_path, "wb") as temp_file:
+        temp_file.write(await file.read())
+
+    # Call predict
+    info, audio_path = predict(temp_file_path, agree)
+    os.remove(temp_file_path)
+
+    if audio_path:
+        return JSONResponse({"info": info, "audio": audio_path})
+    else:
+        return JSONResponse({"info": info}, status_code=400)
+
+
+# Gradio UI
 with gr.Blocks(analytics_enabled=False) as demo:
     gr.Markdown("# Mickey Mouse Voice Assistant")
 
     with gr.Row():
        with gr.Column():
-            audio_input = gr.Audio(
-                source="microphone",
-                type="filepath",
-                label="Record Your Voice",
-                info="Click the microphone button to record your voice."
-            )
-            tos_checkbox = gr.Checkbox(
-                label="Agree to Terms & Conditions",
-                value=False,
-                info="I agree to the terms of service."
-            )
+            audio_input = gr.Audio(source="microphone", type="filepath", label="Record Your Voice")
+            tos_checkbox = gr.Checkbox(label="Agree to Terms & Conditions", value=False)
             submit_button = gr.Button("Send")
 
         with gr.Column():
-            info_output = gr.Textbox(
-                label="Info",
-                interactive=False,
-                lines=4,
-            )
-            audio_output = gr.Audio(
-                label="Mickey's Response",
-                interactive=False,
-                autoplay=True,
-            )
+            info_output = gr.Textbox(label="Info", interactive=False, lines=4)
+            audio_output = gr.Audio(label="Mickey's Response", interactive=False, autoplay=True)
 
-    submit_button.click(
-        predict,
-        inputs=[audio_input, tos_checkbox],
-        outputs=[info_output, audio_output]
-    )
+    submit_button.click(predict, inputs=[audio_input, tos_checkbox], outputs=[info_output, audio_output])
 
-# …
-demo …
-demo.launch(
-    server_name="0.0.0.0",
-    server_port=int(os.environ.get("PORT", 7860)),
-    debug=True,
-    show_api=True,
-    share=False
-)
+# Mount Gradio app to FastAPI
+mount_gradio_app(app, demo, path="/")
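
With demo.launch(...) removed, app.py no longer starts a server on its own; the Gradio UI is now just an ASGI app mounted on FastAPI at "/". A minimal way to serve both the UI and the /predict route, assuming uvicorn as the server (the commit itself does not add a server start):

    # Sketch only: uvicorn and the PORT fallback are assumptions, not part of this commit.
    import os
    import uvicorn

    if __name__ == "__main__":
        # Serves the Gradio UI at "/" and the JSON API at "/predict".
        uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))

Equivalently, `uvicorn app:app --host 0.0.0.0 --port 7860` from the shell.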
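The new endpoint also makes the assistant callable without the UI. A hypothetical client sketch; the host, port, and sample.wav path are assumptions for illustration:

    # Calls the new /predict route with multipart form data.
    import requests

    with open("sample.wav", "rb") as f:
        resp = requests.post(
            "http://localhost:7860/predict",
            files={"file": ("sample.wav", f, "audio/wav")},
            data={"agree": "true"},  # bound to the Form(...) parameter
        )

    print(resp.status_code)  # 400 when predict() produced no audio
    print(resp.json())       # {"info": ..., "audio": ...} on success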
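One caveat on the reply-parsing change: openai.chat.completions.create (the v1-style client this file already uses) returns a typed object, not a dict, so `response['choices'][0]['message']['content']` raises a TypeError at runtime. The attribute form this commit deletes is the one that matches that client:

    # Attribute access matches the openai>=1.0 client API.
    reply_text = response.choices[0].message.content.strip()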