gauravgulati619 committed on
Commit
9fd7b07
·
verified ·
1 Parent(s): 5883d20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -1,20 +1,22 @@
1
  import os
2
  import gradio as gr
3
- import soundfile as sf
 
4
  from brain import encode_image, analyze_image_with_query
5
- from patientvoice import transcribe_with_groq
6
- from doctorvoice import text_to_speech_with_elevenlabs
7
  from dotenv import load_dotenv
8
 
9
  # Load environment variables
10
  load_dotenv()
 
11
  system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
12
  What's in this image?. Do you find anything wrong with it medically?
13
  If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
14
  your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
15
  Donot say 'In the image I see' but say 'With what I see, I think you have ....'
16
  Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
17
- Keep your answer concise (max 5 sentences). No preamble, start your answer right away please"""
18
 
19
  def process_inputs(audio_data, image_filepath):
20
  # Handle audio input from microphone
@@ -38,7 +40,7 @@ def process_inputs(audio_data, image_filepath):
38
  else:
39
  speech_to_text_output = "No audio provided"
40
 
41
- # Process image input with error handling
42
  if image_filepath:
43
  try:
44
  doctor_response = analyze_image_with_query(
@@ -53,17 +55,14 @@ def process_inputs(audio_data, image_filepath):
53
 
54
  # Generate doctor's voice with error handling
55
  try:
56
- voice_of_doctor = text_to_speech_with_elevenlabs(
57
- input_text=doctor_response,
58
- output_filepath="final.mp3"
59
- )
60
  except Exception as e:
61
  voice_of_doctor = None
62
  print(f"Error in text-to-speech: {str(e)}")
63
 
64
  return speech_to_text_output, doctor_response, voice_of_doctor
65
 
66
- # Define Gradio interface
67
  iface = gr.Interface(
68
  fn=process_inputs,
69
  inputs=[
@@ -75,9 +74,8 @@ iface = gr.Interface(
75
  gr.Textbox(label="Doctor's Response"),
76
  gr.Audio(label="Doctor's Voice")
77
  ],
78
- title="MediVox: AI Doctor with Vision and Voice",
79
  css=".gradio-container {text-align: center;}"
80
  )
81
 
82
- # Launch the interface
83
  iface.launch()
 
1
  import os
2
  import gradio as gr
3
+ import soundfile as sf # For audio handling
4
+
5
  from brain import encode_image, analyze_image_with_query
6
+ from patientvoice import record_audio, transcribe_with_groq
7
+ from doctorvoice import text_to_speech_with_gtts
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
11
  load_dotenv()
12
+
13
  system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose.
14
  What's in this image?. Do you find anything wrong with it medically?
15
  If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
16
  your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
17
  Donot say 'In the image I see' but say 'With what I see, I think you have ....'
18
  Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
19
+ Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
20
 
21
  def process_inputs(audio_data, image_filepath):
22
  # Handle audio input from microphone
 
40
  else:
41
  speech_to_text_output = "No audio provided"
42
 
43
+ # Handle the image input with error handling
44
  if image_filepath:
45
  try:
46
  doctor_response = analyze_image_with_query(
 
55
 
56
  # Generate doctor's voice with error handling
57
  try:
58
+ voice_of_doctor = text_to_speech_with_gtts(input_text=doctor_response, output_filepath="final.mp3")
 
 
 
59
  except Exception as e:
60
  voice_of_doctor = None
61
  print(f"Error in text-to-speech: {str(e)}")
62
 
63
  return speech_to_text_output, doctor_response, voice_of_doctor
64
 
65
+ # Create the interface
66
  iface = gr.Interface(
67
  fn=process_inputs,
68
  inputs=[
 
74
  gr.Textbox(label="Doctor's Response"),
75
  gr.Audio(label="Doctor's Voice")
76
  ],
77
+ title="MediVox : AI Doctor with Vision and Voice",
78
  css=".gradio-container {text-align: center;}"
79
  )
80
 
 
81
  iface.launch()