Tonic committed on
Commit
f123231
·
1 Parent(s): 4ec9fa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -43
app.py CHANGED
@@ -124,7 +124,7 @@ def process_speech(input_language, audio_input):
124
 
125
  def convert_text_to_speech(input_text, source_language, target_language):
126
  """
127
- Convert text to speech in the specified language and return the new audio file path.
128
  """
129
  client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
130
 
@@ -139,46 +139,35 @@ def convert_text_to_speech(input_text, source_language, target_language):
139
  api_name="/run" # API name
140
  )
141
 
142
- # Assuming the audio file path is returned in the result
143
- original_audio_file = result[1] if len(result) > 1 else None
 
 
144
 
145
- if original_audio_file:
146
- # Generate a new file name with a random UUID
147
- new_file_name = f"audio_output_{uuid.uuid4()}.wav"
148
- new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
 
149
 
150
- # Rename the file
151
- os.rename(original_audio_file, new_file_path)
 
 
152
 
153
- return new_file_path
 
 
 
 
 
154
  else:
155
- return "No audio file generated."
156
 
157
  except Exception as e:
158
  # Return a concise error message
159
- return f"Error in text-to-speech conversion: {str(e)}"
160
-
161
- def save_image(image_input, output_dir="saved_images"):
162
- if not os.path.exists(output_dir):
163
- os.makedirs(output_dir)
164
 
165
- # Generate a unique file name
166
- file_name = f"image_{int(time.time())}.png"
167
- file_path = os.path.join(output_dir, file_name)
168
-
169
- # Check the type of image_input and handle accordingly
170
- if isinstance(image_input, np.ndarray): # If image_input is a NumPy array
171
- Image.fromarray(image_input).save(file_path)
172
- elif isinstance(image_input, Image.Image): # If image_input is a PIL image
173
- image_input.save(file_path)
174
- elif isinstance(image_input, str) and image_input.startswith('data:image'): # If image_input is a base64 string
175
- image_data = base64.b64decode(image_input.split(',')[1])
176
- with open(file_path, 'wb') as f:
177
- f.write(image_data)
178
- else:
179
- raise ValueError("Unsupported image format")
180
-
181
- return file_path
182
 
183
  def process_image(image_input):
184
  # Initialize the Gradio client with the URL of the Gradio server
@@ -365,9 +354,9 @@ def process_summary_with_stablemed(summary):
365
  response_text = bot.predict(summary, system_prompt)
366
  return response_text
367
 
 
368
  # Main function to handle the Gradio interface logic
369
 
370
-
371
  def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
372
  try:
373
 
@@ -414,10 +403,6 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
414
  summary = vectara_response.get('summary', 'No summary available')
415
  sources_info = vectara_response.get('sources', [])
416
 
417
-
418
-
419
-
420
-
421
  # Format Vectara response in Markdown
422
  markdown_output = "### Vectara Response Summary\n"
423
  markdown_output += f"* **Summary**: {summary}\n"
@@ -448,8 +433,6 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
448
  except Exception as e:
449
  return f"Error occurred during processing: {e}. No hallucination evaluation.", None
450
 
451
-
452
-
453
  welcome_message = """
454
  # 👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷
455
 
@@ -572,13 +555,10 @@ languages = [
572
  "Zulu"
573
  ]
574
 
575
-
576
  def clear():
577
  # Return default values for each component
578
  return "English", None, None, "", None
579
 
580
-
581
-
582
  def create_interface():
583
  with gr.Blocks(theme='ParityError/Anime') as iface:
584
  # Display the welcome message
 
124
 
125
  def convert_text_to_speech(input_text, source_language, target_language):
126
  """
127
+ Convert text to speech in the specified language and return the audio file path and translated text.
128
  """
129
  client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
130
 
 
139
  api_name="/run" # API name
140
  )
141
 
142
+ # Check if result contains files and select the first one
143
+ if isinstance(result, list) and len(result) > 1:
144
+ # Select the first audio file from the result
145
+ original_audio_file = result[1] # Assuming the audio file is the second element (index 1)
146
 
147
+ # Iterate over the result to find the last text item
148
+ translated_text = ""
149
+ for item in result:
150
+ if isinstance(item, str):
151
+ translated_text = item
152
 
153
+ if original_audio_file:
154
+ # Generate a new file name with a random UUID
155
+ new_file_name = f"audio_output_{uuid.uuid4()}.wav"
156
+ new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
157
 
158
+ # Rename the file
159
+ os.rename(original_audio_file, new_file_path)
160
+
161
+ return new_file_path, translated_text
162
+ else:
163
+ return "No audio file generated.", translated_text
164
  else:
165
+ return "Unexpected result format or insufficient data received.", ""
166
 
167
  except Exception as e:
168
  # Return a concise error message
169
+ return f"Error in text-to-speech conversion: {str(e)}", ""
 
 
 
 
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  def process_image(image_input):
173
  # Initialize the Gradio client with the URL of the Gradio server
 
354
  response_text = bot.predict(summary, system_prompt)
355
  return response_text
356
 
357
+
358
  # Main function to handle the Gradio interface logic
359
 
 
360
  def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
361
  try:
362
 
 
403
  summary = vectara_response.get('summary', 'No summary available')
404
  sources_info = vectara_response.get('sources', [])
405
 
 
 
 
 
406
  # Format Vectara response in Markdown
407
  markdown_output = "### Vectara Response Summary\n"
408
  markdown_output += f"* **Summary**: {summary}\n"
 
433
  except Exception as e:
434
  return f"Error occurred during processing: {e}. No hallucination evaluation.", None
435
 
 
 
436
  welcome_message = """
437
  # 👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷
438
 
 
555
  "Zulu"
556
  ]
557
 
 
558
  def clear():
559
  # Return default values for each component
560
  return "English", None, None, "", None
561
 
 
 
562
  def create_interface():
563
  with gr.Blocks(theme='ParityError/Anime') as iface:
564
  # Display the welcome message