Tonic committed
Commit 5a0e49a
1 Parent(s): a543a78

Update app.py

Files changed (1)
  1. app.py +22 -31
app.py CHANGED
@@ -195,16 +195,20 @@ def process_speech(input_language, audio_input):
         return f"{e}"


-def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
-    if not input_text or not source_language or not target_language:
-        return None, "Invalid input parameters."
+def is_base64(s):
+    try:
+        return base64.b64encode(base64.b64decode(s)) == s.encode()
+    except Exception:
+        return False

+def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
     client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")

     try:
+        # Make a prediction request to the client
         result = client.predict(
             "T2ST",
-            "text",
+            "text",  # Since we are doing text-to-speech
             None,
             None,
             input_text,
@@ -212,44 +216,31 @@ def convert_text_to_speech(input_text: str, source_language: str, target_languag
             target_language,
             api_name="/run"
         )
-    except Exception as e:
-        return None, f"Error during prediction: {str(e)}"

-    try:
+        # Print or log the raw API response for inspection
+        print("Raw API Response:", result)
+
+        # Initialize variables
         translated_text = ""
         audio_file_path = ""

+        # Process the result
         if result:
             for item in result:
                 if isinstance(item, str):
-                    if item.endswith('.mp3') and not audio_file_path:
-                        audio_file_path = item
+                    # Check if the item is a URL pointing to an audio file or a base64 encoded string
+                    if any(ext in item.lower() for ext in ['.mp3', '.wav', '.ogg']) or is_base64(item):
+                        if not audio_file_path:  # Store only the first audio file path or base64 string
+                            audio_file_path = item
                     else:
+                        # Concatenate the translated text
                         translated_text += item + " "
-    except Exception as e:
-        return None, f"Error processing result: {str(e)}"

-    if not audio_file_path:
-        return None, "No audio file path found in the result."
+        return audio_file_path, translated_text.strip()

-    return audio_file_path, translated_text.strip()
-
-
-def process_image(image_input):
-    # Initialize the Gradio client with the URL of the Gradio server
-    client = Client("https://adept-fuyu-8b-demo.hf.space/--replicas/pqjvl/")
-
-    # Assuming image_input is a URL path to the image
-    image_path = image_input
-
-    # Call the predict method of the client
-    result = client.predict(
-        image_path,  # URL of the image
-        True,  # Additional parameter for the server (e.g., enable detailed captioning)
-        fn_index=2
-    )
-
-    return result
+    except Exception as e:
+        print(f"Error in text-to-speech conversion: {str(e)}")
+        return None, f"Error in text-to-speech conversion: {str(e)}"


 def query_vectara(text):
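
The audio-detection logic introduced by this commit can be sanity-checked outside the Space. The following is a minimal, stand-alone sketch and not part of app.py: is_base64 and the extension test are copied verbatim from the diff, while looks_like_audio and the sample items are hypothetical scaffolding added for illustration. The helper also depends on the standard-library base64 module; the import is not visible in this diff, so app.py needs it elsewhere.

    import base64

    def is_base64(s):
        # Round-trip heuristic from the commit: decode, re-encode, compare.
        try:
            return base64.b64encode(base64.b64decode(s)) == s.encode()
        except Exception:
            return False

    def looks_like_audio(item: str) -> bool:
        # Same test the updated convert_text_to_speech applies to each string in
        # the result: an audio file extension in the string, or a base64 payload.
        return any(ext in item.lower() for ext in ['.mp3', '.wav', '.ogg']) or is_base64(item)

    # Hypothetical sample items, roughly the shapes the Space can return.
    samples = [
        "/tmp/gradio/3f2a/audio.wav",                    # audio file path  -> True
        base64.b64encode(b"fake audio bytes").decode(),  # base64 payload   -> True
        "Bonjour, comment allez-vous ?",                 # translated text  -> False
    ]

    for item in samples:
        print(f"{item!r:60} audio-like: {looks_like_audio(item)}")

Since `or` short-circuits, the cheap extension check runs before the base64 round-trip. Note that the round-trip heuristic also returns True for ordinary alphanumeric strings whose length is a multiple of four, which is worth keeping in mind if the Space ever returns short plain-text items.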