MultiMed

Runtime error

not-lain committed on Nov 7, 2023

Commit

8b809c0

2 Parent(s): 7dc22ca af23861

Merge branch 'main' of https://huggingface.co/spaces/TeamTonic/MultiMed

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,6 +31,11 @@ from lang_list import (
     LANG_TO_SPKR_ID,
 )
 def process_speech(sound):
     """
@@ -221,6 +226,15 @@ def process_and_query(text, image,audio):
         # If an image is provided, process it with OpenAI and use the response as the text query for Vectara
         if image is not None:
             text = process_image_with_openai(image)
         # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
         vectara_response_json = query_vectara(text)

     LANG_TO_SPKR_ID,
 )
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+#processor = AutoProcessor.from_pretrained("ylacombe/hf-seamless-m4t-large")
+#model = SeamlessM4TModel.from_pretrained("ylacombe/hf-seamless-m4t-large").to(device)
 def process_speech(sound):
     """
         # If an image is provided, process it with OpenAI and use the response as the text query for Vectara
         if image is not None:
             text = process_image_with_openai(image)
+        if audio is not None:
+            audio = audio[0].numpy()
+            audio = audio.astype(np.float32)
+            audio = audio / np.max(np.abs(audio))
+            audio = audio * 32768
+            audio = audio.astype(np.int16)
+            audio = audio.tobytes()
+            audio = base64.b64encode(audio).decode('utf-8')
         # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
         vectara_response_json = query_vectara(text)