MultiMed

Runtime error

not-lain committed on Nov 7, 2023

Commit

25734b6

1 Parent(s): 8b809c0

implemented the audio function

Files changed (1) hide show

app.py CHANGED Viewed

@@ -228,13 +228,15 @@ def process_and_query(text, image,audio):
             text = process_image_with_openai(image)
         if audio is not None:
-            audio = audio[0].numpy()
-            audio = audio.astype(np.float32)
-            audio = audio / np.max(np.abs(audio))
-            audio = audio * 32768
-            audio = audio.astype(np.int16)
-            audio = audio.tobytes()
-            audio = base64.b64encode(audio).decode('utf-8')
         # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
         vectara_response_json = query_vectara(text)

             text = process_image_with_openai(image)
         if audio is not None:
+            # audio = audio[0].numpy()
+            # audio = audio.astype(np.float32)
+            # audio = audio / np.max(np.abs(audio))
+            # audio = audio * 32768
+            # audio = audio.astype(np.int16)
+            # audio = audio.tobytes()
+            # audio = base64.b64encode(audio).decode('utf-8')
+            text = process_speech(audio)
+            return text
         # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
         vectara_response_json = query_vectara(text)