Spaces:
Runtime error
Runtime error
Merge branch 'main' of https://huggingface.co/spaces/TeamTonic/MultiMed
Browse files
app.py
CHANGED
@@ -31,6 +31,11 @@ from lang_list import (
|
|
31 |
LANG_TO_SPKR_ID,
|
32 |
)
|
33 |
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
def process_speech(sound):
|
36 |
"""
|
@@ -221,6 +226,15 @@ def process_and_query(text, image,audio):
|
|
221 |
# If an image is provided, process it with OpenAI and use the response as the text query for Vectara
|
222 |
if image is not None:
|
223 |
text = process_image_with_openai(image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
|
225 |
# Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
|
226 |
vectara_response_json = query_vectara(text)
|
|
|
31 |
LANG_TO_SPKR_ID,
|
32 |
)
|
33 |
|
34 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
35 |
+
|
36 |
+
#processor = AutoProcessor.from_pretrained("ylacombe/hf-seamless-m4t-large")
|
37 |
+
#model = SeamlessM4TModel.from_pretrained("ylacombe/hf-seamless-m4t-large").to(device)
|
38 |
+
|
39 |
|
40 |
def process_speech(sound):
|
41 |
"""
|
|
|
226 |
# If an image is provided, process it with OpenAI and use the response as the text query for Vectara
|
227 |
if image is not None:
|
228 |
text = process_image_with_openai(image)
|
229 |
+
|
230 |
+
if audio is not None:
|
231 |
+
audio = audio[0].numpy()
|
232 |
+
audio = audio.astype(np.float32)
|
233 |
+
audio = audio / np.max(np.abs(audio))
|
234 |
+
audio = audio * 32768
|
235 |
+
audio = audio.astype(np.int16)
|
236 |
+
audio = audio.tobytes()
|
237 |
+
audio = base64.b64encode(audio).decode('utf-8')
|
238 |
|
239 |
# Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
|
240 |
vectara_response_json = query_vectara(text)
|