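"""Voice chatbot Space: records microphone audio, transcribes it with Whisper,
routes the query by domain with Llama-3, and replies with text plus synthesized
or retrieved audio."""
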
import gradio as gr
import numpy as np
from huggingface_hub import InferenceClient
import os
import requests
import scipy.io.wavfile
import io
import time
from gradio_client import Client, file
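
# Shared Llama-3 chat client; the API token comes from the "hf_token" Space secret.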
client = InferenceClient(
"meta-llama/Meta-Llama-3-8B-Instruct", token=os.getenv("hf_token")
)
def process_audio(audio_data):
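    """Transcribe microphone audio with the openai/whisper-large-v2 Inference API.

    `audio_data` is the (sample_rate, data) tuple produced by the Gradio Audio
    component; returns (display_text, recognized_text).
    """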
    if audio_data is None:
        return "No audio provided.", ""
    # Check if audio_data is a tuple and extract data
    if isinstance(audio_data, tuple):
        sample_rate, data = audio_data
    else:
        return "Invalid audio data format.", ""
    # Convert the audio data to WAV format in memory
    buf = io.BytesIO()
    scipy.io.wavfile.write(buf, sample_rate, data)
    wav_bytes = buf.getvalue()
    buf.close()
    API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}

    def query(wav_data):
        response = requests.post(API_URL, headers=headers, data=wav_data)
        return response.json()

    # Call the API to process the audio
    output = query(wav_bytes)
    print(output)  # Check output in console (logs in HF space)
    # Check the API response
    if "text" in output:
        recognized_text = output["text"]
        return recognized_text, recognized_text
    else:
        recognized_text = "The ASR module is still loading, please press the button again!"
        return recognized_text, ""

def master_decision(message):
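    """Classify the query as "movies", "music", or "others" with Llama-3, then
    route it to the matching Space (movies-recommender / MusicRetriever) or
    answer it directly. Returns (domain_decision, response).
    """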
decision_response = ""
judge_system_message = """You are helpful assistant. You will be given queries from the user and you decide on which domain the query belongs to. You have three domains : ["movies","music","others"]. If you don't know about the domain of a query, it is to be classified as "others". Please give a one word answer in smaller caps."""
m_message = [
{"role": "system", "content": judge_system_message},
{"role": "user", "content": message},
]
for m in client.chat_completion(
m_message,
stream=True,
):
token = m.choices[0].delta.content
decision_response += token
print(decision_response)
if "movies" in decision_response:
movie_client = Client("ironserengety/movies-recommender")
result = movie_client.predict(
message=message,
system_message="You are a movie recommender named 'Exodia'. You are extremely reliable. You always mention your name in the beginning of conversation. You will provide me with answers from the given info. Give not more than 3 choices and make sure that answers are complete sentences. Give short one-line descriptions of each sentence.",
max_tokens=512,
temperature=0.7,
top_p=0.95,
api_name="/chat",
)
print(result)
return decision_response, result
# elif "music" in decision_response:
elif "music" in decision_response:
music_client = Client("ironserengety/MusicRetriever")
result = music_client.predict(message=message, api_name="/respond")
response = result
return decision_response, response
else:
# others
system_message = "You are a helpful chatbot that answers questions. Give any answer within 50 words."
messages = [{"role": "system", "content": system_message}]
# for val in history:
# print(val[0])
# if val[0] != None:
# if val[0]:
# messages.append({"role": "user", "content": val[0]})
# if val[1]:
# messages.append({"role": "assistant", "content": val[1]})
messages.append({"role": "user", "content": message})
response = ""
print(messages)
for message in client.chat_completion(
messages,
stream=True,
):
token = message.choices[0].delta.content
response += token
return decision_response, response
def tts_part_new(response):
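    """Synthesize speech for `response` via the tonyassi/voice-clone Space,
    passing siri.wav as the reference voice."""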
result = ""
text = response
client = Client("tonyassi/voice-clone")
result = client.predict(text, audio=file("siri.wav"), api_name="/predict")
return result
def get_chatbot_response(audio_data):
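    """Full pipeline: transcribe the audio, route the query by domain, and
    return (response_text, response_audio) for the UI."""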
    response_text, _ = process_audio(audio_data)
    domain, response = master_decision(response_text)
    if "music" in domain:
        # The music retriever's result is used directly for both outputs (no TTS)
        return response, response
    else:
        return response, tts_part_new(response)

def chat_interface():
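    """Build the Gradio Blocks UI: microphone input, a Send button, and
    text + audio outputs."""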
    with gr.Blocks() as demo:
        # audio_input = gr.Audio(source="microphone", type="filepath", label="Speak")
        audio_input = gr.Audio(
            sources=["microphone"],
            type="numpy",  # Get audio data and sample rate
            label="Say Something...",
        )
        btn = gr.Button(value="Send")
        response_textbox = gr.Textbox(label="Response Text")
        audio_output = gr.Audio(label="Response Audio")
        btn.click(
            get_chatbot_response,
            inputs=[audio_input],
            outputs=[response_textbox, audio_output],
        )
    return demo

if __name__ == "__main__":
    # demo = create_interface()
    demo = chat_interface()
    demo.launch(show_error=True)