Update app.py
app.py CHANGED
@@ -1,4 +1,6 @@
 import gradio as gr
+import sounddevice as sd
+import soundfile as sf
 from transformers import pipeline
 
 # Create pipelines for ASR, QA, and TTS
@@ -6,13 +8,29 @@ asr_pipeline = pipeline("automatic-speech-recognition", model="canary/asr-small-
 qa_pipeline = pipeline("question-answering", model="LLAMA/llama3-base-qa", tokenizer="LLAMA/llama3-base-qa")
 tts_pipeline = pipeline("text-to-speech", model="patrickvonplaten/vits-large", device=0) # Adjust device based on your hardware
 
+# Function to capture audio
+def capture_audio(duration=10, filename="temp.wav"):
+    print("Listening for trigger word...")
+    # Listen for trigger word ("Hey, Alexander")
+    while True:
+        audio_input, _ = sd.rec(int(duration * 16000), samplerate=16000, channels=1, dtype="int16")
+        sd.wait()
+        sf.write(filename, audio_input, 16000)
+        transcript = asr_pipeline(filename)[0]['transcription']
+        if "hey alex" in transcript.lower():
+            print("Hi! I'm listening...")
+            break
+        print("Listening...")
+
+# AI assistant function
 def ai_assistant(audio_input):
     # Perform automatic speech recognition (ASR)
     transcribed_text = asr_pipeline(audio_input)[0]['transcription']
 
     # Perform question answering (QA)
     question = transcribed_text
-    context = "
+    context = "Friends is a popular American sitcom that aired from 1994 to 2004. The show revolves around a group of six friends living in New York City—Ross, Rachel, Chandler, Monica, Joey, and Phoebe—as they navigate various aspects of their personal and professional lives. Friends is known for its humor, memorable characters, and iconic catchphrases, making it a beloved and enduring cultural phenomenon."
+    # Provide the context for the question answering model
     answer = qa_pipeline(question=question, context=context)
 
     # Convert the answer to speech using text-to-speech (TTS)
@@ -23,9 +41,8 @@ def ai_assistant(audio_input):
 
 if __name__ == "__main__":
     # Create a Gradio interface
-    gr.Interface(ai_assistant,
-                 inputs=gr.inputs.Audio(
+    gr.Interface(ai_assistant,
+                 inputs=gr.inputs.Audio(capture=capture_audio, label="Speak Here"),
                  outputs=gr.outputs.Audio(type="audio", label="Assistant's Response"),
-                 title="AI Assistant",
-                 description="An AI Assistant
-                 .launch()
+                 title="Alexander the Great AI Assistant",
+                 description="An AI Assistant. Say 'Hi Alexander' to speak to Alex").launch(inbrowser=True)
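On the capture_audio loop: sounddevice's rec() returns the recording buffer as a single NumPy array, so the tuple unpacking audio_input, _ = sd.rec(...) raises a ValueError, and Gradio's Audio component has no documented capture= parameter, so the loop would have to run outside the interface (for example, before launching it). A sketch under those assumptions, again with a stand-in ASR checkpoint:

import sounddevice as sd
import soundfile as sf
from transformers import pipeline

asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small")  # stand-in

def capture_audio(duration=10, filename="temp.wav", samplerate=16000):
    """Block until the trigger phrase is heard, then return the recorded file."""
    print("Listening for trigger word...")
    while True:
        # sd.rec returns the (frames, channels) ndarray directly
        audio_input = sd.rec(int(duration * samplerate), samplerate=samplerate,
                             channels=1, dtype="int16")
        sd.wait()  # block until the recording completes
        sf.write(filename, audio_input, samplerate)
        transcript = asr_pipeline(filename)["text"]
        if "hey alex" in transcript.lower():
            print("Hi! I'm listening...")
            return filename
        print("Listening...")

Returning the file path lets the caller hand the captured clip straight to ai_assistant.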