jiuuee committed on
Commit
beebab3
1 Parent(s): 8e56bac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -1,6 +1,4 @@
1
  import gradio as gr
2
- import sounddevice as sd
3
- import soundfile as sf
4
  from transformers import pipeline
5
 
6
  # Create pipelines for ASR, QA, and TTS
@@ -8,19 +6,18 @@ asr_pipeline = pipeline("automatic-speech-recognition", model="canary/asr-small-
8
  qa_pipeline = pipeline("question-answering", model="LLAMA/llama3-base-qa", tokenizer="LLAMA/llama3-base-qa")
9
  tts_pipeline = pipeline("text-to-speech", model="patrickvonplaten/vits-large", device=0) # Adjust device based on your hardware
10
 
11
- # Function to capture audio
12
- def capture_audio(duration=10, filename="temp.wav"):
13
- print("Listening for trigger word...")
14
- # Listen for trigger word ("Hey, Alexander")
15
  while True:
16
- audio_input, _ = sd.rec(int(duration * 16000), samplerate=16000, channels=1, dtype="int16")
17
- sd.wait()
18
- sf.write(filename, audio_input, 16000)
19
- transcript = asr_pipeline(filename)[0]['transcription']
20
  if "hey alex" in transcript.lower():
21
- print("Hi! I'm listening...")
22
  break
23
  print("Listening...")
 
24
 
25
  # AI assistant function
26
  def ai_assistant(audio_input):
@@ -29,8 +26,8 @@ def ai_assistant(audio_input):
29
 
30
  # Perform question answering (QA)
31
  question = transcribed_text
32
- context = "Friends is a popular American sitcom that aired from 1994 to 2004. The show revolves around a group of six friends living in New York City—Ross, Rachel, Chandler, Monica, Joey, and Phoebe—as they navigate various aspects of their personal and professional lives. Friends is known for its humor, memorable characters, and iconic catchphrases, making it a beloved and enduring cultural phenomenon."
33
  # Provide the context for the question answering model
 
34
  answer = qa_pipeline(question=question, context=context)
35
 
36
  # Convert the answer to speech using text-to-speech (TTS)
@@ -45,4 +42,5 @@ if __name__ == "__main__":
45
  inputs=gr.inputs.Audio(capture= capture_audio, label="Speak Here"),
46
  outputs=gr.outputs.Audio(type="audio", label="Assistant's Response"),
47
  title="Alexander the Great AI Assistant",
48
- description="An AI Assistant. Say 'Hi Alexander' to speak to Alex").launch(inbrowser=True)
 
 
1
  import gradio as gr
 
 
2
  from transformers import pipeline
3
 
4
  # Create pipelines for ASR, QA, and TTS
 
6
  qa_pipeline = pipeline("question-answering", model="LLAMA/llama3-base-qa", tokenizer="LLAMA/llama3-base-qa")
7
  tts_pipeline = pipeline("text-to-speech", model="patrickvonplaten/vits-large", device=0) # Adjust device based on your hardware
8
 
9
+ # Function to capture audio using Canary ASR
10
+ def capture_audio():
 
 
11
  while True:
12
+ print("Say, 'Hey, Alex'")
13
+ # Use Canary ASR pipeline to capture audio
14
+ audio_input = asr_pipeline(None)[0]['input_values']
15
+ transcript = asr_pipeline(audio_input)[0]['transcription']
16
  if "hey alex" in transcript.lower():
17
+ print("I hear you!")
18
  break
19
  print("Listening...")
20
+ return audio_input
21
 
22
  # AI assistant function
23
  def ai_assistant(audio_input):
 
26
 
27
  # Perform question answering (QA)
28
  question = transcribed_text
 
29
  # Provide the context for the question answering model
30
+ context = "Friends is a popular American sitcom that aired from 1994 to 2004. The show revolves around a group of six friends living in New York City—Ross, Rachel, Chandler, Monica, Joey, and Phoebe—as they navigate various aspects of their personal and professional lives. Friends is known for its humor, memorable characters, and iconic catchphrases, making it a beloved and enduring cultural phenomenon."
31
  answer = qa_pipeline(question=question, context=context)
32
 
33
  # Convert the answer to speech using text-to-speech (TTS)
 
42
  inputs=gr.inputs.Audio(capture= capture_audio, label="Speak Here"),
43
  outputs=gr.outputs.Audio(type="audio", label="Assistant's Response"),
44
  title="Alexander the Great AI Assistant",
45
+ description="An AI Assistant. Say 'Hey Alex' to speak to Alexander").launch(inbrowser=True)
46
+