Gabriel C committed on
Commit 1324088
1 Parent(s): 21285af

Create app.py

Files changed (1):
app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
import os
import time

import gradio as gr
import numpy as np
import soundfile as sf

from groq import Groq
from openai import OpenAI

groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def transcribe(audio_path):
    """
    Transcribe the audio segment using Whisper.
    """
    with open(audio_path, 'rb') as audio_file:
        transcription = openai_client.audio.transcriptions.create(
            file=audio_file,
            language="en",
            model="whisper-1"
        )
    return transcription.text

def autocomplete(text):
    """
    Autocomplete the text using Gemma.
    """
    if text != "":
        response = groq_client.chat.completions.create(
            model='gemma-7b-it',
            messages=[{"role": "system", "content": "You are a friendly assistant."},
                      {"role": "user", "content": text}]
        )
        return response.choices[0].message.content
    return ""

def process_audio(input_audio):
    """
    Process the audio input by transcribing and completing the sentences.
    """
    # Gradio passes a file path, which soundfile can read directly
    audio_data, sample_rate = sf.read(input_audio)

    # Ensure mono audio
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    # Transcribe the audio in one-second segments
    transcription_list = []
    for start in range(0, len(audio_data), sample_rate):
        end = start + sample_rate
        segment = audio_data[start:end]

        # Temporarily save each segment to a file (Whisper requires a file input)
        segment_filename = f"/tmp/audio_segment_{start}.wav"
        sf.write(segment_filename, segment, sample_rate)

        # Transcribe the audio segment
        transcription = transcribe(segment_filename)
        transcription_list.append(transcription)

    # Send the full transcription for completion (joining all segments,
    # not just the last one)
    full_transcription = " ".join(transcription_list)
    completion_result = autocomplete(full_transcription)

    text = f"Qn: {full_transcription}\n\nAns: {completion_result}"

    return text

# Define the Gradio interface
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources="microphone", streaming=True, type="filepath"),
    outputs=gr.Markdown(),
    title="Dear Gemma",
    description="Talk to the AI assistant. It completes your sentences in real time.",
    live=True,
    allow_flagging="never"
)

if __name__ == "__main__":
    interface.launch()
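
To try this locally, both API keys must be present in the environment before launch, since the Groq and OpenAI clients read them at import time. A minimal offline check, assuming the file above is saved as app.py, GROQ_API_KEY and OPENAI_API_KEY are exported in the shell, and "sample.wav" is a short test recording (the file name is hypothetical, not part of the commit):

# offline_check.py -- a sketch for testing process_audio without the Gradio UI.
# Assumes app.py is in the same directory, the API keys are set, and
# "sample.wav" is a short mono recording (hypothetical file name).
from app import process_audio

if __name__ == "__main__":
    print(process_audio("sample.wav"))

Running app.py itself (python app.py) starts the Gradio interface, which streams microphone audio to process_audio as a file path on each update.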