prathmeshadsod committed on
Commit
5eeb931
·
verified ·
1 Parent(s): a2d3635

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env +1 -0
  2. .gitignore +1 -0
  3. app.py +137 -0
  4. packages.txt +3 -0
  5. requirements.txt +10 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ SAMBANOVA_API_KEY=6f77154e-13ca-4e74-869d-183684dc7b3f
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import whisper
3
+ from gtts import gTTS
4
+ from dotenv import load_dotenv
5
+ import openai
6
+ import streamlit as st
7
+ import tempfile
8
+ from pydub import AudioSegment
9
+ import wave
10
+ import pyaudio
11
+
12
# Load environment variables (e.g. SAMBANOVA_API_KEY) from a local .env file.
load_dotenv()


@st.cache_resource
def load_whisper_model():
    """Load and cache the Whisper speech-to-text model.

    Decorated with ``st.cache_resource`` so the (large) model is loaded
    once per server process instead of on every Streamlit rerun.
    """
    model_size = "medium"
    return whisper.load_model(model_size)


# Single shared model instance used by the rest of the script.
whisper_model = load_whisper_model()
21
+
22
# --- Page header and interaction-mode selection ---------------------------
st.title("Conversational AI with Speech-to-Speech Response")
st.write("Upload an audio file or record your voice to start the process.")

# Offer the two supported input paths in the sidebar.
_MODES = ["Record Voice", "Upload Audio"]
interaction_mode = st.sidebar.selectbox("Choose Interaction Mode:", _MODES)
30
+
31
# Record Voice Functionality using pydub and pyaudio
def record_audio(filename, duration=5, sample_rate=44100):
    """Record mono 16-bit audio from the default microphone into a WAV file.

    Args:
        filename: Path of the WAV file to write.
        duration: Recording length in seconds.
        sample_rate: Sample rate in Hz.

    FIX: the original never released the PyAudio stream/instance when
    ``stream.read`` or ``p.open`` raised (e.g. no microphone available),
    leaking PortAudio resources. Cleanup is now guaranteed via try/finally.
    """
    st.info(f"Recording for {duration} seconds...")
    chunk_size = 1024  # frames per buffer read
    p = pyaudio.PyAudio()
    try:
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk_size,
        )
        frames = []
        try:
            # Read fixed-size chunks until the requested duration is covered.
            for _ in range(int(sample_rate / chunk_size * duration)):
                frames.append(stream.read(chunk_size))
        finally:
            stream.stop_stream()
            stream.close()

        # Save the recorded frames as a WAV file.
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))
    finally:
        # Always release the PortAudio backend, even on failure.
        p.terminate()

    st.success("Recording complete!")
56
+
57
# --- Acquire audio input (microphone recording or file upload) ------------
if interaction_mode == "Record Voice":
    duration = st.slider("Select Recording Duration (seconds):", min_value=10, max_value=120, step=10)
    record_btn = st.button("Start Recording")

    if record_btn:
        # delete=False: the path must outlive this `with` so Whisper can read it;
        # it is removed explicitly at the end of the pipeline.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            record_audio(temp_audio.name, duration=duration)
            temp_audio_path = temp_audio.name
        st.audio(temp_audio_path, format="audio/wav")
elif interaction_mode == "Upload Audio":
    uploaded_file = st.file_uploader("Upload your audio file (MP3/WAV)", type=["mp3", "wav"])
    if uploaded_file is not None:
        # FIX: keep the upload's real extension. The original always saved
        # with ".mp3", so a WAV upload was later fed to AudioSegment.from_mp3
        # and failed to decode.
        suffix = os.path.splitext(uploaded_file.name)[1].lower() or ".mp3"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
            temp_audio.write(uploaded_file.read())
            temp_audio_path = temp_audio.name
        st.audio(temp_audio_path, format="audio/mp3" if suffix == ".mp3" else "audio/wav")
75
+
76
# --- Transcribe, generate a response, and speak it back -------------------
if 'temp_audio_path' in locals() and temp_audio_path is not None:
    st.write("Processing the audio file...")

    # Whisper input is normalized to WAV. FIX: build the .wav path from the
    # stem (str.replace could corrupt a path containing ".mp3" mid-string)
    # and delete the source MP3, which the original leaked on disk.
    if temp_audio_path.endswith(".mp3"):
        mp3_path = temp_audio_path
        audio = AudioSegment.from_mp3(mp3_path)
        temp_audio_path = os.path.splitext(mp3_path)[0] + ".wav"
        audio.export(temp_audio_path, format="wav")
        os.remove(mp3_path)

    # Speech-to-text with the cached Whisper model.
    result = whisper_model.transcribe(temp_audio_path)
    user_text = result["text"]
    st.write("Transcribed Text:", user_text)

    # Generate AI response via the SambaNova OpenAI-compatible endpoint.
    st.write("Generating a conversational response...")
    client = openai.OpenAI(
        api_key=os.environ.get("SAMBANOVA_API_KEY"),
        base_url="https://api.sambanova.ai/v1",
    )

    response = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=[
            {"role": "system", "content": (
                "You are a kind, empathetic, and intelligent assistant capable of meaningful conversations and emotional support. "
                "Your primary goals are: "
                "1. To engage in casual, friendly, and supportive conversations when the user seeks companionship or emotional relief. "
                "2. To adapt your tone and responses to match the user's mood, providing warmth and encouragement if they seem distressed or seeking emotional support. "
                "3. To answer questions accurately and provide explanations when asked, adjusting the depth and length of your answers based on the user's needs. "
                "4. To maintain a positive and non-judgmental tone, offering helpful advice or lighthearted dialogue when appropriate. "
                "5. To ensure the user feels heard, understood, and valued during every interaction. "
                "If the user does not ask a question, keep the conversation engaging and meaningful by responding thoughtfully or with light humor where appropriate."
            )},
            {"role": "user", "content": user_text},
        ],
        # Low temperature/top_p keep the reply focused and deterministic.
        temperature=0.1,
        top_p=0.1,
    )

    answer = response.choices[0].message.content
    st.write("Response:", answer)

    # Convert response text to speech using gTTS.
    st.write("Converting the response to speech...")
    tts = gTTS(text=answer, slow=False)
    response_audio_path = "final_response.mp3"
    tts.save(response_audio_path)

    # Play and offer the response MP3 for download.
    st.audio(response_audio_path, format="audio/mp3")
    # FIX: the original passed open(path, "rb") directly to download_button
    # and never closed the file handle; read the bytes in a context manager.
    with open(response_audio_path, "rb") as audio_file:
        response_bytes = audio_file.read()
    st.download_button(
        label="Download the Response",
        data=response_bytes,
        file_name="final_response.mp3",
        mime="audio/mpeg",
    )

    # Clean up temporary files.
    os.remove(temp_audio_path)
    os.remove(response_audio_path)
packages.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ portaudio19-dev
2
+ python3-all-dev
3
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ openai-whisper
2
+ gTTS
3
+ python-dotenv
4
+ openai
5
+ streamlit
6
+ pydub
7
+ pyaudio
8
+ numpy
9
+ torch
10
+ ffmpeg