import streamlit as st
from dotenv import load_dotenv
from audiorecorder import audiorecorder
from langchain_core.messages import HumanMessage, AIMessage
import requests
from transformers import pipeline
from gtts import gTTS
import io
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
import os
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langchain_community.tools.tavily_search import TavilySearchResults
st.set_page_config(page_title="Urdu Virtual Assistant", page_icon="🤖") # set the page title and icon
# Load environment variables (if any)
load_dotenv()
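# The script assumes GROQ_API_KEY and RAPIDAPI_LANG_TRANS are set in the environment (e.g. in the .env
# file loaded above); the Tavily search tool below likewise reads its API key from the environment.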
user_id = "1" # example user id
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0.3,
    max_tokens=None,
    timeout=None,
    max_retries=5,
    groq_api_key=os.getenv("GROQ_API_KEY")
)
search = TavilySearchResults(
    max_results=2,
)
tools = [search]
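# create_react_agent builds a ReAct-style agent graph around the LLM and tools; its output state
# carries a "messages" list whose last entry is the agent's final answer (read in infer() below).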
agent_executor = create_react_agent(llm, tools)
# Initialize the wav2vec2 model for Urdu speech-to-text
pipe = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
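# Note: when given a file path or raw bytes, the ASR pipeline decodes the audio with ffmpeg,
# so ffmpeg is assumed to be available on the system.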
def translate(target, text):
    '''
    Translates the given text into the target language.

    Parameters:
        target (string): 2-character code specifying the target language.
        text (string): Text to be translated.

    Returns:
        res (string): Translated text.
    '''
    url = "https://microsoft-translator-text.p.rapidapi.com/translate"
    querystring = {"api-version": "3.0", "profanityAction": "NoAction", "textType": "plain", "to": target}
    payload = [{"Text": text}]
    headers = {
        "x-rapidapi-key": os.getenv("RAPIDAPI_LANG_TRANS"),
        "x-rapidapi-host": "microsoft-translator-text.p.rapidapi.com",
        "Content-Type": "application/json"
    }
    response = requests.post(url, json=payload, headers=headers, params=querystring)
    res = response.json()
    return res[0]["translations"][0]["text"]
def infer(user_input: str):
    '''
    Returns the translated response from the LLM for a user query.

    Parameters:
        user_input (string): User query (in Urdu).

    Returns:
        res (string): Translated (Urdu) response from the LLM.
    '''
    user_input = translate("en", user_input)  # translate the user query to English
    prompt = ChatPromptTemplate.from_messages(  # define a prompt
        [
            (
                "system",
                "You are a compassionate and friendly AI virtual assistant. You will provide helpful answers to user queries using the provided tool to ensure the accuracy and relevance of your responses."
            ),
            ("human", "{user_input}")
        ]
    )
    runnable = prompt | agent_executor  # chain the prompt into the ReAct agent
    response = runnable.invoke(  # invoke the chain with the translated user input
        {"user_input": user_input},
    )
    res = translate("ur", response["messages"][-1].content)  # translate the agent's final message back to Urdu
    return res
def text_to_speech(text, lang='ur'):
    '''
    Converts text to speech using gTTS.

    Parameters:
        text (string): Text to be converted to speech.
        lang (string): Language for the speech synthesis. Default is 'ur' (Urdu).

    Returns:
        response_audio_io (BytesIO): BytesIO object containing the audio data.
    '''
    tts = gTTS(text, lang=lang)
    response_audio_io = io.BytesIO()  # write the MP3 audio to an in-memory buffer
    tts.write_to_fp(response_audio_io)
    response_audio_io.seek(0)  # rewind so the buffer can be read from the start
    return response_audio_io
col1, col2 = st.columns([1, 5])  # Adjust the ratio to control the logo and title sizes
# Display the logo in the first column
with col1:
    st.image("bolo_logo-removebg-preview.png", width=100)  # Adjust the width as needed
# Display the title in the second column
with col2:
    st.title("Urdu Virtual Assistant")  # set the main title of the application
st.write("This application is a comprehensive speech-to-speech model designed to understand and respond in Urdu. It not only handles natural conversations but also has the capability to access and provide real-time information by integrating with the Tavily search engine. Whether you're asking for the weather or engaging in everyday dialogue, this assistant delivers accurate and context-aware responses, all in Urdu.")
# Record audio from the user's microphone
audio = audiorecorder()
if len(audio) > 0:
    # Save the recorded audio to a file
    audio.export("audio.wav", format="wav")
    # Convert the audio to text using the wav2vec2 model
    with open("audio.wav", "rb") as f:
        audio_bytes = f.read()
    # Process the audio bytes with the ASR pipeline
    result = pipe(audio_bytes)
    user_query = result["text"]
    with st.chat_message("Human"):  # create the message box for human input
        st.audio(audio.export().read())  # display the audio player
        st.markdown(user_query)
    # Get the response from the LLM
    response_text = infer(user_input=user_query)
    response_audio = text_to_speech(response_text, lang='ur')
    # Play the generated speech in the app
    with st.chat_message("AI"):
        st.audio(response_audio.read(), format='audio/mp3')
        st.markdown(response_text)