groq-llama3 / app.py
dromerosm's picture
Add .gitignore, update requirements, and enhance README with detailed features and usage instructions
f792b11
raw
history blame
6.75 kB
import os
from dotenv import find_dotenv, load_dotenv
import streamlit as st
from groq import Groq
# Load environment variables
load_dotenv(find_dotenv())

# Set up Streamlit page configuration
st.set_page_config(
    page_icon="📃",
    layout="wide",
    page_title="Groq & LLaMA3x Chat Bot",
)

# App Title
st.title("Groq Chat with LLaMA3x")

# Initialize the Groq client using the API key from the environment variables.
# Every feature below needs an authenticated client, so when the key is
# missing (or empty) show an actionable message and halt this script run
# instead of continuing with an unusable client.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    st.error(
        "GROQ_API_KEY is not set. Add it to your environment or to a .env "
        "file and reload the app."
    )
    st.stop()
client = Groq(api_key=groq_api_key)
# Cache the model fetching function to improve performance.
# Only the successful API call is cached: the helper lets exceptions
# propagate, and st.cache_data stores return values only, so a transient
# failure is not remembered and the next rerun retries the request.
@st.cache_data
def _fetch_models_from_api():
    """Return the model list reported by the Groq API, raising on failure."""
    return client.models.list().data


def fetch_available_models():
    """
    Fetches the available models from the Groq API.

    Returns a list of models or an empty list if there's an error.
    When an error occurs it is reported on the page with ``st.error``.
    """
    try:
        return _fetch_models_from_api()
    except Exception as e:
        # UI boundary: report the problem and let the app render without models.
        st.error(f"Error fetching models: {e}")
        return []
# Retrieve the model catalogue and keep the entries whose id starts with 'llama-3'
available_models = fetch_available_models()
filtered_models = []
for candidate in available_models:
    if candidate.id.startswith('llama-3'):
        filtered_models.append(candidate)

# Build the metadata lookup, keyed by model id
models = {}
for entry in filtered_models:
    models[entry.id] = {
        "name": entry.id,
        "tokens": 4000,
        "developer": entry.owned_by,
    }

# Seed the session state on the first run only; existing values are kept
for state_key, initial_value in (("messages", []), ("selected_model", None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# Sidebar: Controls
with st.sidebar:
    # Powered by Groq logo
    st.markdown(
        """
<a href="https://groq.com" target="_blank" rel="noopener noreferrer">
<img
src="https://groq.com/wp-content/uploads/2024/03/PBG-mark1-color.svg"
alt="Powered by Groq for fast inference."
width="100%"
/>
</a>
""",
        unsafe_allow_html=True,
    )
    st.markdown("---")

    def reset_chat_on_model_change():
        """Clear the conversation; registered as the model selectbox's on_change callback."""
        st.session_state.messages = []

    if models:
        # Model selection dropdown
        model_option = st.selectbox(
            "Choose a model:",
            options=list(models.keys()),
            format_func=lambda x: f"{models[x]['name']} ({models[x]['developer']})",
            on_change=reset_chat_on_model_change,  # Reset chat when model changes
        )

        # Token limit slider
        max_tokens_range = models[model_option]["tokens"]
        min_tokens = 200
        # Default to half of the model's limit, clamped into the slider's
        # [min, max] range so the initial value is always a legal one.
        default_tokens = min(
            max_tokens_range, max(min_tokens, max_tokens_range // 2)
        )
        max_tokens = st.slider(
            "Max Tokens:",
            min_value=min_tokens,
            max_value=max_tokens_range,
            value=default_tokens,
            step=256,
            help=f"Adjust the maximum number of tokens for the response. Maximum for the selected model: {max_tokens_range}",
        )
    else:
        # No models could be loaded: fall back to inert defaults so the names
        # read by the chat section still exist.
        st.warning("No available models to select.")
        model_option = None
        max_tokens = 200

    # Additional options
    stream_mode = st.checkbox("Enable Streaming", value=True)

    # Button to clear the chat
    if st.button("Clear Chat"):
        st.session_state.messages = []

    # Placeholder that the chat section fills with usage statistics
    st.markdown("### Usage Summary")
    usage_box = st.empty()

    # Disclaimer
    st.markdown(
        """
-----
⚠️ **Important:**
*The responses provided by this application are generated automatically using an AI model.
Users are responsible for verifying the accuracy of the information before relying on it.
Always cross-check facts and data for critical decisions.*
"""
    )
# Main Chat Interface
st.markdown("### Chat Interface")

# Chat avatars. Written as escapes so the multi-code-point sequences survive
# any re-encoding of this file.
ASSISTANT_AVATAR = "\U0001F50B"  # battery emoji
USER_AVATAR = "\U0001F9D1\u200D\U0001F4BB"  # technologist emoji (person + ZWJ + laptop)


def _format_usage(usage_data):
    """Return the markdown usage summary for a completion's usage object.

    Timing fields that are missing or None are rendered as "n/a" instead of
    raising inside ``round``.
    """

    def seconds(field_name):
        value = getattr(usage_data, field_name, None)
        return "n/a" if value is None else f"{round(value, 5)} secs"

    return (
        f"**Token Usage:**\n"
        f"- Prompt Tokens: {usage_data.prompt_tokens}\n"
        f"- Response Tokens: {usage_data.completion_tokens}\n"
        f"- Total Tokens: {usage_data.total_tokens}\n\n"
        f"**Timings:**\n"
        f"- Prompt Time: {seconds('prompt_time')}\n"
        f"- Response Time: {seconds('completion_time')}\n"
        f"- Total Time: {seconds('total_time')}"
    )


# Display the chat history
for message in st.session_state.messages:
    avatar = ASSISTANT_AVATAR if message["role"] == "assistant" else USER_AVATAR
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])

# Capture user input
user_input = st.chat_input("Enter your message here...")
if user_input:
    # Append the user input to the session state
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(user_input)

    if model_option is None:
        # The sidebar found no models; do not send a request without one.
        st.error("No model is available, so a response cannot be generated.")
    else:
        # Generate a response using the selected model
        try:
            chat_completion = client.chat.completions.create(
                model=model_option,
                messages=[
                    {"role": m["role"], "content": m["content"]}
                    for m in st.session_state.messages
                ],
                max_tokens=max_tokens,
                stream=stream_mode,
            )

            full_response = ""
            usage_data = None
            with st.chat_message("assistant", avatar=ASSISTANT_AVATAR):
                if stream_mode:
                    # Re-render the growing text as each chunk arrives
                    response_placeholder = st.empty()
                    for chunk in chat_completion:
                        if chunk.choices and chunk.choices[0].delta.content:
                            full_response += chunk.choices[0].delta.content
                            response_placeholder.markdown(full_response)
                        # NOTE(review): Groq is expected to attach usage for
                        # streamed completions at chunk.x_groq.usage; confirm
                        # against the installed SDK. getattr keeps this a
                        # no-op when the attribute is absent.
                        chunk_usage = getattr(
                            getattr(chunk, "x_groq", None), "usage", None
                        )
                        if chunk_usage:
                            usage_data = chunk_usage
                else:
                    full_response = chat_completion.choices[0].message.content or ""
                    usage_data = chat_completion.usage
                    st.markdown(full_response)

            # Append the assistant's response to the session state first, so
            # a problem while rendering usage cannot drop it from the history.
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )

            if usage_data:
                usage_box.markdown(_format_usage(usage_data))
        except Exception as e:
            # UI boundary: surface any API or rendering failure on the page.
            st.error(f"Error generating the response: {e}")