dromerosm committed
Commit f792b11 • 1 Parent(s): 506e6b6

Add .gitignore, update requirements, and enhance README with detailed features and usage instructions

Files changed (4):
  1. .gitignore +2 -0
  2. README.md +86 -2
  3. app.py +177 -95
  4. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .conda
+ .env
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: Groq-LLaMA3.1
+ title: Groq-LLaMA3.x
  emoji: 📚
  colorFrom: yellow
  colorTo: blue
@@ -10,4 +10,88 @@ pinned: false
  license: mit
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Groq Chat with LLaMA3x
+
+ A Streamlit-based chat application that leverages Groq's API to interact with LLaMA3x models.
+
+ ## Features
+
+ ### Model Integration
+ - Seamless integration with Groq's LLaMA3x model family
+ - Dynamic model selection from available LLaMA variants
+ - Automatic model metadata fetching and display
+ - Model-specific token limit handling
+
+ ### Chat Interface
+ - Real-time streaming responses with character-by-character display
+ - Non-streaming mode for batch responses
+ - Persistent chat history with session management
+ - Clear chat functionality
+ - User-friendly message input system
+ - Distinct avatars for user (🧑‍💻) and assistant (🔋) messages
+
+ ### Performance Controls
+ - Adjustable token limit slider with model-specific maximums
+ - Toggle between streaming and non-streaming modes
+ - Automatic session state management
+ - Error handling with user-friendly error messages
+
+ ### Usage Analytics
+ - Real-time token usage tracking
+   - Prompt tokens
+   - Response tokens
+   - Total tokens used
+ - Performance timing metrics
+   - Prompt processing time
+   - Response generation time
+   - Total interaction time
+
+ ### UI/UX Features
+ - Responsive wide-layout design
+ - Sidebar with model controls and settings
+ - Groq branding integration
+ - Important disclaimer for AI-generated content
+ - Clear visual hierarchy with markdown formatting
+
+ ## Prerequisites
+
+ - Python 3.7+
+ - Groq API key
+ - Required Python packages:
+   - streamlit
+   - groq
+   - python-dotenv
+
+ ## Installation
+
+ 1. Clone the repository
+ 2. Install dependencies:
+    ```bash
+    pip install streamlit groq python-dotenv
+    ```
+ 3. Create a `.env` file and add your Groq API key:
+    ```
+    GROQ_API_KEY=your_api_key_here
+    ```
+
+ ## Usage
+
+ Run the application:
+ ```bash
+ streamlit run app.py
+ ```
+
+ The app will open in your default browser, featuring:
+ - Model selection dropdown
+ - Adjustable token limit slider
+ - Streaming mode toggle
+ - Clear chat functionality
+ - Real-time usage statistics
+
+ ## Security Note
+
+ Always keep your API key secure and never commit it to version control. The application uses environment variables for sensitive data management.
+
+ ## License
+
+ MIT
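As a side note on the `.env` step the README adds above, here is a minimal sketch for verifying the key actually loads before launching the app. The filename `check_env.py` is illustrative and not part of this commit; the `GROQ_API_KEY` name and the `find_dotenv`/`load_dotenv` pattern come from app.py:

```python
# check_env.py - illustrative helper, not part of this commit.
import os
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())  # same loading pattern app.py uses

key = os.environ.get("GROQ_API_KEY")
if key:
    # Print only a short prefix so the secret never lands in a terminal log.
    print(f"GROQ_API_KEY loaded (starts with '{key[:4]}...')")
else:
    print("GROQ_API_KEY not found - check your .env file.")
```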
app.py CHANGED
@@ -1,128 +1,210 @@
  import os
  from dotenv import find_dotenv, load_dotenv
  import streamlit as st
- from typing import Generator
  from groq import Groq
 
- _ = load_dotenv(find_dotenv())
- st.set_page_config(page_icon="📃", layout="wide", page_title="Groq & LLaMA3.1 Chat Bot...")
- 
- def icon(emoji: str):
-     """Shows an emoji as a Notion-style page icon."""
-     st.write(
-         f'<span style="font-size: 78px; line-height: 1">{emoji}</span>',
-         unsafe_allow_html=True,
-     )
- 
- # icon("⚡️")
- 
- st.subheader("Groq Chat with LLaMA3.1 App", divider="rainbow", anchor=False)
- 
- client = Groq(
-     api_key=os.environ['GROQ_API_KEY'],
- )
- 
- # Initialize chat history and selected model
+ # Load environment variables
+ load_dotenv(find_dotenv())
+ 
+ # Set up Streamlit page configuration
+ st.set_page_config(
+     page_icon="📃",
+     layout="wide",
+     page_title="Groq & LLaMA3x Chat Bot"
+ )
+ 
+ # App Title
+ st.title("Groq Chat with LLaMA3x")
+ 
+ # Initialize the Groq client using the API key from the environment variables
+ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+ 
+ # Cache the model fetching function to improve performance
+ @st.cache_data
+ def fetch_available_models():
+     """
+     Fetches the available models from the Groq API.
+     Returns a list of models or an empty list if there's an error.
+     """
+     try:
+         models_response = client.models.list()
+         return models_response.data
+     except Exception as e:
+         st.error(f"Error fetching models: {e}")
+         return []
+ 
+ # Load available models and filter them
+ available_models = fetch_available_models()
+ filtered_models = [
+     model for model in available_models if model.id.startswith('llama-3')
+ ]
+ 
+ # Prepare a dictionary of model metadata
+ models = {
+     model.id: {
+         "name": model.id,
+         "tokens": 4000,
+         "developer": model.owned_by,
+     }
+     for model in filtered_models
+ }
+ 
+ # Initialize session state variables
  if "messages" not in st.session_state:
      st.session_state.messages = []
 
  if "selected_model" not in st.session_state:
      st.session_state.selected_model = None
 
- # Define model details
- models = {
-     "llama-3.1-70b-versatile": {"name": "LLaMA3.1-70b", "tokens": 4096, "developer": "Meta"},
-     "llama-3.1-8b-instant": {"name": "LLaMA3.1-8b", "tokens": 4096, "developer": "Meta"},
-     "llama3-70b-8192": {"name": "Meta Llama 3 70B", "tokens": 4096, "developer": "Meta"},
-     "llama3-8b-8192": {"name": "Meta Llama 3 8B", "tokens": 4096, "developer": "Meta"},
-     "llama3-groq-70b-8192-tool-use-preview": {"name": "Llama 3 Groq 70B Tool Use (Preview)", "tokens": 4096, "developer": "Groq"},
-     "gemma-7b-it": {"name": "Gemma-7b-it", "tokens": 4096, "developer": "Google"},
-     "mixtral-8x7b-32768": {
-         "name": "Mixtral-8x7b-Instruct-v0.1",
-         "tokens": 32768,
-         "developer": "Mistral",
-     },
- }
- 
- # Layout for model selection and max_tokens slider
- col1, col2 = st.columns([1, 3])  # Adjust the ratio to make the first column smaller
- 
- with col1:
-     model_option = st.selectbox(
-         "Choose a model:",
-         options=list(models.keys()),
-         format_func=lambda x: models[x]["name"],
-         index=0,  # Default to the first model in the list
-     )
-     max_tokens_range = models[model_option]["tokens"]
-     max_tokens = st.slider(
-         "Max Tokens:",
-         min_value=512,
-         max_value=max_tokens_range,
-         value=min(32768, max_tokens_range),
-         step=512,
-         help=f"Adjust the maximum number of tokens (words) for the model's response. Max for selected model: {max_tokens_range}",
-     )
- 
- # Detect model change and clear chat history if model has changed
- if st.session_state.selected_model != model_option:
-     st.session_state.messages = []
-     st.session_state.selected_model = model_option
- 
- # Add a "Clear Chat" button
- if st.button("Clear Chat"):
-     st.session_state.messages = []
- 
- # Display chat messages from history on app rerun
+ # Sidebar: Controls
+ with st.sidebar:
+ 
+     # Powered by Groq logo
+     st.markdown(
+         """
+         <a href="https://groq.com" target="_blank" rel="noopener noreferrer">
+             <img
+                 src="https://groq.com/wp-content/uploads/2024/03/PBG-mark1-color.svg"
+                 alt="Powered by Groq for fast inference."
+                 width="100%"
+             />
+         </a>
+         """,
+         unsafe_allow_html=True
+     )
+     st.markdown("---")
+ 
+     # Define a function to clear messages when the model changes
+     def reset_chat_on_model_change():
+         st.session_state.messages = []
+ 
+     # Model selection dropdown
+     if models:
+         model_option = st.selectbox(
+             "Choose a model:",
+             options=list(models.keys()),
+             format_func=lambda x: f"{models[x]['name']} ({models[x]['developer']})",
+             on_change=reset_chat_on_model_change,  # Reset chat when model changes
+         )
+     else:
+         st.warning("No available models to select.")
+         model_option = None
+ 
+     # Token limit slider
+     if models:
+         max_tokens_range = models[model_option]["tokens"]
+         max_tokens = st.slider(
+             "Max Tokens:",
+             min_value=200,
+             max_value=max_tokens_range,
+             value=max(100, int(max_tokens_range * 0.5)),
+             step=256,
+             help=f"Adjust the maximum number of tokens for the response. Maximum for the selected model: {max_tokens_range}"
+         )
+     else:
+         max_tokens = 200
+ 
+     # Additional options
+     stream_mode = st.checkbox("Enable Streaming", value=True)
+ 
+     # Button to clear the chat
+     if st.button("Clear Chat"):
+         st.session_state.messages = []
+ 
+     st.markdown("### Usage Summary")
+     usage_box = st.empty()
+ 
+     # Disclaimer
+     st.markdown(
+         """
+         -----
+         ⚠️ **Important:**
+         *The responses provided by this application are generated automatically using an AI model.
+         Users are responsible for verifying the accuracy of the information before relying on it.
+         Always cross-check facts and data for critical decisions.*
+         """
+     )
+ 
+ # Main Chat Interface
+ st.markdown("### Chat Interface")
+ 
+ # Display the chat history
  for message in st.session_state.messages:
      avatar = "🔋" if message["role"] == "assistant" else "🧑‍💻"
      with st.chat_message(message["role"], avatar=avatar):
          st.markdown(message["content"])
 
- def generate_chat_responses(chat_completion) -> Generator[str, None, None]:
-     """Yield chat response content from the Groq API response."""
-     for chunk in chat_completion:
-         if chunk.choices[0].delta.content:
-             yield chunk.choices[0].delta.content
- 
- if prompt := st.chat_input("Enter your prompt here..."):
-     st.session_state.messages.append({"role": "user", "content": prompt})
- 
-     with st.chat_message("user", avatar="🧑‍💻"):
-         st.markdown(prompt)
- 
-     # Fetch response from Groq API
+ # Capture user input
+ user_input = st.chat_input("Enter your message here...")
+ 
+ if user_input:
+     # Append the user input to the session state
+     st.session_state.messages.append({"role": "user", "content": user_input})
+     with st.chat_message("user", avatar="🧑‍💻"):
+         st.markdown(user_input)
+ 
+     # Generate a response using the selected model
      try:
-         chat_completion = client.chat.completions.create(
-             model=model_option,
-             messages=[
-                 {"role": m["role"], "content": m["content"]}
-                 for m in st.session_state.messages
-             ],
-             max_tokens=max_tokens,
-             stream=True,
-         )
- 
-         # Use the generator function with st.write_stream
-         with st.chat_message("assistant", avatar="🔋"):
-             chat_responses_generator = generate_chat_responses(chat_completion)
-             full_response = st.write_stream(chat_responses_generator)
-     except Exception as e:
-         st.error(e, icon="❌")
- 
-     # Append the full response to session_state.messages
-     if isinstance(full_response, str):
+         full_response = ""
+         usage_summary = ""
+         usage_data = None  # only populated in non-streaming mode; avoids a NameError below
+ 
+         if stream_mode:
+             # Generate a response with streaming enabled
+             chat_completion = client.chat.completions.create(
+                 model=model_option,
+                 messages=[
+                     {"role": m["role"], "content": m["content"]}
+                     for m in st.session_state.messages
+                 ],
+                 max_tokens=max_tokens,
+                 stream=True
+             )
+ 
+             with st.chat_message("assistant", avatar="🔋"):
+                 response_placeholder = st.empty()
+ 
+                 for chunk in chat_completion:
+                     if chunk.choices[0].delta.content:
+                         full_response += chunk.choices[0].delta.content
+                         response_placeholder.markdown(full_response)
+         else:
+             # Generate a response without streaming
+             chat_completion = client.chat.completions.create(
+                 model=model_option,
+                 messages=[
+                     {"role": m["role"], "content": m["content"]}
+                     for m in st.session_state.messages
+                 ],
+                 max_tokens=max_tokens,
+                 stream=False
+             )
+ 
+             response = chat_completion.choices[0].message.content
+             usage_data = chat_completion.usage
+ 
+             with st.chat_message("assistant", avatar="🔋"):
+                 st.markdown(response)
+             full_response = response
+ 
+         if usage_data:
+             usage_summary = (
+                 f"**Token Usage:**\n"
+                 f"- Prompt Tokens: {usage_data.prompt_tokens}\n"
+                 f"- Response Tokens: {usage_data.completion_tokens}\n"
+                 f"- Total Tokens: {usage_data.total_tokens}\n\n"
+                 f"**Timings:**\n"
+                 f"- Prompt Time: {round(usage_data.prompt_time, 5)} secs\n"
+                 f"- Response Time: {round(usage_data.completion_time, 5)} secs\n"
+                 f"- Total Time: {round(usage_data.total_time, 5)} secs"
+             )
+ 
+         if usage_summary:
+             usage_box.markdown(usage_summary)
+ 
+         # Append the assistant's response to the session state
          st.session_state.messages.append(
              {"role": "assistant", "content": full_response}
          )
-     else:
-         # Handle the case where full_response is not a string
-         combined_response = "\n".join(str(item) for item in full_response)
-         st.session_state.messages.append(
-             {"role": "assistant", "content": combined_response}
-         )
+ 
+     except Exception as e:
+         st.error(f"Error generating the response: {e}")
 
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
  streamlit
  groq
  python-dotenv
+ watchdog
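On the `watchdog` addition: when the module is missing, Streamlit falls back to a slower polling file watcher and logs a hint suggesting it, so pinning it here presumably quiets that warning and speeds up auto-reload during development. Everything installs in one step:

```bash
pip install -r requirements.txt
```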