dromerosm committed
Commit f792b11 • 1 Parent(s): 506e6b6

Add .gitignore, update requirements, and enhance README with detailed features and usage instructions

Files changed (4):
  1. .gitignore +2 -0
  2. README.md +86 -2
  3. app.py +177 -95
  4. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .conda
+ .env
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: Groq-LLaMA3.1
+ title: Groq-LLaMA3.x
  emoji: 📚
  colorFrom: yellow
  colorTo: blue
@@ -10,4 +10,88 @@ pinned: false
  license: mit
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Groq Chat with LLaMA3x
+
+ A Streamlit-based chat application that leverages Groq's API to interact with LLaMA3x models.
+
+ ## Features
+
+ ### Model Integration
+ - Seamless integration with Groq's LLaMA3x model family
+ - Dynamic model selection from available LLaMA variants
+ - Automatic model metadata fetching and display
+ - Model-specific token limit handling
+
+ ### Chat Interface
+ - Real-time streaming responses with character-by-character display
+ - Non-streaming mode for batch responses
+ - Persistent chat history with session management
+ - Clear chat functionality
+ - User-friendly message input system
+ - Distinct avatars for user (🧑‍💻) and assistant (🔋) messages
+
+ ### Performance Controls
+ - Adjustable token limit slider with model-specific maximums
+ - Toggle between streaming and non-streaming modes
+ - Automatic session state management
+ - Error handling with user-friendly error messages
+
+ ### Usage Analytics
+ - Real-time token usage tracking
+   - Prompt tokens
+   - Response tokens
+   - Total tokens used
+ - Performance timing metrics
+   - Prompt processing time
+   - Response generation time
+   - Total interaction time
+
+ ### UI/UX Features
+ - Responsive wide-layout design
+ - Sidebar with model controls and settings
+ - Groq branding integration
+ - Important disclaimer for AI-generated content
+ - Clear visual hierarchy with markdown formatting
+
+ ## Prerequisites
+
+ - Python 3.7+
+ - Groq API key
+ - Required Python packages:
+   - streamlit
+   - groq
+   - python-dotenv
+
+ ## Installation
+
+ 1. Clone the repository
+ 2. Install dependencies:
+    ```bash
+    pip install streamlit groq python-dotenv
+    ```
+ 3. Create a `.env` file and add your Groq API key:
+    ```
+    GROQ_API_KEY=your_api_key_here
+    ```
+
+ ## Usage
+
+ Run the application:
+ ```bash
+ streamlit run app.py
+ ```
+
+ The app will open in your default browser, featuring:
+ - Model selection dropdown
+ - Adjustable token limit slider
+ - Streaming mode toggle
+ - Clear chat functionality
+ - Real-time usage statistics
+
+ ## Security Note
+
+ Always keep your API key secure and never commit it to version control. The application uses environment variables for sensitive data management.
+
+ ## License
+
+ MIT
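As a side note on the `.env` step the README adds above, here is a minimal sketch for verifying the key actually loads before launching the app. The filename `check_env.py` is illustrative and not part of this commit; the `GROQ_API_KEY` name and the `find_dotenv`/`load_dotenv` pattern come from app.py:

```python
# check_env.py - illustrative helper, not part of this commit.
import os
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())  # same loading pattern app.py uses

key = os.environ.get("GROQ_API_KEY")
if key:
    # Print only a short prefix so the secret never lands in a terminal log.
    print(f"GROQ_API_KEY loaded (starts with '{key[:4]}...')")
else:
    print("GROQ_API_KEY not found - check your .env file.")
```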
app.py CHANGED
@@ -1,128 +1,210 @@
  import os
  from dotenv import find_dotenv, load_dotenv
  import streamlit as st
- from typing import Generator
  from groq import Groq
 
- _ = load_dotenv(find_dotenv())
- st.set_page_config(page_icon="📃", layout="wide", page_title="Groq & LLaMA3.1 Chat Bot...")
- 
- def icon(emoji: str):
-     """Shows an emoji as a Notion-style page icon."""
-     st.write(
-         f'<span style="font-size: 78px; line-height: 1">{emoji}</span>',
-         unsafe_allow_html=True,
-     )
- 
- # icon("⚡️")
- 
- st.subheader("Groq Chat with LLaMA3.1 App", divider="rainbow", anchor=False)
- 
- client = Groq(
-     api_key=os.environ['GROQ_API_KEY'],
- )
- 
- # Initialize chat history and selected model
+ # Load environment variables
+ load_dotenv(find_dotenv())
+ 
+ # Set up Streamlit page configuration
+ st.set_page_config(
+     page_icon="📃",
+     layout="wide",
+     page_title="Groq & LLaMA3x Chat Bot"
+ )
+ 
+ # App Title
+ st.title("Groq Chat with LLaMA3x")
+ 
+ # Initialize the Groq client using the API key from the environment variables
+ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+ 
+ # Cache the model fetching function to improve performance
+ @st.cache_data
+ def fetch_available_models():
+     """
+     Fetches the available models from the Groq API.
+     Returns a list of models or an empty list if there's an error.
+     """
+     try:
+         models_response = client.models.list()
+         return models_response.data
+     except Exception as e:
+         st.error(f"Error fetching models: {e}")
+         return []
+ 
+ # Load available models and filter them
+ available_models = fetch_available_models()
+ filtered_models = [
+     model for model in available_models if model.id.startswith('llama-3')
+ ]
+ 
+ # Prepare a dictionary of model metadata
+ models = {
+     model.id: {
+         "name": model.id,
+         "tokens": 4000,
+         "developer": model.owned_by,
+     }
+     for model in filtered_models
+ }
+ 
+ # Initialize session state variables
  if "messages" not in st.session_state:
      st.session_state.messages = []
 
  if "selected_model" not in st.session_state:
      st.session_state.selected_model = None
 
- # Define model details
- models = {
-     "llama-3.1-70b-versatile": {"name": "LLaMA3.1-70b", "tokens": 4096, "developer": "Meta"},
-     "llama-3.1-8b-instant": {"name": "LLaMA3.1-8b", "tokens": 4096, "developer": "Meta"},
-     "llama3-70b-8192": {"name": "Meta Llama 3 70B", "tokens": 4096, "developer": "Meta"},
-     "llama3-8b-8192": {"name": "Meta Llama 3 8B", "tokens": 4096, "developer": "Meta"},
-     "llama3-groq-70b-8192-tool-use-preview": {"name": "Llama 3 Groq 70B Tool Use (Preview)", "tokens": 4096, "developer": "Groq"},
-     "gemma-7b-it": {"name": "Gemma-7b-it", "tokens": 4096, "developer": "Google"},
-     "mixtral-8x7b-32768": {
-         "name": "Mixtral-8x7b-Instruct-v0.1",
-         "tokens": 32768,
-         "developer": "Mistral",
-     },
- }
- 
- # Layout for model selection and max_tokens slider
- col1, col2 = st.columns([1, 3])  # Adjust the ratio to make the first column smaller
- 
- with col1:
-     model_option = st.selectbox(
-         "Choose a model:",
-         options=list(models.keys()),
-         format_func=lambda x: models[x]["name"],
-         index=0,  # Default to the first model in the list
-     )
-     max_tokens_range = models[model_option]["tokens"]
-     max_tokens = st.slider(
-         "Max Tokens:",
-         min_value=512,
-         max_value=max_tokens_range,
-         value=min(32768, max_tokens_range),
-         step=512,
-         help=f"Adjust the maximum number of tokens (words) for the model's response. Max for selected model: {max_tokens_range}",
-     )
- 
- # Detect model change and clear chat history if model has changed
- if st.session_state.selected_model != model_option:
-     st.session_state.messages = []
-     st.session_state.selected_model = model_option
- 
- # Add a "Clear Chat" button
- if st.button("Clear Chat"):
-     st.session_state.messages = []
- 
- # Display chat messages from history on app rerun
+ # Sidebar: Controls
+ with st.sidebar:
+ 
+     # Powered by Groq logo
+     st.markdown(
+         """
+         <a href="https://groq.com" target="_blank" rel="noopener noreferrer">
+             <img
+                 src="https://groq.com/wp-content/uploads/2024/03/PBG-mark1-color.svg"
+                 alt="Powered by Groq for fast inference."
+                 width="100%"
+             />
+         </a>
+         """,
+         unsafe_allow_html=True
+     )
+     st.markdown("---")
+ 
+     # Define a function to clear messages when the model changes
+     def reset_chat_on_model_change():
+         st.session_state.messages = []
+ 
+     # Model selection dropdown
+     if models:
+         model_option = st.selectbox(
+             "Choose a model:",
+             options=list(models.keys()),
+             format_func=lambda x: f"{models[x]['name']} ({models[x]['developer']})",
+             on_change=reset_chat_on_model_change,  # Reset chat when model changes
+         )
+     else:
+         st.warning("No available models to select.")
+         model_option = None
+ 
+     # Token limit slider
+     if models:
+         max_tokens_range = models[model_option]["tokens"]
+         max_tokens = st.slider(
+             "Max Tokens:",
+             min_value=200,
+             max_value=max_tokens_range,
+             value=max(100, int(max_tokens_range * 0.5)),
+             step=256,
+             help=f"Adjust the maximum number of tokens for the response. Maximum for the selected model: {max_tokens_range}"
+         )
+     else:
+         max_tokens = 200
+ 
+     # Additional options
+     stream_mode = st.checkbox("Enable Streaming", value=True)
+ 
+     # Button to clear the chat
+     if st.button("Clear Chat"):
+         st.session_state.messages = []
+ 
+     st.markdown("### Usage Summary")
+     usage_box = st.empty()
+ 
+     # Disclaimer
+     st.markdown(
+         """
+         -----
+         ⚠️ **Important:**
+         *The responses provided by this application are generated automatically using an AI model.
+         Users are responsible for verifying the accuracy of the information before relying on it.
+         Always cross-check facts and data for critical decisions.*
+         """
+     )
+ 
+ # Main Chat Interface
+ st.markdown("### Chat Interface")
+ 
+ # Display the chat history
  for message in st.session_state.messages:
      avatar = "🔋" if message["role"] == "assistant" else "🧑‍💻"
      with st.chat_message(message["role"], avatar=avatar):
          st.markdown(message["content"])
 
- def generate_chat_responses(chat_completion) -> Generator[str, None, None]:
-     """Yield chat response content from the Groq API response."""
-     for chunk in chat_completion:
-         if chunk.choices[0].delta.content:
-             yield chunk.choices[0].delta.content
- 
- if prompt := st.chat_input("Enter your prompt here..."):
-     st.session_state.messages.append({"role": "user", "content": prompt})
- 
-     with st.chat_message("user", avatar="🧑‍💻"):
-         st.markdown(prompt)
- 
-     # Fetch response from Groq API
+ # Capture user input
+ user_input = st.chat_input("Enter your message here...")
+ 
+ if user_input:
+     # Append the user input to the session state
+     st.session_state.messages.append({"role": "user", "content": user_input})
+     with st.chat_message("user", avatar="🧑‍💻"):
+         st.markdown(user_input)
+ 
+     # Generate a response using the selected model
      try:
-         chat_completion = client.chat.completions.create(
-             model=model_option,
-             messages=[
-                 {"role": m["role"], "content": m["content"]}
-                 for m in st.session_state.messages
-             ],
-             max_tokens=max_tokens,
-             stream=True,
-         )
- 
-         # Use the generator function with st.write_stream
-         with st.chat_message("assistant", avatar="🔋"):
-             chat_responses_generator = generate_chat_responses(chat_completion)
-             full_response = st.write_stream(chat_responses_generator)
-     except Exception as e:
-         st.error(e, icon="❌")
- 
-     # Append the full response to session_state.messages
-     if isinstance(full_response, str):
+         full_response = ""
+         usage_summary = ""
+         usage_data = None  # only populated in non-streaming mode; avoids a NameError below
+ 
+         if stream_mode:
+             # Generate a response with streaming enabled
+             chat_completion = client.chat.completions.create(
+                 model=model_option,
+                 messages=[
+                     {"role": m["role"], "content": m["content"]}
+                     for m in st.session_state.messages
+                 ],
+                 max_tokens=max_tokens,
+                 stream=True
+             )
+ 
+             with st.chat_message("assistant", avatar="🔋"):
+                 response_placeholder = st.empty()
+ 
+                 for chunk in chat_completion:
+                     if chunk.choices[0].delta.content:
+                         full_response += chunk.choices[0].delta.content
+                         response_placeholder.markdown(full_response)
+         else:
+             # Generate a response without streaming
+             chat_completion = client.chat.completions.create(
+                 model=model_option,
+                 messages=[
+                     {"role": m["role"], "content": m["content"]}
+                     for m in st.session_state.messages
+                 ],
+                 max_tokens=max_tokens,
+                 stream=False
+             )
+ 
+             response = chat_completion.choices[0].message.content
+             usage_data = chat_completion.usage
+ 
+             with st.chat_message("assistant", avatar="🔋"):
+                 st.markdown(response)
+             full_response = response
+ 
+         if usage_data:
+             usage_summary = (
+                 f"**Token Usage:**\n"
+                 f"- Prompt Tokens: {usage_data.prompt_tokens}\n"
+                 f"- Response Tokens: {usage_data.completion_tokens}\n"
+                 f"- Total Tokens: {usage_data.total_tokens}\n\n"
+                 f"**Timings:**\n"
+                 f"- Prompt Time: {round(usage_data.prompt_time, 5)} secs\n"
+                 f"- Response Time: {round(usage_data.completion_time, 5)} secs\n"
+                 f"- Total Time: {round(usage_data.total_time, 5)} secs"
+             )
+ 
+         if usage_summary:
+             usage_box.markdown(usage_summary)
+ 
+         # Append the assistant's response to the session state
          st.session_state.messages.append(
              {"role": "assistant", "content": full_response}
          )
-     else:
-         # Handle the case where full_response is not a string
-         combined_response = "\n".join(str(item) for item in full_response)
-         st.session_state.messages.append(
-             {"role": "assistant", "content": combined_response}
-         )
+ 
+     except Exception as e:
+         st.error(f"Error generating the response: {e}")
 
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
  streamlit
  groq
  python-dotenv
+ watchdog
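On the `watchdog` addition: when the module is missing, Streamlit falls back to a slower polling file watcher and logs a hint suggesting it, so pinning it here presumably quiets that warning and speeds up auto-reload during development. Everything installs in one step:

```bash
pip install -r requirements.txt
```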