Aborman committed
Commit 9d344de
1 Parent(s): 465fe5b

Upload folder using huggingface_hub

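The commit message above indicates the files below were pushed with huggingface_hub. For reference, a minimal sketch of that upload flow (the repo id is a placeholder, not taken from this commit; authentication is assumed to come from a prior `huggingface-cli login` or the HF_TOKEN environment variable):

from huggingface_hub import HfApi

api = HfApi()  # uses the cached token or HF_TOKEN by default
api.upload_folder(
    folder_path=".",                    # local folder to push
    repo_id="your-username/your-repo",  # placeholder, replace with the target repo
    repo_type="model",                  # adjust if the target is a dataset or Space
    commit_message="Upload folder using huggingface_hub",
)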
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
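The new .gitignore entry keeps a local .env file out of the repository, and the rewritten lmstudio_gradio.py below reads its endpoint from the LMSTUDIO_API_BASE_URL environment variable. A minimal sketch of tying the two together with python-dotenv (the committed script itself does not call load_dotenv(), so this startup step is an assumption about intended usage; the dotenv.exe entry point added below suggests the package is installed):

# .env (ignored by git per the entry above)
# LMSTUDIO_API_BASE_URL=http://localhost:1234/v1

from dotenv import load_dotenv  # provided by the python-dotenv package
import os

load_dotenv()  # copies key=value pairs from .env into the process environment
print(os.getenv("LMSTUDIO_API_BASE_URL", "http://localhost:1234/v1"))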
convert-caffe2-to-onnx.exe ADDED
Binary file (108 kB).

convert-onnx-to-caffe2.exe ADDED
Binary file (108 kB).

dotenv.exe ADDED
Binary file (108 kB).

isympy.exe ADDED
Binary file (108 kB).

lmstudio_gradio.py CHANGED
@@ -1,217 +1,382 @@
  import gradio as gr
- import requests
  import logging
  import json
  import os
  import numpy as np

- # Set up logging to help troubleshoot issues
- logging.basicConfig(level=logging.DEBUG)

- # LM Studio REST API base URL
- BASE_URL = "http://localhost:1234/v1"

- # Function to handle chat completions with streaming support
- def chat_with_lmstudio(messages):
  url = f"{BASE_URL}/chat/completions"
17
  payload = {
18
- "model": "bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-IQ2_M.gguf", # Replace with your chat model
19
  "messages": messages,
20
  "temperature": 0.7,
21
- "max_tokens": 4096,
22
- "stream": True
23
  }
24
- logging.debug(f"Sending POST request to URL: {url}")
25
- logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
26
- try:
27
- with requests.post(url, json=payload, stream=True) as response:
28
- logging.debug(f"Response Status Code: {response.status_code}")
29
- response.raise_for_status()
30
- collected_response = ""
31
- for chunk in response.iter_lines():
32
- if chunk:
33
- chunk_data = chunk.decode('utf-8').strip()
34
- if chunk_data == "[DONE]":
35
- logging.debug("Received [DONE] signal. Ending stream.")
36
- break
37
- if chunk_data.startswith("data: "):
38
- chunk_data = chunk_data[6:].strip()
39
- logging.debug(f"Received Chunk: {chunk_data}")
40
- try:
41
- response_data = json.loads(chunk_data)
42
- if "choices" in response_data and len(response_data["choices"]) > 0:
43
- content = response_data['choices'][0].get('delta', {}).get('content', "")
44
- collected_response += content
45
- yield content
46
- except json.JSONDecodeError:
47
- logging.error(f"Failed to decode JSON from chunk: {chunk_data}")
48
- if not collected_response:
49
- yield "I'm sorry, I couldn't generate a response. Could you please try again?"
50
- except requests.exceptions.RequestException as e:
51
- logging.error(f"Request to LM Studio failed: {e}")
52
- yield "An error occurred while connecting to LM Studio. Please try again later."
53
-
54
- # Function to get embeddings from LM Studio
55
- def get_embeddings(text):
56
- url = f"{BASE_URL}/embeddings"
57
- payload = {
58
- "model": "nomad_embed_text_v1_5_Q8_0", # Use the exact model name registered in LM Studio
59
- "input": text
60
- }
61
- logging.debug(f"Sending POST request to URL: {url}")
62
- logging.debug(f"Payload: {json.dumps(payload, indent=2)}")
63
- try:
64
- response = requests.post(url, json=payload)
65
- response.raise_for_status()
66
- data = response.json()
67
- embedding = data['data'][0]['embedding']
68
- logging.debug(f"Received Embedding: {embedding}")
69
- return embedding
70
- except requests.exceptions.RequestException as e:
71
- logging.error(f"Request to LM Studio for embeddings failed: {e}")
72
- return None
73
-
74
- # Function to calculate cosine similarity
75
- def cosine_similarity(vec1, vec2):
76
- if not vec1 or not vec2:
77
- return 0
78
- vec1 = np.array(vec1)
79
- vec2 = np.array(vec2)
80
- if np.linalg.norm(vec1) == 0 or np.linalg.norm(vec2) == 0:
81
- return 0
82
- return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
83
-
84
- # Gradio Blocks interface for chat with file upload and embeddings
85
  def gradio_chat_interface():
86
- with gr.Blocks() as iface:
87
- gr.Markdown("# Chat with LM Studio 🚀")
88
- gr.Markdown("A chat interface powered by LM Studio. You can send text messages or upload files (e.g., `.txt`) to include in the conversation.")
89
-
90
- chatbot = gr.Chatbot(type='messages') # Specify 'messages' type to avoid deprecated tuple format
91
- state = gr.State([]) # To store conversation history as list of dicts
92
- embeddings_state = gr.State([]) # To store embeddings
93
-
94
- with gr.Row():
95
- with gr.Column(scale=4):
96
- user_input = gr.Textbox(
97
- label="Type your message here",
98
- placeholder="Enter text and press enter",
99
- lines=1
100
- )
101
- with gr.Column(scale=1):
102
- file_input = gr.File(
103
- label="Upload a file",
104
- file_types=[".txt"], # Restrict to text files; modify as needed
105
- type="binary" # Corrected from 'file' to 'binary'
106
- )
107
 
-         send_button = gr.Button("Send")

-         # Function to handle chat interactions
-         def chat_interface(user_message, uploaded_file, history, embeddings):
-             # Initialize history and embeddings if None
-             if history is None:
-                 history = []
-             if embeddings is None:
-                 embeddings = []

-             # Process uploaded file if present
-             if uploaded_file is not None:
                  try:
-                     # Read the uploaded file's content
-                     file_content = uploaded_file.read().decode('utf-8')
-                     user_message += f"\n\n[File Content]:\n{file_content}"
-                     logging.debug(f"Processed uploaded file: {uploaded_file.name}")
-
-                     # Generate embedding for the file content
-                     file_embedding = get_embeddings(file_content)
-                     if file_embedding:
-                         embeddings.append((file_content, file_embedding))
-                         logging.debug(f"Stored embedding for uploaded file: {uploaded_file.name}")
                  except Exception as e:
-                     logging.error(f"Error reading uploaded file: {e}")
-                     user_message += "\n\n[Error reading the uploaded file.]"
-
-             # Generate embedding for the user message
-             user_embedding = get_embeddings(user_message)
-             if user_embedding:
-                 embeddings.append((user_message, user_embedding))
-                 logging.debug("Stored embedding for user message.")
-
-             # Retrieve relevant context based on embeddings (optional)
-             # For demonstration, we'll retrieve top 2 similar past messages
-             context_messages = []
-             if embeddings:
-                 similarities = []
-                 for idx, (text, embed) in enumerate(embeddings[:-1]): # Exclude the current user message
-                     sim = cosine_similarity(user_embedding, embed)
-                     similarities.append((sim, idx))
-                 # Sort by similarity
                  similarities.sort(reverse=True, key=lambda x: x[0])
-                 top_n = 2
-                 top_indices = [idx for (_, idx) in similarities[:top_n]]
-                 for idx in top_indices:
-                     context_messages.append(history[idx]['content']) # Append user messages as context
-
-             # Append user message to history
-             history.append({"role": "user", "content": user_message})
-             logging.debug(f"Updated History: {history}")
-
-             # Format history with additional context
-             messages = []
-             if context_messages:
-                 messages.append({"role": "system", "content": "You have the following context:"})
-                 for ctx in context_messages:
-                     messages.append({"role": "user", "content": ctx})
-                 messages.append({"role": "system", "content": "Use this context to assist the user."})
-
-             # Append all messages from history
-             messages.extend(history)
-
-             # Get response from LM Studio
-             response_stream = chat_with_lmstudio(messages)
              response = ""

-             # To handle streaming, we'll initialize the assistant message and update it incrementally
-             assistant_message = {"role": "assistant", "content": ""}
-             history.append(assistant_message)
-             logging.debug(f"Appended empty assistant message: {assistant_message}")
-
-             for chunk in response_stream:
-                 response += chunk
-                 # Update the assistant message content
-                 assistant_message['content'] = response
-                 logging.debug(f"Updated assistant message: {assistant_message}")
-                 # Yield the updated history and embeddings
-                 yield history, embeddings
-
-             # Finalize the history with the complete response
-             assistant_message['content'] = response
-             logging.debug(f"Final assistant message: {assistant_message}")
-             yield history, embeddings
-
-         # Connect the send button to the chat function
-         send_button.click(
-             fn=chat_interface,
-             inputs=[user_input, file_input, state, embeddings_state],
-             outputs=[chatbot, embeddings_state],
-             queue=True # Enable queuing for handling multiple requests
-         )

-         # Also allow pressing Enter in the textbox to send the message
-         user_input.submit(
-             fn=chat_interface,
-             inputs=[user_input, file_input, state, embeddings_state],
-             outputs=[chatbot, embeddings_state],
-             queue=True
          )

-     # Add debug statements to determine file pattern issues
-     logging.debug(f"Current working directory: {os.getcwd()}")
-     logging.debug(f"Files in current directory: {os.listdir(os.getcwd())}")

-     iface.launch(share=True)

- # Main function to launch the chat interface
  if __name__ == "__main__":
-     gradio_chat_interface()

+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ High-Performance Chat Interface for LM Studio
+
+ This script creates a robust and efficient chat interface using Gradio,
+ facilitating seamless interactions with the LM Studio API. It leverages
+ GPU capabilities for accelerated processing and adheres to best practices
+ in modern Python programming. Comprehensive logging and error handling
+ ensure reliability and ease of maintenance.
+
+ Author: Your Name
+ Date: YYYY-MM-DD
+ """
+
  import gradio as gr
+ import httpx # Replacing 'requests' with 'httpx' for asynchronous HTTP calls
  import logging
  import json
  import os
  import numpy as np
+ import torch
+ import asyncio
+
+ # ===========================
+ # Configuration and Constants
+ # ===========================
+
+ # Set up logging for detailed diagnostics
+ logging.basicConfig(
+     level=logging.DEBUG, # Set to DEBUG for more verbose output
+     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ logger = logging.getLogger(__name__)
+
+ # LM Studio REST API Base URL
+ BASE_URL = os.getenv("LMSTUDIO_API_BASE_URL", "http://localhost:1234/v1")
+
+ # GPU Availability and Device Configuration
+ USE_GPU = torch.cuda.is_available()
+ DEVICE = torch.device("cuda" if USE_GPU else "cpu")
+ logger.info(f"Using device: {DEVICE}")
+
+ # Constants for Dynamic max_tokens Calculation
+ MODEL_MAX_TOKENS = 32768 # Model's maximum context length
+ AVERAGE_CHARS_PER_TOKEN = 4 # Approximate average characters per token
+ BUFFER_TOKENS = 2000 # Reserved tokens for system prompts and overhead
+ MIN_OUTPUT_TOKENS = 1000 # Minimum tokens to ensure meaningful responses
+
+ # Maximum number of embeddings to store to optimize memory usage
+ MAX_EMBEDDINGS = 100
+
+ # HTTPX Timeout Configuration
+ HTTPX_TIMEOUT = 300 # seconds, adjust as needed for longer processing times
+
+ # ===========================
+ # Utility Functions
+ # ===========================
+
+ def calculate_max_tokens(message, model_max_tokens=MODEL_MAX_TOKENS,
+                          buffer=BUFFER_TOKENS, avg_chars_per_token=AVERAGE_CHARS_PER_TOKEN,
+                          min_tokens=MIN_OUTPUT_TOKENS):
+     """
+     Calculate the maximum number of tokens for the output based on the input message length.

+     Args:
+         message (str): The input message from the user.
+         model_max_tokens (int): The total token capacity of the model.
+         buffer (int): Reserved tokens for system prompts and overhead.
+         avg_chars_per_token (int): Approximate number of characters per token.
+         min_tokens (int): Minimum number of tokens to ensure a meaningful response.

+     Returns:
+         int: The calculated maximum tokens for the output.
+     """
+     input_length = len(message)
+     input_tokens = input_length / avg_chars_per_token
+     max_tokens = model_max_tokens - int(input_tokens) - buffer
+     calculated_max = max(max_tokens, min_tokens)
+     logger.debug(f"Input length (chars): {input_length}, "
+                  f"Estimated input tokens: {input_tokens}, "
+                  f"Max tokens for output: {calculated_max}")
+     return calculated_max

+ async def get_embeddings(text):
+     """
+     Retrieve embeddings for the given text from the LM Studio API.
+
+     Args:
+         text (str): The input text to generate embeddings for.
+
+     Returns:
+         list or None: The embedding vector as a list if successful, else None.
+     """
+     url = f"{BASE_URL}/embeddings"
+     payload = {"model": "nomad_embed_text_v1_5_Q8_0", "input": text}
+     logger.info(f"Requesting embeddings for input: {text[:100]}...")
+     async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT) as client:
+         try:
+             response = await client.post(
+                 url,
+                 json=payload, # Proper JSON serialization
+                 headers={
+                     "Content-Type": "application/json" # Ensuring correct Content-Type
+                 }
+             )
+             logger.info(f"Embeddings response status code: {response.status_code}")
+             response.raise_for_status()
+             data = response.json()
+             logger.debug(f"Embeddings response data: {data}")
+             if "data" in data and len(data["data"]) > 0:
+                 embedding = np.array(data["data"][0]["embedding"])
+                 if USE_GPU:
+                     embedding = torch.tensor(embedding, device=DEVICE).tolist() # Convert to list for serialization
+                 return embedding
+             else:
+                 logger.error("Invalid response structure for embeddings.")
+                 return None
+         except httpx.RequestError as e:
+             logger.error(f"Failed to retrieve embeddings: {e}")
+             return None
+         except httpx.HTTPStatusError as e:
+             logger.error(f"HTTP error while retrieving embeddings: {e}")
+             return None
+         except json.JSONDecodeError as e:
+             logger.error(f"JSON decode error: {e}")
+             return None
+
+ def calculate_similarity(vec1, vec2):
+     """
+     Calculate the cosine similarity between two vectors using GPU acceleration.
+
+     Args:
+         vec1 (list or torch.Tensor): The first embedding vector.
+         vec2 (list or torch.Tensor): The second embedding vector.
+
+     Returns:
+         float: The cosine similarity score.
+     """
+     if vec1 is None or vec2 is None:
+         logger.warning("One or both vectors for similarity calculation are None.")
+         return 0.0
+     logger.debug("Calculating similarity between vectors.")
+     vec1_tensor = torch.tensor(vec1, device=DEVICE) if not isinstance(vec1, torch.Tensor) else vec1.to(DEVICE)
+     vec2_tensor = torch.tensor(vec2, device=DEVICE) if not isinstance(vec2, torch.Tensor) else vec2.to(DEVICE)
+     similarity = torch.nn.functional.cosine_similarity(vec1_tensor.unsqueeze(0), vec2_tensor.unsqueeze(0)).item()
+     logger.debug(f"Calculated similarity: {similarity}")
+     return similarity
+
+ # ===========================
+ # API Interaction Handling
+ # ===========================
+
+ async def chat_with_lmstudio(messages, max_tokens):
+     """
+     Handle chat completions with the LM Studio API using streaming.
+
+     Args:
+         messages (list): A list of message dictionaries following OpenAI's format.
+         max_tokens (int): The maximum number of tokens to generate in the response.
+
+     Yields:
+         str: Chunks of the generated response.
+     """
      url = f"{BASE_URL}/chat/completions"
      payload = {
+         "model": "Qwen2.5-Coder-32B-Instruct", # Adjusted model name if necessary
          "messages": messages,
          "temperature": 0.7,
+         "max_tokens": max_tokens,
+         "stream": True,
      }
+     logger.info(f"Sending request to chat/completions with max_tokens: {max_tokens}")
+     async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT) as client:
+         try:
+             async with client.stream("POST", url, json=payload, headers={"Content-Type": "application/json"}) as response:
+                 logger.info(f"chat/completions response status code: {response.status_code}")
+                 response.raise_for_status()
+                 async for line in response.aiter_lines():
+                     if line:
+                         try:
+                             decoded_line = line.strip()
+                             if decoded_line.startswith("data: "):
+                                 data = json.loads(decoded_line[6:])
+                                 logger.debug(f"Received chunk: {data}")
+                                 content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
+                                 yield content
+                         except json.JSONDecodeError as e:
+                             logger.error(f"JSON decode error: {e}")
+         except httpx.RequestError as e:
+             logger.error(f"LM Studio chat/completions request failed: {e}")
+             yield "An error occurred while generating a response."
+         except httpx.HTTPStatusError as e:
+             logger.error(f"HTTP error during chat/completions: {e}")
+             yield "An HTTP error occurred while generating a response."
+
+ # ===========================
+ # User Interface Implementation
+ # ===========================
+
  def gradio_chat_interface():
+     """
+     Create and launch the Gradio Blocks interface for the chat application.
+     """
+     with gr.Blocks() as interface:
+         gr.Markdown("# 🚀 High-Performance Chat Interface for LM Studio")

+         # Chatbot component to display the conversation
+         chatbot = gr.Chatbot(label="Conversation", type="messages")
+
+         # User input textbox
+         user_input = gr.Textbox(
+             label="Your Message",
+             placeholder="Type your message here...",
+             lines=2,
+             interactive=True
+         )
+
+         # File upload component for context files
+         file_input = gr.File(
+             label="Upload Context File (.txt)",
+             type="binary", # Correct value as per Gradio's expectations
+             interactive=True
+         )
+
+         # Display relevant context based on similarity
+         context_display = gr.Textbox(
+             label="Relevant Context",
+             interactive=False
+         )
+
+         # State to store embeddings and message history
+         embeddings_state = gr.State({"embeddings": [], "messages_history": []})
+
+         async def chat_handler(message, file, state):
+             """
+             Handle user input, process embeddings, retrieve context, and generate responses.

+             Args:
+                 message (str): The user's input message.
+                 file (UploadedFile): The uploaded context file.
+                 state (dict): The current state containing embeddings and message history.

+             Yields:
+                 list: Updated chatbot messages, new state, and context display text.
+             """
+             embeddings = state.get("embeddings", [])
+             messages_history = state.get("messages_history", [])
+
+             # ===========================
+             # File Processing
+             # ===========================
+             if file:
                  try:
+                     file_content = file.read().decode("utf-8")
+                     message += f"\n[File Content]:\n{file_content}"
+                     logger.info("Successfully processed uploaded file.")
                  except Exception as e:
+                     error_msg = f"Error reading file: {e}"
+                     logger.error(error_msg)
+                     yield [error_msg, state, ""]
+                     return # Terminate the generator after yielding the error
+
+             # ===========================
+             # Embeddings Generation
+             # ===========================
+             user_embedding = await get_embeddings(message)
+             if user_embedding is not None:
+                 embeddings.append(user_embedding)
+                 messages_history.append({"role": "user", "content": message})
+                 logger.info("Embeddings generated and appended to state.")
+             else:
+                 error_msg = "Failed to generate embeddings."
+                 logger.error(error_msg)
+                 yield [error_msg, state, ""]
+                 return # Terminate the generator after yielding the error
+
+             # Limit the number of stored embeddings to optimize memory usage
+             if len(embeddings) > MAX_EMBEDDINGS:
+                 embeddings = embeddings[-MAX_EMBEDDINGS:]
+                 messages_history = messages_history[-MAX_EMBEDDINGS:]
+
+             # ===========================
+             # Similarity Calculation and Context Retrieval
+             # ===========================
+             history = [{"role": "user", "content": message}]
+             context_text = ""
+             if len(embeddings) > 1:
+                 similarities = [
+                     (calculate_similarity(user_embedding, emb), idx)
+                     for idx, emb in enumerate(embeddings[:-1])
+                 ]
                  similarities.sort(reverse=True, key=lambda x: x[0])
+                 top_context = similarities[:3]
+                 for similarity, idx in top_context:
+                     context_message = messages_history[idx]
+                     history.insert(0, {"role": "system", "content": context_message["content"]})
+                     context_text += f"Context: {context_message['content'][:100]}...\n"
+                 logger.info("Relevant context retrieved based on similarity.")
+
+             # ===========================
+             # Dynamic max_tokens Calculation
+             # ===========================
+             max_tokens = calculate_max_tokens(message)
+             logger.info(f"Calculated max_tokens for output: {max_tokens}")
+
+             # ===========================
+             # Chat with LM Studio API
+             # ===========================
              response = ""
+             try:
+                 async for chunk in chat_with_lmstudio(history, max_tokens):
+                     response += chunk
+                     # Ensure response is a string
+                     if not isinstance(response, str):
+                         response = str(response)
+                     # Handle empty response
+                     if not response.strip():
+                         response = "Sorry, I couldn't process your request."

+                     # Update chatbot in real-time with partial responses
+                     updated_chat = chatbot.value.copy()
+                     updated_chat.append({"role": "user", "content": message})
+                     updated_chat.append({"role": "assistant", "content": response})
+                     logger.debug(f"Updated Chat: {updated_chat}")
+                     yield [
+                         updated_chat,
+                         {"embeddings": embeddings, "messages_history": messages_history},
+                         context_text
+                     ]
+                 logger.info("Response generation completed.")
+             except Exception as e:
+                 error_msg = f"An error occurred while generating a response: {e}"
+                 logger.error(error_msg)
+                 yield [error_msg, state, ""]
+                 return # Terminate the generator after yielding the error
+
+             # ===========================
+             # Final State Update
+             # ===========================
+             messages_history.append({"role": "assistant", "content": response})
+             new_state = {"embeddings": embeddings, "messages_history": messages_history}
+             updated_chat = chatbot.value.copy()
+             updated_chat.append({"role": "user", "content": message})
+             updated_chat.append({"role": "assistant", "content": response})

+             # Final yield
+             try:
+                 logger.debug(f"Final Updated Chat: {updated_chat}")
+                 yield [
+                     updated_chat,
+                     new_state,
+                     context_text
+                 ]
+             except Exception as e:
+                 error_msg = f"Error updating chatbot: {e}"
+                 logger.error(error_msg)
+                 yield ["An error occurred while updating the chat.", state, ""]
+
+         # ===========================
+         # Send Button Configuration
+         # ===========================
+         send_button = gr.Button("Send")
+         send_button.click(
+             chat_handler,
+             inputs=[user_input, file_input, embeddings_state],
+             outputs=[chatbot, embeddings_state, context_display],
+             show_progress=True
          )

+     # ===========================
+     # Launch the Interface
+     # ===========================
+     interface.launch(share=True, server_name="0.0.0.0", server_port=7860)

+ # ===========================
+ # Main Execution
+ # ===========================

  if __name__ == "__main__":
+     asyncio.run(gradio_chat_interface())
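For readers skimming the diff, the dynamic output budget introduced in calculate_max_tokens can be checked in isolation. The sketch below mirrors the same arithmetic with the constants from the new script; the helper name budget is illustrative only and is not part of the commit:

MODEL_MAX_TOKENS = 32768       # model context length used in the new script
AVERAGE_CHARS_PER_TOKEN = 4    # rough chars-per-token estimate
BUFFER_TOKENS = 2000           # reserved for system prompts and overhead
MIN_OUTPUT_TOKENS = 1000       # floor for the response budget

def budget(message: str) -> int:
    estimated_input_tokens = len(message) // AVERAGE_CHARS_PER_TOKEN
    return max(MODEL_MAX_TOKENS - estimated_input_tokens - BUFFER_TOKENS, MIN_OUTPUT_TOKENS)

print(budget("x" * 8000))    # 8000 chars ~ 2000 tokens, so 32768 - 2000 - 2000 = 28768
print(budget("x" * 200000))  # a very long input collapses to the 1000-token floor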
nltk.exe ADDED
Binary file (108 kB).

torchfrtrace.exe ADDED
Binary file (108 kB).

torchrun.exe ADDED
Binary file (108 kB).

transformers-cli.exe ADDED
Binary file (108 kB).