sitammeur committed on
Commit 8f0339b · verified · 1 Parent(s): 4a1589c

Upload 4 files

Files changed (4)
  1. app.py +212 -0
  2. exception.py +50 -0
  3. logger.py +21 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,212 @@
+ # Importing required libraries
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import json
+ import subprocess
+ import sys
+ from llama_cpp import Llama
+ from llama_cpp_agent import LlamaCppAgent
+ from llama_cpp_agent import MessagesFormatterType
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
+ from llama_cpp_agent.chat_history import BasicChatHistory
+ from llama_cpp_agent.chat_history.messages import Roles
+ import gradio as gr
+ from huggingface_hub import hf_hub_download
+ from typing import List, Tuple
+ from logger import logging
+ from exception import CustomExceptionHandling
+
+
+ # Download gguf model files
+ llm = None
+ llm_model = None
+
+ hf_hub_download(
+     repo_id="bartowski/SmolLM2-135M-Instruct-GGUF",
+     filename="SmolLM2-135M-Instruct-Q6_K.gguf",
+     local_dir="./models",
+ )
+ hf_hub_download(
+     repo_id="bartowski/SmolLM2-360M-Instruct-GGUF",
+     filename="SmolLM2-360M-Instruct-Q6_K.gguf",
+     local_dir="./models",
+ )
+
+ # Set the title and description
+ title = "SmolLM🤗 Llama.cpp"
+ description = """SmolLM2 is a family of three small language models that perform well at instruction following and reasoning. The largest model significantly improves over its predecessor through advanced training techniques."""
+
+
+ def respond(
+     message: str,
+     history: List[Tuple[str, str]],
+     model: str,
+     system_message: str,
+     max_tokens: int,
+     temperature: float,
+     top_p: float,
+     top_k: int,
+     repeat_penalty: float,
+ ):
+     """
+     Respond to a message using the SmolLM2 model via Llama.cpp.
+
+     Args:
+         - message (str): The message to respond to.
+         - history (List[Tuple[str, str]]): The chat history.
+         - model (str): The model file to use.
+         - system_message (str): The system message to use.
+         - max_tokens (int): The maximum number of tokens to generate.
+         - temperature (float): The temperature of the model.
+         - top_p (float): The top-p of the model.
+         - top_k (int): The top-k of the model.
+         - repeat_penalty (float): The repetition penalty of the model.
+
+     Yields:
+         str: The response accumulated so far.
+     """
+     try:
+         # Load the global variables
+         global llm
+         global llm_model
+
+         # Load the model (only reload when a different model is selected)
+         if llm is None or llm_model != model:
+             llm = Llama(
+                 model_path=f"models/{model}",
+                 flash_attn=False,
+                 n_gpu_layers=0,
+                 n_batch=32,
+                 n_ctx=8192,
+             )
+             llm_model = model
+         provider = LlamaCppPythonProvider(llm)
+
+         # Create the agent
+         agent = LlamaCppAgent(
+             provider,
+             system_prompt=f"{system_message}",
+             predefined_messages_formatter_type=MessagesFormatterType.CHATML,
+             debug_output=True,
+         )
+
+         # Set the settings like temperature, top-k, top-p, max tokens, etc.
+         settings = provider.get_provider_default_settings()
+         settings.temperature = temperature
+         settings.top_k = top_k
+         settings.top_p = top_p
+         settings.max_tokens = max_tokens
+         settings.repeat_penalty = repeat_penalty
+         settings.stream = True
+
+         messages = BasicChatHistory()
+
+         # Add the chat history
+         for msg in history:
+             user = {"role": Roles.user, "content": msg[0]}
+             assistant = {"role": Roles.assistant, "content": msg[1]}
+             messages.add_message(user)
+             messages.add_message(assistant)
+
+         # Get the response stream
+         stream = agent.get_chat_response(
+             message,
+             llm_sampling_settings=settings,
+             chat_history=messages,
+             returns_streaming_generator=True,
+             print_output=False,
+         )
+
+         # Log the success
+         logging.info("Response stream generated successfully")
+
+         # Generate the response
+         outputs = ""
+         for output in stream:
+             outputs += output
+             yield outputs
+
+     # Handle exceptions that may occur during the process
+     except Exception as e:
+         # Custom exception handling
+         raise CustomExceptionHandling(e, sys) from e
+
+
+ # Create a chat interface
+ demo = gr.ChatInterface(
+     respond,
+     examples=[
+         ["What is the capital of France?"],
+         ["Why is the color of the sky blue?"],
+         ["What is gravity?"],
+     ],
+     additional_inputs_accordion=gr.Accordion(
+         label="⚙️ Parameters", open=False, render=False
+     ),
+     additional_inputs=[
+         gr.Dropdown(
+             choices=[
+                 "SmolLM2-135M-Instruct-Q6_K.gguf",
+                 "SmolLM2-360M-Instruct-Q6_K.gguf",
+             ],
+             value="SmolLM2-135M-Instruct-Q6_K.gguf",
+             label="Model",
+             info="Select the AI model to use for chat",
+         ),
+         gr.Textbox(
+             value="You are a helpful AI assistant focused on accurate and ethical responses.",
+             label="System Prompt",
+             info="Define the AI assistant's personality and behavior",
+             lines=2,
+         ),
+         gr.Slider(
+             minimum=512,
+             maximum=4096,
+             value=2048,
+             step=512,
+             label="Max Tokens",
+             info="Maximum length of response (higher = longer replies)",
+         ),
+         gr.Slider(
+             minimum=0.1,
+             maximum=2.0,
+             value=0.7,
+             step=0.1,
+             label="Temperature",
+             info="Creativity level (higher = more creative, lower = more focused)",
+         ),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p",
+             info="Nucleus sampling threshold",
+         ),
+         gr.Slider(
+             minimum=1,
+             maximum=100,
+             value=40,
+             step=1,
+             label="Top-k",
+             info="Limit vocabulary choices to top K tokens",
+         ),
+         gr.Slider(
+             minimum=1.0,
+             maximum=2.0,
+             value=1.1,
+             step=0.1,
+             label="Repetition Penalty",
+             info="Penalize repeated words (higher = less repetition)",
+         ),
+     ],
+     theme="Ocean",
+     submit_btn="Send",
+     stop_btn="Stop",
+     title=title,
+     description=description,
+     chatbot=gr.Chatbot(scale=1, show_copy_button=True),
+     flagging_mode="never",
+ )
+
+
+ # Launch the chat interface
+ if __name__ == "__main__":
+     demo.launch(debug=False)
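
For reference, a minimal sketch of driving the `respond` generator outside the Gradio UI (assuming the GGUF file has already been downloaded to `./models`; the message and sampling values below are illustrative, not part of the app):

# Minimal usage sketch (assumption: ./models already holds the GGUF file;
# importing app also triggers the hf_hub_download calls above).
from app import respond

final = ""
for partial in respond(
    message="What is gravity?",
    history=[],
    model="SmolLM2-135M-Instruct-Q6_K.gguf",
    system_message="You are a helpful AI assistant.",
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.1,
):
    final = partial  # each yield is the accumulated response so far
print(final)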
exception.py ADDED
@@ -0,0 +1,50 @@
+ """
+ This module defines a custom exception handling class and a function to get an error message with details of the error.
+ """
+
+ # Standard Library
+ import sys
+
+ # Local imports
+ from logger import logging
+
+
+ # Function to build an error message with details of the error (file name and line number) when an error occurs in the program
+ def get_error_message(error, error_detail: sys):
+     """
+     Get error message with details of the error.
+
+     Args:
+         - error (Exception): The error that occurred.
+         - error_detail (sys): The sys module, used to access the current exception info.
+
+     Returns:
+         str: A string containing the error message along with the file name and line number where the error occurred.
+     """
+     _, _, exc_tb = error_detail.exc_info()
+
+     # Get error details
+     file_name = exc_tb.tb_frame.f_code.co_filename
+     return "Error occurred in python script name [{0}] line number [{1}] error message [{2}]".format(
+         file_name, exc_tb.tb_lineno, str(error)
+     )
+
+
+ # Custom Exception Handling Class Definition
+ class CustomExceptionHandling(Exception):
+     """
+     Custom Exception Handling:
+     This class defines a custom exception that can be raised when an error occurs in the program.
+     It takes an error message and an error detail as input and returns a formatted error message when the exception is raised.
+     """
+
+     # Constructor
+     def __init__(self, error_message, error_detail: sys):
+         """Initialize the exception"""
+         super().__init__(error_message)
+
+         self.error_message = get_error_message(error_message, error_detail=error_detail)
+
+     def __str__(self):
+         """String representation of the exception"""
+         return self.error_message
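
A minimal sketch of the intended raise pattern, mirroring how `app.py` uses this class (the `1 / 0` failure is illustrative):

# Minimal sketch: wrap any failure in the custom exception so that
# str(exc) carries the file name and line number of the original error.
import sys
from exception import CustomExceptionHandling

try:
    1 / 0
except Exception as e:
    raise CustomExceptionHandling(e, sys) from e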
logger.py ADDED
@@ -0,0 +1,21 @@
+ # Importing the required modules
+ import os
+ import logging
+ from datetime import datetime
+
+ # Creating a log file with the current date and time as the name of the file
+ LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
+
+ # Creating a logs folder if it does not exist
+ logs_dir = os.path.join(os.getcwd(), "logs")
+ os.makedirs(logs_dir, exist_ok=True)
+
+ # Setting the log file path and the log level
+ LOG_FILE_PATH = os.path.join(logs_dir, LOG_FILE)
+
+ # Configuring the logger
+ logging.basicConfig(
+     filename=LOG_FILE_PATH,
+     format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
+     level=logging.INFO,
+ )
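
Because this module configures the root logger at import time, callers only need to import it, as `app.py` and `exception.py` already do. A minimal sketch (the log message is illustrative):

# Minimal sketch: importing logger configures logging as a side effect,
# so records are written to logs/<timestamp>.log.
from logger import logging

logging.info("Application started")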
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ huggingface_hub==0.22.2
+ scikit-build-core
+ llama-cpp-python
+ llama-cpp-agent>=0.2.10
+ gradio
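
The dependencies can be installed with pip:

pip install -r requirements.txt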