sitammeur committed
Commit 23f3a7b · verified · 1 Parent(s): 2dc05b0

Update app.py

Files changed (1)
app.py +43 -19
app.py CHANGED
@@ -2,38 +2,45 @@
 import warnings
 warnings.filterwarnings("ignore")

+import os
 import json
 import subprocess
 import sys
+from typing import List, Tuple
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent import MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
-import gradio as gr
 from huggingface_hub import hf_hub_download
-from typing import List, Tuple
+import gradio as gr
 from logger import logging
 from exception import CustomExceptionHandling


 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
+
 hf_hub_download(
-    repo_id="bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF",
-    filename="Qwen2.5-Coder-1.5B-Instruct-Q6_K.gguf",
+    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
+    filename="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
     local_dir="./models",
 )
 hf_hub_download(
-    repo_id="bartowski/Qwen2.5-Coder-0.5B-Instruct-GGUF",
-    filename="Qwen2.5-Coder-0.5B-Instruct-Q6_K.gguf",
+    repo_id="Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF",
+    filename="qwen2.5-coder-0.5b-instruct-q6_k.gguf",
     local_dir="./models",
 )


 # Set the title and description
 title = "Qwen-Coder Llama.cpp"
-description = """Qwen2.5-Coder, a six-model family of LLMs, boasts enhanced code generation, reasoning, and debugging. Trained on 5.5 trillion tokens, its 32B parameter model rivals GPT-4o, offering versatile capabilities for coding and broader applications."""
+description = """**[Qwen2.5-Coder](https://huggingface.co/collections/Qwen/qwen25-coder-66eaa22e6f99801bf65b0c2f)**, a six-model family of LLMs, boasts enhanced code generation, reasoning, and debugging. Trained on 5.5 trillion tokens, its 32B parameter model rivals GPT-4o, offering versatile capabilities for coding and broader applications.
+This interactive chat interface allows you to experiment with the [`Qwen2.5-Coder-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct) and [`Qwen2.5-Coder-1.5B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct) coding models using various prompts and generation parameters.
+Users can select different model variants (GGUF format), system prompts, and observe generated responses in real-time.
+Key generation parameters, such as `temperature`, `max_tokens`, `top_k` and others are exposed below for tuning model behavior."""


 llm = None
@@ -42,13 +49,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
+    model: str = "qwen2.5-coder-0.5b-instruct-q6_k.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Qwen2.5-Coder model via Llama.cpp.
@@ -72,8 +79,18 @@ def respond(
     global llm
     global llm_model

+    # Ensure model is not None
+    if model is None:
+        model = "qwen2.5-coder-0.5b-instruct-q6_k.gguf"
+
     # Load the model
     if llm is None or llm_model != model:
+        # Check if model file exists
+        model_path = f"models/{model}"
+        if not os.path.exists(model_path):
+            yield f"Error: Model file not found at {model_path}. Please check your model path."
+            return
+
         llm = Llama(
             model_path=f"models/{model}",
             flash_attn=False,
@@ -146,10 +163,10 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "Qwen2.5-Coder-1.5B-Instruct-Q6_K.gguf",
-                "Qwen2.5-Coder-0.5B-Instruct-Q6_K.gguf",
+                "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
+                "qwen2.5-coder-0.5b-instruct-q6_k.gguf",
             ],
-            value="Qwen2.5-Coder-0.5B-Instruct-Q6_K.gguf",
+            value="qwen2.5-coder-0.5b-instruct-q6_k.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
@@ -205,11 +222,18 @@ demo = gr.ChatInterface(
     stop_btn="Stop",
     title=title,
     description=description,
-    chatbot=gr.Chatbot(scale=1, show_copy_button=True),
+    chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
+    editable=True,
+    cache_examples=False,
 )


 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch(debug=False)
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+    )
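
Note: the heart of this commit is the guarded model-loading path added to respond(). Below is a minimal, self-contained sketch of that same pattern lifted out of the Gradio app so it reads on its own; DEFAULT_MODEL and load_model_guarded are illustrative names introduced here, and object() stands in for the real Llama constructor, so treat it as a reading aid rather than the app's actual code.

    import os
    from typing import Optional, Tuple

    # Stand-in default matching the commit's fallback filename.
    DEFAULT_MODEL = "qwen2.5-coder-0.5b-instruct-q6_k.gguf"

    llm = None        # cached model instance, as in app.py
    llm_model = None  # filename of the currently loaded model


    def load_model_guarded(model: Optional[str]) -> Tuple[Optional[object], Optional[str]]:
        """Return (model_instance, error); exactly one is None."""
        global llm, llm_model

        # Fall back to the default when the UI passes None.
        if model is None:
            model = DEFAULT_MODEL

        # Reload only when nothing is loaded yet or the selection changed.
        if llm is None or llm_model != model:
            model_path = f"models/{model}"
            if not os.path.exists(model_path):
                # app.py yields this message into the chat instead of raising.
                return None, f"Error: Model file not found at {model_path}."
            llm = object()  # stand-in for Llama(model_path=model_path, ...)
            llm_model = model

        return llm, None

Because the existence check lives inside the reload branch, a bad dropdown value surfaces as a readable chat message rather than an unhandled exception from Llama(), while repeated calls with an unchanged selection keep reusing the cached instance.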