Spaces:

sitammeur
/

Qwen-Coder-llamacpp

Running

App Files Community

sitammeur committed 12 days ago

Commit

23f3a7b

verified ·

1 Parent(s): 2dc05b0

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -19

app.py CHANGED Viewed

@@ -2,38 +2,45 @@
 import warnings
 warnings.filterwarnings("ignore")
 import json
 import subprocess
 import sys
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent import MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
-import gradio as gr
 from huggingface_hub import hf_hub_download
-from typing import List, Tuple
 from logger import logging
 from exception import CustomExceptionHandling
 # Download gguf model files
 hf_hub_download(
-    repo_id="bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF",
-    filename="Qwen2.5-Coder-1.5B-Instruct-Q6_K.gguf",
     local_dir="./models",
 )
 hf_hub_download(
-    repo_id="bartowski/Qwen2.5-Coder-0.5B-Instruct-GGUF",
-    filename="Qwen2.5-Coder-0.5B-Instruct-Q6_K.gguf",
     local_dir="./models",
 )
 # Set the title and description
 title = "Qwen-Coder Llama.cpp"
-description = """Qwen2.5-Coder, a six-model family of LLMs, boasts enhanced code generation, reasoning, and debugging. Trained on 5.5 trillion tokens, its 32B parameter model rivals GPT-4o, offering versatile capabilities for coding and broader applications."""
 llm = None
@@ -42,13 +49,13 @@ llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
-    model: str,
-    system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    repeat_penalty: float,
 ):
     """
     Respond to a message using the Qwen2.5-Coder model via Llama.cpp.
@@ -72,8 +79,18 @@ def respond(
         global llm
         global llm_model
         # Load the model
         if llm is None or llm_model != model:
             llm = Llama(
                 model_path=f"models/{model}",
                 flash_attn=False,
@@ -146,10 +163,10 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "Qwen2.5-Coder-1.5B-Instruct-Q6_K.gguf",
-                "Qwen2.5-Coder-0.5B-Instruct-Q6_K.gguf",
             ],
-            value="Qwen2.5-Coder-0.5B-Instruct-Q6_K.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
@@ -205,11 +222,18 @@ demo = gr.ChatInterface(
     stop_btn="Stop",
     title=title,
     description=description,
-    chatbot=gr.Chatbot(scale=1, show_copy_button=True),
     flagging_mode="never",
 )
 # Launch the chat interface
 if __name__ == "__main__":
-    demo.launch(debug=False)

 import warnings
 warnings.filterwarnings("ignore")
+import os
 import json
 import subprocess
 import sys
+from typing import List, Tuple
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent
 from llama_cpp_agent import MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
 from huggingface_hub import hf_hub_download
+import gradio as gr
 from logger import logging
 from exception import CustomExceptionHandling
 # Download gguf model files
+if not os.path.exists("./models"):
+    os.makedirs("./models")
 hf_hub_download(
+    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
+    filename="qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
     local_dir="./models",
 )
 hf_hub_download(
+    repo_id="Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF",
+    filename="qwen2.5-coder-0.5b-instruct-q6_k.gguf",
     local_dir="./models",
 )
 # Set the title and description
 title = "Qwen-Coder Llama.cpp"
+description = """**[Qwen2.5-Coder](https://huggingface.co/collections/Qwen/qwen25-coder-66eaa22e6f99801bf65b0c2f)**, a six-model family of LLMs, boasts enhanced code generation, reasoning, and debugging. Trained on 5.5 trillion tokens, its 32B parameter model rivals GPT-4o, offering versatile capabilities for coding and broader applications.
+This interactive chat interface allows you to experiment with the [`Qwen2.5-Coder-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct) and [`Qwen2.5-Coder-1.5B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct) coding models using various prompts and generation parameters.
+Users can select different model variants (GGUF format), system prompts, and observe generated responses in real-time.
+Key generation parameters, such as ⁣`temperature`, `max_tokens`, `top_k` and others are exposed below for tuning model behavior."""
 llm = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
+    model: str = "qwen2.5-coder-0.5b-instruct-q6_k.gguf",  # Set default model
+    system_message: str = "You are a helpful assistant.",
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repeat_penalty: float = 1.1,
 ):
     """
     Respond to a message using the Qwen2.5-Coder model via Llama.cpp.
         global llm
         global llm_model
+        # Ensure model is not None
+        if model is None:
+            model = "qwen2.5-coder-0.5b-instruct-q6_k.gguf"
         # Load the model
         if llm is None or llm_model != model:
+            # Check if model file exists
+            model_path = f"models/{model}"
+            if not os.path.exists(model_path):
+                yield f"Error: Model file not found at {model_path}. Please check your model path."
+                return
             llm = Llama(
                 model_path=f"models/{model}",
                 flash_attn=False,
     additional_inputs=[
         gr.Dropdown(
             choices=[
+                "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf",
+                "qwen2.5-coder-0.5b-instruct-q6_k.gguf",
             ],
+            value="qwen2.5-coder-0.5b-instruct-q6_k.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
     stop_btn="Stop",
     title=title,
     description=description,
+    chatbot=gr.Chatbot(scale=1, show_copy_button=True, resizable=True),
     flagging_mode="never",
+    editable=True,
+    cache_examples=False,
 )
 # Launch the chat interface
 if __name__ == "__main__":
+    demo.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+    )