gokaygokay committed
Commit 7c0f531
1 parent: 12ec307

Create app.py

Files changed (1)
app.py: +200 −0
app.py ADDED
@@ -0,0 +1,200 @@
# Gradio chat demo for Mistral-Nemo-Instruct-2407, served with llama.cpp via
# llama-cpp-python and llama-cpp-agent.
#import spaces
import json
import subprocess
import os
import sys

def run_command(command):
    # Run a shell command, printing stderr and aborting on failure.
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    output, error = process.communicate()
    if process.returncode != 0:
        print(f"Error executing command: {command}")
        print(error.decode('utf-8'))
        sys.exit(1)
    return output.decode('utf-8')

def install_packages():
    # Clone the repository with submodules
    run_command("git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python.git")

    # Change to the cloned directory
    os.chdir("llama-cpp-python")

    # Checkout the specific commit in the llama.cpp submodule
    os.chdir("vendor/llama.cpp")
    run_command("git checkout 50e0535")
    os.chdir("../..")

    # Upgrade pip
    run_command("pip install --upgrade pip")

    # Install all optional dependencies
    run_command("pip install -e .[all]")

    # Clear the local build cache
    run_command("make clean")

    # Reinstall the package
    run_command("pip install -e .")

    # Install llama-cpp-agent
    run_command("pip install llama-cpp-agent")

    print("Installation complete!")

try:
    install_packages()

    # If installation is successful, import the libraries
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    print("Libraries imported successfully!")
except Exception as e:
    print(f"Installation failed or libraries couldn't be imported: {str(e)}")
    sys.exit(1)

import gradio as gr
from huggingface_hub import hf_hub_download

# Download the quantized GGUF weights into the local models directory
hf_hub_download(
    repo_id="MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
    filename="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
    local_dir="./models"
)

# Cached Llama instance and the name of the model it was loaded from,
# so the model is only (re)loaded when the selection changes
llm = None
llm_model = None

#@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    chat_template = MessagesFormatterType.MISTRAL

    global llm
    global llm_model

    # (Re)load the model only if nothing is loaded yet or a different model was selected
    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=32768,
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Rebuild the chat history from the Gradio (user, assistant) message pairs
    messages = BasicChatHistory()

    for msn in history:
        user = {
            'role': Roles.user,
            'content': msn[0]
        }
        assistant = {
            'role': Roles.assistant,
            'content': msn[1]
        }
        messages.add_message(user)
        messages.add_message(assistant)

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )

    # Stream the accumulated response back to the UI token by token
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs

description = """<p><center>
<a href="https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407" target="_blank">[Instruct Model]</a>
<a href="https://huggingface.co/mistralai/Mistral-Nemo-Base-2407" target="_blank">[Base Model]</a>
<a href="https://huggingface.co/second-state/Mistral-Nemo-Instruct-2407-GGUF" target="_blank">[GGUF Version]</a>
</center></p>
"""

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            ['Mistral-Nemo-Instruct-2407.Q5_K_M.gguf'],
            value="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
            label="Model"
        ),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=40,
            step=1,
            label="Top-k",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.1,
            step=0.1,
            label="Repetition penalty",
        ),
    ],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    title="Chat with Mistral-NeMo using llama.cpp",
    description=description,
    chatbot=gr.Chatbot(
        scale=1,
        likeable=False,
        show_copy_button=True
    )
)

if __name__ == "__main__":
    demo.launch(debug=True)
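
For reference, a minimal smoke test (not part of the committed app.py) could drive the respond generator directly instead of going through the Gradio UI. This is a sketch under the assumption that the GGUF file above has already been downloaded into ./models and that llama-cpp-python was built successfully; the parameter values are just the UI defaults.

# Hypothetical smoke test: stream one reply from respond() outside Gradio
final = ""
for partial in respond(
    "Hello!",
    history=[],
    model="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
    system_message="You are a helpful assistant.",
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.1,
):
    final = partial  # each yield is the accumulated response so far
print(final)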