pabl-o-ce committed
Commit
d9528c3
1 Parent(s): b8bc514
Files changed (3)
  1. README.md +6 -4
  2. app.py +181 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,13 +1,15 @@
 ---
 title: Chat
-emoji: 🐠
-colorFrom: gray
-colorTo: indigo
+emoji: 🗿
+colorFrom: red
+colorTo: purple
 sdk: gradio
 sdk_version: 4.31.5
 app_file: app.py
-pinned: false
+pinned: true
 license: apache-2.0
+header: mini
+fullWidth: true
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
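
These keys are standard Spaces front matter; the configuration reference linked above documents each of them. As an aside, the same metadata can be read programmatically with huggingface_hub. A minimal sketch — the repo id "pabl-o-ce/chat" is an assumption for illustration, not taken from this commit:

from huggingface_hub import RepoCard

# Sketch: load the Space's README card and inspect its YAML front matter.
# The repo id below is hypothetical; substitute the real Space id.
card = RepoCard.load("pabl-o-ce/chat", repo_type="space")
print(card.data.to_dict())  # e.g. {'emoji': '🗿', 'colorFrom': 'red', 'pinned': True, ...}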
app.py ADDED
@@ -0,0 +1,181 @@
+import spaces
+import json
+import subprocess
+import gradio as gr
+from huggingface_hub import hf_hub_download
+
+subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
+subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
+
+hf_hub_download(
+    repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
+    filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
+    local_dir="./models"
+)
+hf_hub_download(
+    repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF",
+    filename="Llama-3-8B-Synthia-v3.5-f16.gguf",
+    local_dir="./models"
+)
+hf_hub_download(
+    repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
+    filename="Mistral-7B-Instruct-v0.3-f32.gguf",
+    local_dir="./models"
+)
+
+css = """
+.message-row {
+    justify-content: space-evenly !important;
+}
+.message-bubble-border {
+    border-radius: 6px !important;
+}
+.dark.message-bubble-border {
+    border-color: #343140 !important;
+}
+.dark.user {
+    background: #1e1c26 !important;
+}
+.dark.assistant.dark, .dark.pending.dark {
+    background: #16141c !important;
+}
+"""
+
+def get_messages_formatter_type(model_name):
+    from llama_cpp_agent import MessagesFormatterType
+    if "Llama" in model_name:
+        return MessagesFormatterType.LLAMA_3
+    elif "Mistral" in model_name:
+        return MessagesFormatterType.MISTRAL
+    else:
+        raise ValueError(f"Unsupported model: {model_name}")
+
+@spaces.GPU(duration=120)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,
+    repeat_penalty,
+    model,
+):
+    from llama_cpp import Llama
+    from llama_cpp_agent import LlamaCppAgent
+    from llama_cpp_agent.providers import LlamaCppPythonProvider
+    from llama_cpp_agent.chat_history import BasicChatHistory
+    from llama_cpp_agent.chat_history.messages import Roles
+
+    chat_template = get_messages_formatter_type(model)
+
+    llm = Llama(
+        model_path=f"models/{model}",
+        flash_attn=True,
+        n_threads=40,
+        n_gpu_layers=81,
+        n_batch=1024,
+        n_ctx=8192,
+    )
+    provider = LlamaCppPythonProvider(llm)
+
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+
+    messages = BasicChatHistory()
+
+    for msn in history:
+        user = {
+            'role': Roles.user,
+            'content': msn[0]
+        }
+        assistant = {
+            'role': Roles.assistant,
+            'content': msn[1]
+        }
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False
+    )
+
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield outputs
+
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a helpful assistant.", label="System message"),
+        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p",
+        ),
+        gr.Slider(
+            minimum=0,
+            maximum=100,
+            value=40,
+            step=1,
+            label="Top-k",
+        ),
+        gr.Slider(
+            minimum=0.0,
+            maximum=2.0,
+            value=1.1,
+            step=0.1,
+            label="Repetition penalty",
+        ),
+        gr.Dropdown([
+                'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf',
+                'Llama-3-8B-Synthia-v3.5-f16.gguf',
+                'Mistral-7B-Instruct-v0.3-f32.gguf'
+            ],
+            value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
+            label="Model"
+        ),
+    ],
+    theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray", font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
+        body_background_fill_dark="#16141c",
+        block_background_fill_dark="#16141c",
+        block_border_width="1px",
+        block_title_background_fill_dark="#1e1c26",
+        input_background_fill_dark="#292733",
+        button_secondary_background_fill_dark="#24212b",
+        border_color_primary_dark="#343140",
+        background_fill_secondary_dark="#16141c",
+        color_accent_soft_dark="transparent"
+    ),
+    css=css,
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear",
+    submit_btn="Send",
+    description="Llama-cpp-agent: Chat multi llm selection"
+)
+
+if __name__ == "__main__":
+    demo.launch()
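
The respond handler is a generator: each yield returns the accumulated reply so far, which is what lets gr.ChatInterface stream tokens into the chat bubble. It can also be driven directly, without the UI. A minimal smoke-test sketch — it assumes the models above are already downloaded and a GPU environment is available (respond is wrapped in @spaces.GPU), and the positional arguments simply mirror the additional_inputs order:

# Sketch: exercise the streaming generator outside Gradio.
for partial in respond(
    "Hello, who are you?",                # message
    [],                                   # history (no prior turns)
    "You are a helpful assistant.",       # system_message
    256,                                  # max_tokens
    0.7,                                  # temperature
    0.95,                                 # top_p
    40,                                   # top_k
    1.1,                                  # repeat_penalty
    "Mistral-7B-Instruct-v0.3-f32.gguf",  # model
):
    print(partial)  # prints the reply-so-far on each streamed chunk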
requirements.txt ADDED
@@ -0,0 +1,2 @@
+huggingface_hub==0.22.2
+scikit-build-core
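
Note that llama-cpp-python and llama-cpp-agent are deliberately absent here: app.py installs them at runtime via subprocess so it can point at the prebuilt CUDA wheel index, and scikit-build-core is presumably listed because it is llama-cpp-python's build backend, needed if pip falls back to a source build. An alternative layout (a sketch, not what this commit does) would pin everything in requirements.txt, since pip honors --extra-index-url inside requirements files:

--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
huggingface_hub==0.22.2
scikit-build-core
llama-cpp-python==0.2.75
llama-cpp-agent==0.2.10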