Abhaykoul committed on
Commit
01f3c1d
·
verified ·
1 Parent(s): 8690be4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import subprocess
3
+ from llama_cpp import Llama
4
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
5
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
6
+ from llama_cpp_agent.chat_history import BasicChatHistory
7
+ from llama_cpp_agent.chat_history.messages import Roles
8
+ import gradio as gr
9
+ from huggingface_hub import hf_hub_download
10
+
11
# Fetch both quantized GGUF builds of the model into ./models at import time,
# so that either choice offered in the UI dropdown resolves to a local file.
_MODEL_REPO = "OEvortex/HelpingAI-3B-chat"
for _gguf_name in (
    "helpingai-3b-chat-iq4_xs-imat.gguf",
    "helpingai-3b-chat-q4_k_m.gguf",
):
    hf_hub_download(
        repo_id=_MODEL_REPO,
        filename=_gguf_name,
        local_dir="./models",
    )

# Module-level cache used by respond(): the currently loaded Llama instance
# and the file name it was loaded from. Both stay None until the first request.
llm = None
llm_model = None
24
+
25
def respond(
    message,
    history: list[tuple[str, str]],
    model,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat reply for ``message`` from the selected GGUF model.

    The Llama instance is cached in the module-level ``llm`` / ``llm_model``
    globals and is only (re)created when nothing is loaded yet or when a
    different ``model`` file name is requested.

    Args:
        message: The new user message to answer.
        history: Earlier turns; item ``[0]`` of each entry is replayed as the
            user message and item ``[1]`` as the assistant message.
        model: File name of the GGUF model, resolved relative to ``models/``.
        system_message: Text used as the agent's system prompt.
        max_tokens: Value assigned to the sampling settings' ``max_tokens``.
        temperature: Value assigned to the sampling settings' ``temperature``.
        top_p: Value assigned to the sampling settings' ``top_p``.
        top_k: Value assigned to the sampling settings' ``top_k``.
        repeat_penalty: Value assigned to the sampling settings'
            ``repeat_penalty``.

    Yields:
        The reply accumulated so far, once per chunk received from the
        streaming generator (each yield contains all previous chunks).
    """
    global llm, llm_model

    formatter = MessagesFormatterType.CHATML

    # Reuse the cached model unless none is loaded or the selection changed.
    needs_load = llm is None or llm_model != model
    if needs_load:
        llm = Llama(
            model_path=f"models/{model}",
            n_ctx=2048,  # Reduced context size for CPU
            n_threads=4,  # Adjust this based on your CPU cores
            n_gpu_layers=50,
        )
        llm_model = model

    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=formatter,
        debug_output=True,
    )

    # Start from the provider defaults and override with the UI values.
    sampling = provider.get_provider_default_settings()
    overrides = (
        ("temperature", temperature),
        ("top_k", top_k),
        ("top_p", top_p),
        ("max_tokens", max_tokens),
        ("repeat_penalty", repeat_penalty),
        ("stream", True),
    )
    for attr_name, attr_value in overrides:
        setattr(sampling, attr_name, attr_value)

    # Replay the earlier conversation as alternating user/assistant messages.
    chat_log = BasicChatHistory()
    for turn in history:
        for role, content in ((Roles.user, turn[0]), (Roles.assistant, turn[1])):
            chat_log.add_message({'role': role, 'content': content})

    chunks = agent.get_chat_response(
        message,
        llm_sampling_settings=sampling,
        chat_history=chat_log,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Emit the growing reply so the caller always receives the full text so far.
    reply_so_far = ""
    for chunk in chunks:
        reply_so_far += chunk
        yield reply_so_far
93
+
94
description = "HelpingAI-3B-chat: The Compact Yet Powerful Small Language Model (SLM) for Emotionally Intelligent Conversations 🌟"

# Extra controls shown next to the chat box. Their order matches the
# positional parameters of respond() that follow (message, history):
# model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty.
_extra_controls = [
    gr.Dropdown(
        [
            'helpingai-3b-chat-q4_k_m.gguf',
            'helpingai-3b-chat-iq4_xs-imat.gguf',
        ],
        value="helpingai-3b-chat-iq4_xs-imat.gguf",
        label="Model",
    ),
    gr.Textbox(
        value="You are HelpingAI a emotional AI always answer my question in HelpingAI style and to the point",
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
]

# Chat display widget: copy button enabled, like/dislike buttons disabled.
_chat_window = gr.Chatbot(
    scale=1,
    likeable=False,
    show_copy_button=True,
)

# Chat UI wired to the streaming respond() generator.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_controls,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    title="Chat with HelpingAI-3B using llama.cpp",
    description=description,
    chatbot=_chat_window,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()