ecyht2 committed
Commit dcb96e9
1 Parent(s): 27b9ede

feat: Added other controls

Files changed (1)
app.py +132 -20
app.py CHANGED
@@ -1,30 +1,142 @@
  import gradio as gr
  from llama_cpp import Llama

- llm = Llama(model_path="model.gguf", n_ctx=4000, n_threads=2, chat_format="chatml")
-
- def generate(message, history, temperature=0.1, max_tokens=512):
-     system_prompt = "You are Dolphin, a helpful AI assistant."
      formatted_prompt = [{"role": "system", "content": system_prompt}]
-     for user_prompt, bot_response in history:
          formatted_prompt.append({"role": "user", "content": user_prompt})
-         formatted_prompt.append({"role": "assistant", "content": bot_response })
      formatted_prompt.append({"role": "user", "content": message})
-     stream_response = llm.create_chat_completion(messages=formatted_prompt, temperature=temperature, max_tokens=max_tokens, stream=True)
-     response = ""
      for chunk in stream_response:
-         if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
-             response += chunk['choices'][0]["delta"]["content"]
-             yield response

- mychatbot = gr.Chatbot(
-     avatar_images=["user.png", "botsc.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
-
- iface = gr.ChatInterface(fn=generate, chatbot=mychatbot, retry_btn=None, undo_btn=None)

- with gr.Blocks() as demo:
-     gr.HTML("<center><h1>Tomoniai's Chat with Stable Code 3b</h1></center>")
-     iface.render()

- demo.queue().launch(show_api=False, server_name="0.0.0.0")
-
 
+ """Python Application Script for an AI chatbot using llama-cpp-python."""
+ import logging
+ import os
+
  import gradio as gr
  from llama_cpp import Llama

+ # Setting up the environment
+ log_level = os.environ.get("LOG_LEVEL", "WARNING")
+ logging.basicConfig(encoding="utf-8", level=log_level)
+ # Default System Prompt
+ DEFAULT_SYSTEM_PROMPT = os.environ.get("DEFAULT_SYSTEM", "You are Dolphin, a helpful AI assistant.")
+ # Model Path
+ model_path = "model.gguf"
+ logging.debug("Model Path: %s", model_path)
+
+ logging.info("Loading Model")
+ llm = Llama(model_path=model_path, n_ctx=4000, n_threads=2, chat_format="chatml")
+
+
+ def generate(
+     message: str,
+     history: list[tuple[str, str]],
+     system_prompt: str,
+     temperature: float = 0.1,
+     max_tokens: int = 512,
+     top_p: float = 0.95,
+     repetition_penalty: float = 1.0,
+ ):
+     """Generate text from the model as a stream.
+
+     :param message: The new user prompt.
+     :param history: The history of the chat session.
+     :param system_prompt: The system prompt for the model.
+     :param temperature: The temperature parameter for the model.
+     :param max_tokens: The maximum number of tokens the model may generate.
+     :param top_p: The top-p value for the model.
+     :param repetition_penalty: The repetition penalty for the model.
+     """
+     logging.info("Generating Text")
+     logging.debug("message: %s", message)
+     logging.debug("history: %s", history)
+     logging.debug("system_prompt: %s", system_prompt)
+     logging.debug("temperature: %s", temperature)
+     logging.debug("max_tokens: %s", max_tokens)
+     logging.debug("top_p: %s", top_p)
+     logging.debug("repetition_penalty: %s", repetition_penalty)
+
+     # Formatting Prompt
+     logging.info("Formatting Prompt")
      formatted_prompt = [{"role": "system", "content": system_prompt}]
+     for user_prompt, bot_response in history:
          formatted_prompt.append({"role": "user", "content": user_prompt})
+         formatted_prompt.append({"role": "assistant", "content": bot_response})
      formatted_prompt.append({"role": "user", "content": message})
+     logging.debug("Formatted Prompt: %s", formatted_prompt)
+
+     # Generating Response
+     logging.info("Generating Response")
+     stream_response = llm.create_chat_completion(
+         messages=formatted_prompt,
+         temperature=temperature,
+         max_tokens=max_tokens,
+         top_p=top_p,
+         repeat_penalty=repetition_penalty,
+         stream=True,
+     )
+
+     # Parsing Response
+     logging.info("Parsing Response")
+     response = ""
      for chunk in stream_response:
+         if (
+             len(chunk["choices"][0]["delta"]) != 0
+             and "content" in chunk["choices"][0]["delta"]
+         ):
+             response += chunk["choices"][0]["delta"]["content"]
+             logging.debug("Response: %s", response)
+             yield response
+
+
+ additional_inputs = [
+     gr.Textbox(
+         label="System Prompt",
+         max_lines=1,
+         interactive=True,
+         value=DEFAULT_SYSTEM_PROMPT,
+     ),
+     gr.Slider(
+         label="Temperature",
+         value=0.9,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values produce more diverse outputs",
+     ),
+     gr.Slider(
+         label="Max new tokens",
+         value=256,
+         minimum=0,
+         maximum=1048,
+         step=64,
+         interactive=True,
+         info="The maximum number of new tokens",
+     ),
+     gr.Slider(
+         label="Top-p (nucleus sampling)",
+         value=0.90,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values sample more low-probability tokens",
+     ),
+     gr.Slider(
+         label="Repetition penalty",
+         value=1.2,
+         minimum=1.0,
+         maximum=2.0,
+         step=0.05,
+         interactive=True,
+         info="Penalize repeated tokens",
+     ),
+ ]
+
+ examples = []

+ logging.info("Creating Chatbot")
+ mychatbot = gr.Chatbot(
+     avatar_images=["user.png", "botsc.png"],
+     bubble_full_width=False,
+     show_label=False,
+     show_copy_button=True,
+     likeable=True,
+ )

+ logging.info("Creating Chat Interface")
+ iface = gr.ChatInterface(
+     fn=generate,
+     chatbot=mychatbot,
+     additional_inputs=additional_inputs,
+     examples=examples,
+     concurrency_limit=20,
+     title="LLAMA CPP Template",
+ )

+ logging.info("Starting Application")
+ iface.launch(show_api=False, server_name="0.0.0.0")
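
For a quick local check of the new sampling controls without launching the Gradio UI, the same streaming path can be driven directly against llama-cpp-python. A minimal sketch, not part of the commit, assuming a model.gguf sits in the working directory as in app.py; the prompt text and sampling values are illustrative:

# smoke_test.py - minimal sketch, not part of the commit; assumes model.gguf
# is available locally, loaded the same way app.py loads it.
from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=4000, n_threads=2, chat_format="chatml")

# Mirror what generate() builds: system prompt, prior turns, then the new message.
messages = [
    {"role": "system", "content": "You are Dolphin, a helpful AI assistant."},
    {"role": "user", "content": "Write a haiku about llamas."},
]

stream = llm.create_chat_completion(
    messages=messages,
    temperature=0.7,
    max_tokens=64,
    top_p=0.9,
    repeat_penalty=1.2,
    stream=True,
)

# Accumulate streamed deltas exactly the way generate() does.
reply = ""
for chunk in stream:
    delta = chunk["choices"][0]["delta"]
    if delta and "content" in delta:
        reply += delta["content"]
print(reply)

Note the design choice this mirrors: generate() yields the accumulated string rather than each delta, so gr.ChatInterface simply re-renders the whole message on every update; the sketch above concatenates the deltas the same way before printing the final reply.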