arad1367 committed
Commit
a59cdce
1 Parent(s): f3eac83

Update app.py

Files changed (1)
  1. app.py +179 -176
app.py CHANGED
@@ -1,177 +1,180 @@
- import os
- import time
- import spaces
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
- import gradio as gr
- from threading import Thread
-
- MODEL_LIST = ["mistralai/mathstral-7B-v0.1"]
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
- MODEL = os.environ.get("MODEL_ID")
-
- TITLE = "<h1><center>mistralai/mathstral-7B-v0.1 - Your Math advisor</center></h1>"
-
- PLACEHOLDER = """
- <center>
- <p>Hi! I'm MisMath. A Math advisor. My model is based on mistralai/Mistral-Nemo-Instruct-2407. Feel free to ask your questions</p>
- </center>
- """
-
- CSS = """
- .duplicate-button {
-     margin: auto !important;
-     color: white !important;
-     background: black !important;
-     border-radius: 100vh !important;
- }
- h3 {
-     text-align: center;
- }
- """
-
- device = "cuda" # for GPU usage or "cpu" for CPU usage
-
- quantization_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_compute_dtype=torch.bfloat16,
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_quant_type="nf4")
-
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL,
-     torch_dtype=torch.bfloat16,
-     device_map="auto",
-     quantization_config=quantization_config)
-
- @spaces.GPU()
- def stream_chat(
-     message: str,
-     history: list,
-     system_prompt: str,
-     temperature: float = 0.8,
-     max_new_tokens: int = 1024,
-     top_p: float = 1.0,
-     top_k: int = 20,
-     penalty: float = 1.2,
- ):
-     print(f'message: {message}')
-     print(f'history: {history}')
-
-     conversation = [
-         {"role": "system", "content": system_prompt}
-     ]
-     for prompt, answer in history:
-         conversation.extend([
-             {"role": "user", "content": prompt},
-             {"role": "assistant", "content": answer},
-         ])
-
-     conversation.append({"role": "user", "content": message})
-
-     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
-
-     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-
-     generate_kwargs = dict(
-         input_ids=input_ids,
-         max_new_tokens = max_new_tokens,
-         do_sample = False if temperature == 0 else True,
-         top_p = top_p,
-         top_k = top_k,
-         temperature = temperature,
-         eos_token_id=[128001,128008,128009],
-         streamer=streamer,
-     )
-
-     with torch.no_grad():
-         thread = Thread(target=model.generate, kwargs=generate_kwargs)
-         thread.start()
-
-     buffer = ""
-     for new_text in streamer:
-         buffer += new_text
-         yield buffer
-
-
- chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
-
- footer = """
- <div style="text-align: center; margin-top: 20px;">
-     <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
-     <a href="https://github.com/arad1367" target="_blank">GitHub</a> |
-     <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
-     <br>
-     Made with 💖 by Pejman Ebrahimi
- </div>
- """
-
- with gr.Blocks(css=CSS, theme="Ajaxon6255/Emerald_Isle") as demo:
-     gr.HTML(TITLE)
-     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
-     gr.ChatInterface(
-         fn=stream_chat,
-         chatbot=chatbot,
-         fill_height=True,
-         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-         additional_inputs=[
-             gr.Textbox(
-                 value="You are a helpful assistant for Math questions and complex calculations and programming and your name is MisMath",
-                 label="System Prompt",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0,
-                 maximum=1,
-                 step=0.1,
-                 value=0.8,
-                 label="Temperature",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=128,
-                 maximum=8192,
-                 step=1,
-                 value=1024,
-                 label="Max new tokens",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.1,
-                 value=1.0,
-                 label="top_p",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=1,
-                 maximum=20,
-                 step=1,
-                 value=20,
-                 label="top_k",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=0.0,
-                 maximum=2.0,
-                 step=0.1,
-                 value=1.2,
-                 label="Repetition penalty",
-                 render=False,
-             ),
-         ],
-         examples=[
-             ["How to make a self-driving car?"],
-             ["Give me creative idea to establish a startup"],
-             ["How can I improve my programming skills?"],
-             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
-         ],
-         cache_examples=False,
-     )
-     gr.HTML(footer)
-
-
- if __name__ == "__main__":
+ import os
+ import time
+ import spaces
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
+ import gradio as gr
+ from threading import Thread
+
+ MODEL_LIST = ["mistralai/mathstral-7B-v0.1"]
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
+ MODEL = os.environ.get("MODEL_ID")
+
+ TITLE = "<h1><center>MathΣtral - Your Math advisor</center></h1>"
+
+ PLACEHOLDER = """
+ <center>
+ <p>Hi! I'm MisMath, a math advisor. My model is based on mathstral-7B-v0.1. Feel free to ask your questions.</p>
+ <p>Mathstral 7B is a model specializing in mathematical and scientific tasks, based on Mistral 7B.</p>
+ <p>mathstral-7B-v0.1 is the first Mathstral model.</p>
+ <img src="https://www.google.com/url?sa=i&url=http%3A%2F%2Fwww.xuexiaigc.com%2Fgptgpts%2FMistral%25E6%259C%2580%25E6%2596%25B0%25E5%25BC%2580%25E6%25BA%2590%25E6%2595%25B0%25E5%25AD%25A6%25E6%25A8%25A1%25E5%259E%258B-Mathstral%25EF%25BC%258C%25E8%2583%25BD%25E4%25B8%258D%25E8%2583%25BD%25E7%25AE%2597%25E5%25AF%25B9-9-11-%25E5%2592%258C-9-9%25E8%25B0%2581%25E5%25A4%25A7%25EF%25BC%259F%25EF%25BD%259CAI%2F&psig=AOvVaw0NtVK20NoIjAxGJ1RtkP1C&ust=1721987390072000&source=images&cd=vfe&opi=89978449&ved=0CBUQjRxqFwoTCIil0Yj1wYcDFQAAAAAdAAAAABAJ" alt="MathStral Model" style="width:300px;height:200px;">
+ </center>
+ """
+
+ CSS = """
+ .duplicate-button {
+     margin: auto !important;
+     color: white !important;
+     background: black !important;
+     border-radius: 100vh !important;
+ }
+ h3 {
+     text-align: center;
+ }
+ """
+
+ device = "cuda" # for GPU usage or "cpu" for CPU usage
+
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4")
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+     quantization_config=quantization_config)
+
+ @spaces.GPU()
+ def stream_chat(
+     message: str,
+     history: list,
+     system_prompt: str,
+     temperature: float = 0.8,
+     max_new_tokens: int = 1024,
+     top_p: float = 1.0,
+     top_k: int = 20,
+     penalty: float = 1.2,
+ ):
+     print(f'message: {message}')
+     print(f'history: {history}')
+
+     conversation = [
+         {"role": "system", "content": system_prompt}
+     ]
+     for prompt, answer in history:
+         conversation.extend([
+             {"role": "user", "content": prompt},
+             {"role": "assistant", "content": answer},
+         ])
+
+     conversation.append({"role": "user", "content": message})
+
+     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
+
+     generate_kwargs = dict(
+         input_ids=input_ids,
+         max_new_tokens=max_new_tokens,
+         do_sample=temperature > 0,  # greedy decoding when temperature is 0
+         top_p=top_p,
+         top_k=top_k,
+         temperature=temperature,
+         repetition_penalty=penalty,  # wire the "Repetition penalty" slider into generation
+         eos_token_id=tokenizer.eos_token_id,  # the hard-coded ids [128001, 128008, 128009] are Llama-3 tokens, out of range for this tokenizer
+         streamer=streamer,
+     )
+
+     with torch.no_grad():
+         thread = Thread(target=model.generate, kwargs=generate_kwargs)
+         thread.start()
+
+     buffer = ""
+     for new_text in streamer:
+         buffer += new_text
+         yield buffer
+
+
+ chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
+
+ footer = """
+ <div style="text-align: center; margin-top: 20px;">
+     <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
+     <a href="https://github.com/arad1367" target="_blank">GitHub</a> |
+     <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
+     <br>
+     Made with 💖 by Pejman Ebrahimi
+ </div>
+ """
+
+ with gr.Blocks(css=CSS, theme="Ajaxon6255/Emerald_Isle") as demo:
+     gr.HTML(TITLE)
+     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
+     gr.ChatInterface(
+         fn=stream_chat,
+         chatbot=chatbot,
+         fill_height=True,
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Textbox(
+                 value="You are a helpful assistant named MisMath for math questions, complex calculations, and programming.",
+                 label="System Prompt",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=0,
+                 maximum=1,
+                 step=0.1,
+                 value=0.8,
+                 label="Temperature",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=128,
+                 maximum=8192,
+                 step=1,
+                 value=1024,
+                 label="Max new tokens",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=0.0,
+                 maximum=1.0,
+                 step=0.1,
+                 value=1.0,
+                 label="top_p",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=1,
+                 maximum=20,
+                 step=1,
+                 value=20,
+                 label="top_k",
+                 render=False,
+             ),
+             gr.Slider(
+                 minimum=0.0,
+                 maximum=2.0,
+                 step=0.1,
+                 value=1.2,
+                 label="Repetition penalty",
+                 render=False,
+             ),
+         ],
+         examples=[
+             ["How to make a self-driving car?"],
+             ["Give me a creative idea to establish a startup"],
+             ["How can I improve my programming skills?"],
+             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
+         ],
+         cache_examples=False,
+     )
+     gr.HTML(footer)
+
+
+ if __name__ == "__main__":
      demo.launch()
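
The heart of stream_chat is the standard transformers streaming pattern: model.generate runs in a background thread while the caller iterates the TextIteratorStreamer, yielding decoded text as it arrives. A stripped-down sketch of that pattern outside Gradio, assuming the same model id the Space uses (the example question is illustrative; any chat model with a chat template works):

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "mistralai/mathstral-7B-v0.1"  # same model the Space targets

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")

conversation = [{"role": "user", "content": "Which is larger, 9.11 or 9.9?"}]
input_ids = tokenizer.apply_chat_template(
    conversation, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# skip_prompt drops the echoed input; iterating the streamer yields text chunks
# as generate produces them in the background thread.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(target=model.generate, kwargs=dict(
    input_ids=input_ids, max_new_tokens=256, do_sample=False, streamer=streamer,
))
thread.start()

for new_text in streamer:
    print(new_text, end="", flush=True)  # same incremental text stream_chat yields to the UI
thread.join()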
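Note that app.py reads the model id from the MODEL_ID environment variable at import time; if it is unset, MODEL is None and the module-level from_pretrained calls fail. A minimal local-run sketch, assuming you duplicate the Space and run it outside the Hub (the launcher script and the token value are placeholders, not part of the commit):

import os

# Must be set before app.py is imported, because the model loads at module level.
os.environ["MODEL_ID"] = "mistralai/mathstral-7B-v0.1"
os.environ["HF_TOKEN"] = "hf_..."  # placeholder; only needed for gated or private repos

import app          # builds the tokenizer, model, and Gradio demo on import
app.demo.launch()   # serve the UI locally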