Corvius committed
Commit d7003c2 · verified · 1 parent: f99e888

so back uwu


![mrow.jpg](https://cdn-uploads.huggingface.co/production/uploads/640b6a87a17ad54ebdcce62f/Yw8ovDNO6CeOoY0GB7uMQ.jpeg)

Files changed (1):
  1. app.py (+99 −60)
app.py CHANGED
@@ -5,34 +5,35 @@ import os
 import datetime
 from requests.exceptions import RequestException
 
+api_keys_env = os.environ.get('API_KEYS')
+if api_keys_env:
+    API_KEYS = [key.strip() for key in api_keys_env.strip().split('\n') if key.strip()]
+else:
+    raise ValueError("all keez ded go kys")
 API_URL = os.environ.get('API_URL')
-API_KEY = os.environ.get('API_KEY')
-
-headers = {
-    "Authorization": f"Bearer {API_KEY}",
-    "Content-Type": "application/json",
-    'Referer': os.environ.get('REFERRER_URL')
-}
-
-# debug switches
-USER_LOGGING_ENABLED = False
-RESPONSE_LOGGING_ENABLED = True
 
 DEFAULT_PARAMS = {
-    "temperature": 0.8,
-    "top_p": 0.95,
-    "top_k": 40,
+    "temperature": 1.0,
+    "top_p": 1,
+    "top_k": 0,
+    "min_p": 0,
+    "top_a": 0.1,
     "frequency_penalty": 0,
     "presence_penalty": 0,
     "repetition_penalty": 1.1,
     "max_tokens": 512
 }
 
+# debug switches
+USER_LOGGING_ENABLED = False
+RESPONSE_LOGGING_ENABLED = True
+
 def get_timestamp():
     return datetime.datetime.now().strftime("%H:%M:%S")
 
-def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
-    history_format = [{"role": "system", "content": system_prompt}]
+def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, top_a,
+            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    history_format = [{"role": "system", "content": system_prompt}] if system_prompt else []
     for human, assistant in history:
         history_format.append({"role": "user", "content": human})
         if assistant:
@@ -47,6 +48,8 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
+        "min_p": min_p,
+        "top_a": top_a,
         "frequency_penalty": frequency_penalty,
         "presence_penalty": presence_penalty,
         "repetition_penalty": repetition_penalty,
@@ -60,48 +63,76 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
             print(f"{param}={value}")
 
     data = {
-        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+        "model": "meta-llama/llama-3.1-405b-instruct:free",
         "messages": history_format,
         "stream": True,
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
+        "min_p": min_p,
+        "top_a": top_a,
         "frequency_penalty": frequency_penalty,
         "presence_penalty": presence_penalty,
         "repetition_penalty": repetition_penalty,
         "max_tokens": max_tokens
     }
 
-    try:
-        with requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
-            partial_message = ""
-            for line in response.iter_lines():
-                if stop_flag[0]:
-                    response.close()
-                    break
-                if line:
-                    line = line.decode('utf-8')
-                    if RESPONSE_LOGGING_ENABLED:
-                        print(f"API Response: {line}")
-                    if line.startswith("data: "):
-                        if line.strip() == "data: [DONE]":
+    api_key_index = 0
+    retries = 0
+    max_retries = len(API_KEYS)
+
+    while retries < max_retries:
+        current_api_key = API_KEYS[api_key_index]
+
+        headers = {
+            "Authorization": f"Bearer {current_api_key}",
+            "Content-Type": "application/json"
+        }
+
+        try:
+            with requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
+                if response.status_code == 200:
+                    partial_message = ""
+                    for line in response.iter_lines():
+                        if stop_flag[0]:
+                            response.close()
                             break
-                        try:
-                            json_data = json.loads(line[6:])
-                            if 'choices' in json_data and json_data['choices']:
-                                content = json_data['choices'][0]['delta'].get('content', '')
-                                if content:
-                                    partial_message += content
-                                    yield partial_message
-                        except json.JSONDecodeError:
-                            continue
-
-            if partial_message:
-                yield partial_message
-
-    except RequestException as e:
-        print(f"Request error: {e}")
-        yield f"An error occurred: {str(e)}"
+                        if line:
+                            line = line.decode('utf-8')
+                            if RESPONSE_LOGGING_ENABLED:
+                                print(f"API Response: {line}")
+                            if line.startswith("data: "):
+                                if line.strip() == "data: [DONE]":
+                                    break
+                                try:
+                                    json_data = json.loads(line[6:])
+                                    if 'choices' in json_data and json_data['choices']:
+                                        delta = json_data['choices'][0]['delta']
+                                        content = delta.get('content', '')
+                                        if content:
+                                            partial_message += content
+                                            yield partial_message
+                                except json.JSONDecodeError:
+                                    continue
+                    if partial_message:
+                        yield partial_message
+                    # successful response, break out of retry loop
+                    break
+                elif response.status_code == 429:
+                    print("rate limit hit, cycling keys...")
+                    retries += 1
+                    api_key_index = (api_key_index + 1) % len(API_KEYS)
+                    continue
+                else:
+                    # theothershits
+                    error_message = f"Error: Received status code {response.status_code} - {response.text}"
+                    print(error_message)
+                    yield f"An error occurred: {error_message}"
+                    break
+        except RequestException as e:
+            print(f"Request error: {e}")
+            yield f"An error occurred: {str(e)}"
+            break
 
 def import_chat(custom_format_string):
     try:
@@ -147,7 +178,7 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(value=[])
-            msg = gr.Textbox(label="Message (70B for now. The provider might bug out at random. The space may restart frequently)")
+            msg = gr.Textbox(label="Message")
             with gr.Row():
                 clear = gr.Button("Clear")
                 regenerate = gr.Button("Regenerate")
@@ -161,25 +192,29 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
 
         with gr.Column(scale=1):
             system_prompt = gr.Textbox("", label="System Prompt", lines=5)
-            temperature = gr.Slider(0, 2, value=0.8, step=0.01, label="Temperature")
-            top_p = gr.Slider(0, 1, value=0.95, step=0.01, label="Top P")
-            top_k = gr.Slider(1, 500, value=40, step=1, label="Top K")
-            frequency_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Frequency Penalty")
-            presence_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Presence Penalty")
-            repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
-            max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")
+            temperature = gr.Slider(0, 2, value=DEFAULT_PARAMS["temperature"], step=0.01, label="Temperature")
+            top_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_p"], step=0.01, label="Top P")
+            top_k = gr.Slider(0, 500, value=DEFAULT_PARAMS["top_k"], step=1, label="Top K")
+            min_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["min_p"], step=0.01, label="Min P")
+            top_a = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_a"], step=0.01, label="Top A")
+            frequency_penalty = gr.Slider(-2, 2, value=DEFAULT_PARAMS["frequency_penalty"], step=0.1, label="Frequency Penalty")
+            presence_penalty = gr.Slider(-2, 2, value=DEFAULT_PARAMS["presence_penalty"], step=0.1, label="Presence Penalty")
+            repetition_penalty = gr.Slider(0.01, 5, value=DEFAULT_PARAMS["repetition_penalty"], step=0.01, label="Repetition Penalty")
+            max_tokens = gr.Slider(1, 4096, value=DEFAULT_PARAMS["max_tokens"], step=1, label="Max Output (max_tokens)")
 
     def user(user_message, history):
         history = history or []
         return "", history + [[user_message, None]]
 
-    def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    def bot(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
+            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
         stop_flag[0] = False
         history = history or []
         if not history:
             return history
         user_message = history[-1][0]
-        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
+        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, min_p, top_a,
+                              frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
         history[-1][1] = ""
         for chunk in bot_message:
             if stop_flag[0]:
@@ -188,11 +223,13 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
             history[-1][1] = chunk
             yield history
 
-    def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    def regenerate_response(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
+                            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
         if history and len(history) > 0:
             last_user_message = history[-1][0]
             history[-1][1] = None
-            for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+            for new_history in bot(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
+                                   frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
                 yield new_history
         else:
             yield []
@@ -202,14 +239,16 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
         return imported_history, imported_system_prompt
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
+        bot, [chatbot, system_prompt, temperature, top_p, top_k, min_p, top_a,
+              frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
     )
 
     clear.click(lambda: None, None, chatbot, queue=False)
 
    regenerate.click(
         regenerate_response,
-        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
+        [chatbot, system_prompt, temperature, top_p, top_k, min_p, top_a,
+         frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
         chatbot
     )
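Note: the new `API_KEYS` secret replaces the single `API_KEY` and is read as one key per line, with blank lines and stray whitespace stripped. A minimal sketch of that parsing, using hypothetical placeholder values rather than real keys:

```python
import os

# Hypothetical placeholder values; on the Space these come from the
# API_KEYS secret, one key per line.
os.environ['API_KEYS'] = "key-one\n  key-two  \n\n"

api_keys_env = os.environ.get('API_KEYS')
API_KEYS = [key.strip() for key in api_keys_env.strip().split('\n') if key.strip()]
print(API_KEYS)  # ['key-one', 'key-two']
```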
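On a 429 the request loop advances to the next key, and since `max_retries = len(API_KEYS)`, each key is tried at most once per message before the loop gives up. An isolated sketch of that rotation arithmetic, driven by simulated status codes instead of live requests:

```python
API_KEYS = ["key-a", "key-b", "key-c"]  # hypothetical keys

api_key_index = 0
retries = 0
max_retries = len(API_KEYS)

# Simulated responses: two rate limits, then a success.
statuses = iter([429, 429, 200])

while retries < max_retries:
    status = next(statuses)
    if status == 429:
        # same rotation as in predict(): count the retry and wrap around
        retries += 1
        api_key_index = (api_key_index + 1) % len(API_KEYS)
        continue
    print(f"succeeded with key index {api_key_index}")  # prints: key index 2
    break
```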
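The streaming branch assumes OpenAI-style server-sent events: each payload line starts with `data: `, carries a JSON chunk whose text lives at `choices[0].delta.content`, and the stream ends with `data: [DONE]`. A self-contained sketch of that parsing, fed hypothetical sample lines instead of a live response:

```python
import json

# Hypothetical sample of the SSE lines the streaming loop consumes.
sample_lines = [
    'data: {"choices": [{"delta": {"content": "Hel"}}]}',
    'data: {"choices": [{"delta": {"content": "lo"}}]}',
    'data: [DONE]',
]

partial_message = ""
for line in sample_lines:
    if line.startswith("data: "):
        if line.strip() == "data: [DONE]":
            break
        try:
            json_data = json.loads(line[6:])  # drop the "data: " prefix
            if 'choices' in json_data and json_data['choices']:
                content = json_data['choices'][0]['delta'].get('content', '')
                if content:
                    partial_message += content
        except json.JSONDecodeError:
            continue

print(partial_message)  # Hello
```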