Corvius committed
Commit 82addc1 · verified · 1 Parent(s): 2438f67

ufckfcukfcukfckufckufcufckufckuc



Files changed (1)
  app.py +57 -156
app.py CHANGED
@@ -3,39 +3,36 @@ import requests
 import json
 import os
 import datetime
-import time
 from requests.exceptions import RequestException
 
-api_keys_env = os.environ.get('API_KEYS')
-if api_keys_env:
-    API_KEYS = [key.strip() for key in api_keys_env.strip().split('\n') if key.strip()]
-else:
-    raise ValueError("all keez ded go kys")
-
 API_URL = os.environ.get('API_URL')
+API_KEY = os.environ.get('API_KEY')
+
+headers = {
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json",
+    'Referer': os.environ.get('REFERRER_URL')
+}
+
+# debug switches
+USER_LOGGING_ENABLED = False
+RESPONSE_LOGGING_ENABLED = True
 
 DEFAULT_PARAMS = {
-    "temperature": 1.0,
-    "top_p": 1,
-    "top_k": 0,
-    "min_p": 0,
-    "top_a": 0.1,
+    "temperature": 0.8,
+    "top_p": 0.95,
+    "top_k": 40,
     "frequency_penalty": 0,
     "presence_penalty": 0,
     "repetition_penalty": 1.1,
     "max_tokens": 512
 }
 
-# debug switches
-USER_LOGGING_ENABLED = False
-RESPONSE_LOGGING_ENABLED = True
-
 def get_timestamp():
     return datetime.datetime.now().strftime("%H:%M:%S")
 
-def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
-    history_format = [{"role": "system", "content": system_prompt}] if system_prompt else []
+def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    history_format = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         history_format.append({"role": "user", "content": human})
         if assistant:
@@ -50,8 +47,6 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, t
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
-        "min_p": min_p,
-        "top_a": top_a,
         "frequency_penalty": frequency_penalty,
         "presence_penalty": presence_penalty,
         "repetition_penalty": repetition_penalty,
@@ -65,134 +60,48 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, min_p, t
         print(f"{param}={value}")
 
     data = {
-        "model": "meta-llama/llama-3.1-405b-instruct:free",
+        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
         "messages": history_format,
         "stream": True,
         "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
-        "min_p": min_p,
-        "top_a": top_a,
         "frequency_penalty": frequency_penalty,
         "presence_penalty": presence_penalty,
         "repetition_penalty": repetition_penalty,
         "max_tokens": max_tokens
     }
 
-    api_key_index = 0
-    retries = 0
-    max_retries = len(API_KEYS) * 30
-    partial_message = ""
-    processing_count = 0
-    max_processing_attempts = 10
-
-    while retries < max_retries:
-        if stop_flag[0]:
-            return
-
-        current_api_key = API_KEYS[api_key_index]
-        headers = {
-            "Authorization": f"Bearer {current_api_key}",
-            "Content-Type": "application/json"
-        }
-
-        try:
-            response = requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True)
-
-            if response.status_code == 200:
-                processing_count = 0
-                for line in response.iter_lines():
-                    if stop_flag[0]:
-                        response.close()
-                        return
-
-                    if not line:
-                        continue
-
+    try:
+        with requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
+            partial_message = ""
+            for line in response.iter_lines():
+                if stop_flag[0]:
+                    response.close()
+                    break
+                if line:
                     line = line.decode('utf-8')
                     if RESPONSE_LOGGING_ENABLED:
                         print(f"API Response: {line}")
-
-                    if ": OPENROUTER PROCESSING" in line:
-                        processing_count += 1
-                        if processing_count >= max_processing_attempts:
-                            print("Too many processing attempts, cycling to next key...")
+                    if line.startswith("data: "):
+                        if line.strip() == "data: [DONE]":
                             break
-                        continue
-
-                    if not line.startswith("data: "):
-                        continue
-
-                    if line.strip() == "data: [DONE]":
-                        response.close()
-                        if partial_message:
-                            yield partial_message
-                        return
-
-                    try:
-                        json_data = json.loads(line[6:])
-
-                        # Check for rate limit error
-                        if 'error' in json_data:
-                            error_msg = json_data.get('error', {}).get('message', '')
-                            if isinstance(error_msg, str):
-                                try:
-                                    error_obj = json.loads(error_msg)
-                                    if error_obj.get('error', {}).get('type') == 'rate_limit_exceeded':
-                                        print("Rate limit hit in streaming response, cycling keys...")
-                                        break
-                                except json.JSONDecodeError:
-                                    pass
-                            continue
-
-                        if 'choices' in json_data and json_data['choices']:
-                            delta = json_data['choices'][0]['delta']
-                            content = delta.get('content', '')
-                            if content:
-                                partial_message += content
-                                yield partial_message
-
-                    except json.JSONDecodeError as e:
-                        print(f"JSON decode error: {e}")
-                        continue
-
-                response.close()
-                if partial_message:
-                    return
-
-            elif response.status_code == 429:
-                print("Rate limit hit from status code, cycling keys...")
-                time.sleep(1)
-            elif response.status_code == 401:
-                print(f"Invalid API key {api_key_index}, cycling to next...")
-                api_key_index = (api_key_index + 1) % len(API_KEYS)
-                retries += 1
-                continue
-            else:
-                error_message = f"Error: Received status code {response.status_code} - {response.text}"
-                print(error_message)
-                if partial_message:
-                    yield partial_message + f"\n[Error: {error_message}]"
-                else:
-                    yield f"An error occurred: {error_message}"
-                return
-
-        except RequestException as e:
-            print(f"Request error: {e}")
-            if partial_message:
-                yield partial_message + f"\n[Error: {str(e)}]"
-            else:
-                yield f"An error occurred: {str(e)}"
-            return
-
-        retries += 1
-        api_key_index = (api_key_index + 1) % len(API_KEYS)
-        time.sleep(1)
-
-    if partial_message:
-        yield partial_message + "\n[Error: Maximum retries reached]"
-    else:
-        yield "Error: Maximum retries reached. Please try again later."
+                        try:
+                            json_data = json.loads(line[6:])
+                            if 'choices' in json_data and json_data['choices']:
+                                content = json_data['choices'][0]['delta'].get('content', '')
+                                if content:
+                                    partial_message += content
+                                    yield partial_message
+                        except json.JSONDecodeError:
+                            continue
+
+            if partial_message:
+                yield partial_message
+
+    except RequestException as e:
+        print(f"Request error: {e}")
+        yield f"An error occurred: {str(e)}"
 
 def import_chat(custom_format_string):
     try:
@@ -238,7 +147,7 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
     with gr.Row():
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(value=[])
-            msg = gr.Textbox(label="Message (expect to wait minutes since the keys are getting 429'd like crazy 💀)")
+            msg = gr.Textbox(label="Message (dolphin-2.9.1-llama-3-70b for now. The provider might bug out at random. The space may restart frequently)")
             with gr.Row():
                 clear = gr.Button("Clear")
                 regenerate = gr.Button("Regenerate")
@@ -252,29 +161,25 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
 
         with gr.Column(scale=1):
             system_prompt = gr.Textbox("", label="System Prompt", lines=5)
-            temperature = gr.Slider(0, 2, value=DEFAULT_PARAMS["temperature"], step=0.01, label="Temperature")
-            top_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_p"], step=0.01, label="Top P")
-            top_k = gr.Slider(0, 500, value=DEFAULT_PARAMS["top_k"], step=1, label="Top K")
-            min_p = gr.Slider(0, 1, value=DEFAULT_PARAMS["min_p"], step=0.01, label="Min P")
-            top_a = gr.Slider(0, 1, value=DEFAULT_PARAMS["top_a"], step=0.01, label="Top A")
-            frequency_penalty = gr.Slider(-2, 2, value=DEFAULT_PARAMS["frequency_penalty"], step=0.1, label="Frequency Penalty")
-            presence_penalty = gr.Slider(-2, 2, value=DEFAULT_PARAMS["presence_penalty"], step=0.1, label="Presence Penalty")
-            repetition_penalty = gr.Slider(0.01, 5, value=DEFAULT_PARAMS["repetition_penalty"], step=0.01, label="Repetition Penalty")
-            max_tokens = gr.Slider(1, 4096, value=DEFAULT_PARAMS["max_tokens"], step=1, label="Max Output (max_tokens)")
+            temperature = gr.Slider(0, 2, value=0.8, step=0.01, label="Temperature")
+            top_p = gr.Slider(0, 1, value=0.95, step=0.01, label="Top P")
+            top_k = gr.Slider(1, 500, value=40, step=1, label="Top K")
+            frequency_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Frequency Penalty")
+            presence_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Presence Penalty")
+            repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
+            max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")
 
     def user(user_message, history):
         history = history or []
         return "", history + [[user_message, None]]
 
-    def bot(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
         stop_flag[0] = False
         history = history or []
         if not history:
             return history
         user_message = history[-1][0]
-        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, min_p, top_a,
-                              frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
+        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
         history[-1][1] = ""
         for chunk in bot_message:
             if stop_flag[0]:
@@ -283,13 +188,11 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
             history[-1][1] = chunk
             yield history
 
-    def regenerate_response(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-                            frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+    def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
         if history and len(history) > 0:
             last_user_message = history[-1][0]
             history[-1][1] = None
-            for new_history in bot(history, system_prompt, temperature, top_p, top_k, min_p, top_a,
-                                   frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+            for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
                 yield new_history
         else:
             yield []
@@ -299,16 +202,14 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
         return imported_history, imported_system_prompt
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, system_prompt, temperature, top_p, top_k, min_p, top_a,
-              frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
+        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
     )
 
     clear.click(lambda: None, None, chatbot, queue=False)
 
     regenerate.click(
        regenerate_response,
-        [chatbot, system_prompt, temperature, top_p, top_k, min_p, top_a,
-         frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
+        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
         chatbot
     )
 
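
For reference, the simplified predict() above follows the usual OpenAI-compatible SSE streaming pattern: each event is a line prefixed with "data: ", the JSON payload carries the next token in choices[0].delta.content, and the stream ends with the "data: [DONE]" sentinel. Below is a minimal standalone sketch of that pattern, assuming only an OpenAI-compatible endpoint; stream_chat is an illustrative name and is not part of this commit, though the two environment variables mirror the ones the app reads.

import json
import os

import requests

API_URL = os.environ.get('API_URL')   # same env vars the app reads
API_KEY = os.environ.get('API_KEY')

def stream_chat(prompt):
    # Hypothetical helper, not from this repo: yields the growing reply
    # from an OpenAI-compatible SSE stream.
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {"messages": [{"role": "user", "content": prompt}], "stream": True}
    partial = ""
    with requests.post(API_URL, headers=headers, json=payload, stream=True) as response:
        for raw in response.iter_lines():
            if not raw:
                continue                          # skip SSE keep-alive blank lines
            line = raw.decode('utf-8')
            if not line.startswith("data: "):
                continue                          # ignore comments and other SSE fields
            if line.strip() == "data: [DONE]":
                break                             # end-of-stream sentinel
            try:
                chunk = json.loads(line[6:])      # strip the "data: " prefix
            except json.JSONDecodeError:
                continue                          # tolerate partial or garbled events
            if chunk.get("choices"):
                delta = chunk["choices"][0].get("delta", {})
                if delta.get("content"):
                    partial += delta["content"]
                    yield partial                 # accumulated text, as the app yields it

Like the app's predict(), this yields the accumulated message rather than individual deltas, which matches how the Gradio bot() generator overwrites the last chat turn on each step.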