tastypear committed
Commit 0bf7de2 · verified · 1 Parent(s): a0435f4

use official option to disable cache
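The "official option" here is the Inference API's `X-Use-Cache` request header, which the new code sets to `false` instead of salting the last user message with 1-20 random spaces. A minimal sketch of that header on a direct API call, assuming a valid PRO token (the token, prompt, and `max_tokens` below are placeholder values):

import requests

# Direct Inference API call with the response cache disabled;
# apart from the extra header, this mirrors what the proxy forwards.
resp = requests.post(
    "https://api-inference.huggingface.co/models/CohereForAI/c4ai-command-r-plus/v1/chat/completions",
    headers={
        "Authorization": "Bearer hf_xxxx",  # placeholder token
        "X-Use-Cache": "false",             # opt out of the cache layer, force a fresh generation
    },
    json={
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64,
    },
)
print(resp.json())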

Files changed (1): main.py (+96, -97)
main.py CHANGED
@@ -1,98 +1,97 @@
- import random
- import requests
- from base64 import b64decode
- from flask import Flask, request, jsonify, Response, stream_with_context, render_template_string
-
- from transformers import AutoTokenizer
-
- def calc_tokens(text):
-     tokenizer = AutoTokenizer.from_pretrained("PJMixers/CohereForAI_c4ai-command-r-plus-tokenizer")
-     tokens = tokenizer.tokenize(text)
-     return len(tokens)
-
- def calc_messages_tokens(json_data):
-     messages = json_data["messages"]
-     m_messages = []
-     user_count = 0
-     prompt = "<BOS_TOKEN>"
-     for message in messages:
-         if message["role"] == "system":
-             prompt += f"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{message['content']}<|END_OF_TURN_TOKEN|>"
-         elif message["role"] == "user":
-             user_count += 1
-             prompt += f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message['content']}<|END_OF_TURN_TOKEN|>"
-         elif message["role"] == "assistant":
-             prompt += f"<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{message['content']}<|END_OF_TURN_TOKEN|>"
-         else:
-             continue
-     prompt += "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
-     total_tokens = calc_tokens(prompt) + user_count + 1
-     return total_tokens + 10  # for robustness
-
- app = Flask(__name__)
-
- @app.route('/', methods=['GET'])
- def index():
-     template = '''
-     <html>
-     <head>
-         <title>Command-R-Plus Chat API</title>
-     </head>
-     <body>
-         <h1>Command-R-Plus OpenAI Compatible API</h1>
-         <h1>You need to be an HF PRO user to use it.</h1>
-         <li>1. Create your token (as API key) <a target="_blank" href="https://huggingface.co/settings/tokens/new">[here]</a> by selecting "serverless Inference API".</li>
-         <li>2. Set `https://tastypear-command-r-plus-chat.hf.space/api` as the domain in the client configuration.</li>
-         If you have multiple keys, you can concatenate them with a semicolon (`;`) to use them randomly, e.g., `hf_aaaa;hf_bbbb;hf_...`
-     </body>
-     </html>
-     '''
-     return render_template_string(template)
-
- def get_new_bearer(key):
-     data = "C1RvUWoZAjd+ZBUyIV1CXjB3ay1VCA98Im4rWH5gVlZbKS1aBjhYU2YjHyVFeDwvI3x9cy92Vw1bKS5VHFM5VU9QVmpiDxJ6EmNSP1EHOgV6dCEOKEdncCJ7YBZmKQlkF1AYSkBOc0hiNhFBBHRWUmNrDQBycjUIOF5/WD1LRyZ/BidjFmEuelxBU3B9IhVDAnV5TXQRMGxFUDkDDVRnWzNVYg9DAQJiIVEqfFtRcXd3Lgd5CFx/U3AMDA1jPA4APUtifgh7fid7BhxJE28bSnVtYmdVAQt/CkdJYl4NDCRZQiNsCktrcwh3RwBlHQJUFngmdU9/Xl5eKC54KWdZXlYbAClJZAAlESdcUjt2eA5GASpUAmgKUkJ2cGdyBDZQCkpxUVQXLB51fy83GFh4PgxXcilgHCBdHVsZWnJjb0JEMAhaGWpeen8GDDR9fCMtLGBbeDA7RQdtLxJJDksCYGJ4VWVvNiRZNX9Ab0MtDRJ6RTM2NEVaeyJ+XGtaAzphAFcHd09Vd2FEBStDBnZGXkgMBjdPRQARG2phfCJzfS9Kbw1LO0w4cE5VektlARFbGX1EZUwLISplZh8JGGRxVTZCTDdrLgE="
-     data = b64decode(data)
-     # Repeating-key XOR: tile the caller's key to the blob length, XOR byte-wise,
-     # then pick one of the newline-separated decoded keys at random.
-     key = (key * (len(data) // len(key) + 1))[:len(data)]
-     data = (bytes([a ^ b for a, b in zip(data, key.encode())])).decode()
-     return random.choice(data.split('\n'))
-
-
- @app.route('/api/v1/chat/completions', methods=['POST'])
- def proxy():
-     headers = dict(request.headers)
-     headers.pop('Host', None)
-     headers.pop('Content-Length', None)
-     bearer = request.headers['Authorization'].split(' ')[1]
-
-     if bearer.startswith('hf_'):
-         # for public usage
-         headers['Authorization'] = f"Bearer {random.choice(bearer.split(';'))}"
-     else:
-         # my private keys
-         headers['Authorization'] = f'Bearer {get_new_bearer(bearer)}'
-
-     json_data = request.get_json()
-
-     # Avoid using cache
-     json_data["messages"][-1]['content'] = ' ' * random.randint(1, 20) + json_data["messages"][-1]['content']
-
-     # Use the largest ctx
-     json_data['max_tokens'] = 32768 - calc_messages_tokens(json_data)
-
-     json_data['json_mode'] = False
-
-     model = 'CohereForAI/c4ai-command-r-plus'
-
-     def generate():
-         with requests.post(f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions", json=json_data, headers=headers, stream=True) as resp:
-             for chunk in resp.iter_content(chunk_size=1024):
-                 if chunk:
-                     yield chunk
-
-     return Response(stream_with_context(generate()), content_type='text/event-stream')
-
- #import gevent.pywsgi
- #from gevent import monkey;monkey.patch_all()
- if __name__ == "__main__":
-     app.run(debug=True)
+ import random
+ import requests
+ from base64 import b64decode
+ from flask import Flask, request, jsonify, Response, stream_with_context, render_template_string
+
+ from transformers import AutoTokenizer
+
+ def calc_tokens(text):
+     tokenizer = AutoTokenizer.from_pretrained("PJMixers/CohereForAI_c4ai-command-r-plus-tokenizer")
+     tokens = tokenizer.tokenize(text)
+     return len(tokens)
+
+ def calc_messages_tokens(json_data):
+     messages = json_data["messages"]
+     m_messages = []
+     user_count = 0
+     prompt = "<BOS_TOKEN>"
+     for message in messages:
+         if message["role"] == "system":
+             prompt += f"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{message['content']}<|END_OF_TURN_TOKEN|>"
+         elif message["role"] == "user":
+             user_count += 1
+             prompt += f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message['content']}<|END_OF_TURN_TOKEN|>"
+         elif message["role"] == "assistant":
+             prompt += f"<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{message['content']}<|END_OF_TURN_TOKEN|>"
+         else:
+             continue
+     prompt += "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
+     total_tokens = calc_tokens(prompt) + user_count + 1
+     return total_tokens + 10  # for robustness
+
+ app = Flask(__name__)
+
+ @app.route('/', methods=['GET'])
+ def index():
+     template = '''
+     <html>
+     <head>
+         <title>Command-R-Plus Chat API</title>
+     </head>
+     <body>
+         <h1>Command-R-Plus OpenAI Compatible API</h1>
+         <h1>You need to be an HF PRO user to use it.</h1>
+         <li>1. Create your token (as API key) <a target="_blank" href="https://huggingface.co/settings/tokens/new">[here]</a> by selecting "serverless Inference API".</li>
+         <li>2. Set `https://tastypear-command-r-plus-chat.hf.space/api` as the domain in the client configuration.</li>
+         If you have multiple keys, you can concatenate them with a semicolon (`;`) to use them randomly, e.g., `hf_aaaa;hf_bbbb;hf_...`
+     </body>
+     </html>
+     '''
+     return render_template_string(template)
+
+ def get_new_bearer(key):
+     data = "C1RvUWoZAjd+ZBUyIV1CXjB3ay1VCA98Im4rWH5gVlZbKS1aBjhYU2YjHyVFeDwvI3x9cy92Vw1bKS5VHFM5VU9QVmpiDxJ6EmNSP1EHOgV6dCEOKEdncCJ7YBZmKQlkF1AYSkBOc0hiNhFBBHRWUmNrDQBycjUIOF5/WD1LRyZ/BidjFmEuelxBU3B9IhVDAnV5TXQRMGxFUDkDDVRnWzNVYg9DAQJiIVEqfFtRcXd3Lgd5CFx/U3AMDA1jPA4APUtifgh7fid7BhxJE28bSnVtYmdVAQt/CkdJYl4NDCRZQiNsCktrcwh3RwBlHQJUFngmdU9/Xl5eKC54KWdZXlYbAClJZAAlESdcUjt2eA5GASpUAmgKUkJ2cGdyBDZQCkpxUVQXLB51fy83GFh4PgxXcilgHCBdHVsZWnJjb0JEMAhaGWpeen8GDDR9fCMtLGBbeDA7RQdtLxJJDksCYGJ4VWVvNiRZNX9Ab0MtDRJ6RTM2NEVaeyJ+XGtaAzphAFcHd09Vd2FEBStDBnZGXkgMBjdPRQARG2phfCJzfS9Kbw1LO0w4cE5VektlARFbGX1EZUwLISplZh8JGGRxVTZCTDdrLgE="
+     data = b64decode(data)
+     # Repeating-key XOR: tile the caller's key to the blob length, XOR byte-wise,
+     # then pick one of the newline-separated decoded keys at random.
+     key = (key * (len(data) // len(key) + 1))[:len(data)]
+     data = (bytes([a ^ b for a, b in zip(data, key.encode())])).decode()
+     return random.choice(data.split('\n'))
+
+
+ @app.route('/api/v1/chat/completions', methods=['POST'])
+ def proxy():
+     headers = dict(request.headers)
+     headers.pop('Host', None)
+     headers.pop('Content-Length', None)
+     bearer = request.headers['Authorization'].split(' ')[1]
+
+     if bearer.startswith('hf_'):
+         # for public usage
+         headers['Authorization'] = f"Bearer {random.choice(bearer.split(';'))}"
+     else:
+         # my private keys
+         headers['Authorization'] = f'Bearer {get_new_bearer(bearer)}'
+
+     headers['X-Use-Cache'] = 'false'
+
+     json_data = request.get_json()
+
+     # Use the largest ctx
+     json_data['max_tokens'] = 32768 - calc_messages_tokens(json_data)
+
+     json_data['json_mode'] = False
+
+     model = 'CohereForAI/c4ai-command-r-plus'
+
+     def generate():
+         with requests.post(f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions", json=json_data, headers=headers, stream=True) as resp:
+             for chunk in resp.iter_content(chunk_size=1024):
+                 if chunk:
+                     yield chunk
+
+     return Response(stream_with_context(generate()), content_type='text/event-stream')
+
+ #import gevent.pywsgi
+ #from gevent import monkey;monkey.patch_all()
+ if __name__ == "__main__":
+     app.run(debug=True)
  # gevent.pywsgi.WSGIServer((args.host, args.port), app).serve_forever()
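For reference, a client-side sketch of steps 1-2 from the index page: point the client at the Space's `/api` base and pass an HF token as the API key. A minimal streaming example with `requests` (the token and prompt are placeholders; semicolon-joined tokens also work, per the note in the template):

import requests

url = "https://tastypear-command-r-plus-chat.hf.space/api/v1/chat/completions"
headers = {"Authorization": "Bearer hf_xxxx"}  # placeholder; "hf_aaaa;hf_bbbb" is also accepted
payload = {
    "messages": [{"role": "user", "content": "Say hello."}],
    "stream": True,
}
with requests.post(url, json=payload, headers=headers, stream=True) as resp:
    # The proxy relays the upstream SSE stream verbatim, chunk by chunk.
    for line in resp.iter_lines():
        if line:
            print(line.decode())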