Update main.py
main.py
CHANGED
@@ -32,7 +32,7 @@ def index():
 </head>
 <body>
 <h1>Mistral-Nemo OpenAI Compatible API</h1>
-<li>
+<li>Create your token(use as api key) <a target="_blank" href="https://huggingface.co/settings/tokens/new">[here]</a> by selecting "serverless Inference API".</li>
 <li>2. Set "https://tastypear-mistral-nemo-chat.hf.space/api" as the domain in the client configuration.</li>
 If you have multiple keys, you can concatenate them with a semicolon (`;`) to use them randomly, e.g., `hf_aaaa;hf_bbbb;hf_...`
 </body>
@@ -47,20 +47,17 @@ def proxy():
     headers.pop('Content-Length', None)
     keys = request.headers['Authorization'].split(' ')[1].split(';')
     headers['Authorization'] = f'Bearer {random.choice(keys)}'
-
+    headers['X-Use-Cache'] = 'false'
     json_data = request.get_json()
 
-    # Avoid using cache
-    json_data["messages"][-1]['content'] = ' '*random.randint(1, 20)+json_data["messages"][-1]['content']
-
     # Use the largest ctx
     json_data['max_tokens'] = 32768 - calc_messages_tokens(json_data)
 
     json_data['json_mode'] = False
-
+
+    model = 'mistralai/Mistral-Nemo-Instruct-2407'
 
     def generate():
-        model = 'mistralai/Mistral-Nemo-Instruct-2407'
         with requests.post(f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions", json=request.json, headers=headers, stream=True) as resp:
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
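This hunk replaces the old cache workaround, which prepended 1 to 20 random spaces to the last user message, with the Inference API's documented `x-use-cache` request header, and it hoists the `model` constant out of `generate()`. Below is a minimal sketch of the new approach used directly against the upstream API; the token and prompt are placeholders, not part of this commit:

```python
import requests

# Sketch: disable HF's serverless inference response cache per request
# via the documented x-use-cache header, instead of mutating the prompt.
HF_TOKEN = "hf_..."  # placeholder; substitute a real token
model = "mistralai/Mistral-Nemo-Instruct-2407"

resp = requests.post(
    f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {HF_TOKEN}",
        "X-Use-Cache": "false",  # skip the cache for this request
    },
    json={
        "model": model,
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```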
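With this change deployed, the Space acts as an OpenAI-compatible endpoint, as the updated index page describes. A hedged usage sketch follows: the exact route under `/api` is an assumption based on the OpenAI path convention, and `hf_aaaa`/`hf_bbbb` are placeholder tokens. Note that the proxy picks one key at random per request (see `random.choice(keys)` in the diff above).

```python
import requests

# Hypothetical client call; the /v1/chat/completions route under /api
# is assumed, not confirmed by this diff.
API_BASE = "https://tastypear-mistral-nemo-chat.hf.space/api"
KEYS = "hf_aaaa;hf_bbbb"  # multiple tokens joined with ';'

resp = requests.post(
    f"{API_BASE}/v1/chat/completions",
    headers={"Authorization": f"Bearer {KEYS}"},
    json={
        "model": "mistralai/Mistral-Nemo-Instruct-2407",
        "messages": [{"role": "user", "content": "Say hi."}],
        "stream": False,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```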