Update main.py
main.py
CHANGED
@@ -32,7 +32,7 @@ def index():
 </head>
 <body>
 <h1>Mistral-Nemo OpenAI Compatible API</h1>
-<li>
+<li>Create your token(use as api key) <a target="_blank" href="https://huggingface.co/settings/tokens/new">[here]</a> by selecting "serverless Inference API".</li>
 <li>2. Set "https://tastypear-mistral-nemo-chat.hf.space/api" as the domain in the client configuration.</li>
 If you have multiple keys, you can concatenate them with a semicolon (`;`) to use them randomly, e.g., `hf_aaaa;hf_bbbb;hf_...`
 </body>
@@ -47,20 +47,17 @@ def proxy():
     headers.pop('Content-Length', None)
     keys = request.headers['Authorization'].split(' ')[1].split(';')
     headers['Authorization'] = f'Bearer {random.choice(keys)}'
-
+    headers['X-Use-Cache'] = 'false'
     json_data = request.get_json()
 
-    # Avoid using cache
-    json_data["messages"][-1]['content'] = ' '*random.randint(1, 20)+json_data["messages"][-1]['content']
-
     # Use the largest ctx
     json_data['max_tokens'] = 32768 - calc_messages_tokens(json_data)
 
     json_data['json_mode'] = False
-
+
+    model = 'mistralai/Mistral-Nemo-Instruct-2407'
 
     def generate():
-        model = 'mistralai/Mistral-Nemo-Instruct-2407'
         with requests.post(f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions", json=request.json, headers=headers, stream=True) as resp:
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
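This hunk replaces the old cache workaround, which prepended 1 to 20 random spaces to the last user message, with the Inference API's documented `x-use-cache` request header, and it hoists the `model` constant out of `generate()`. Below is a minimal sketch of the new approach used directly against the upstream API; the token and prompt are placeholders, not part of this commit:

```python
import requests

# Sketch: disable HF's serverless inference response cache per request
# via the documented x-use-cache header, instead of mutating the prompt.
HF_TOKEN = "hf_..."  # placeholder; substitute a real token
model = "mistralai/Mistral-Nemo-Instruct-2407"

resp = requests.post(
    f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {HF_TOKEN}",
        "X-Use-Cache": "false",  # skip the cache for this request
    },
    json={
        "model": model,
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 64,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```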
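With this change deployed, the Space acts as an OpenAI-compatible endpoint, as the updated index page describes. A hedged usage sketch follows: the exact route under `/api` is an assumption based on the OpenAI path convention, and `hf_aaaa`/`hf_bbbb` are placeholder tokens. Note that the proxy picks one key at random per request (see `random.choice(keys)` in the diff above).

```python
import requests

# Hypothetical client call; the /v1/chat/completions route under /api
# is assumed, not confirmed by this diff.
API_BASE = "https://tastypear-mistral-nemo-chat.hf.space/api"
KEYS = "hf_aaaa;hf_bbbb"  # multiple tokens joined with ';'

resp = requests.post(
    f"{API_BASE}/v1/chat/completions",
    headers={"Authorization": f"Bearer {KEYS}"},
    json={
        "model": "mistralai/Mistral-Nemo-Instruct-2407",
        "messages": [{"role": "user", "content": "Say hi."}],
        "stream": False,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```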