|
import uvicorn |
|
import json |
|
import requests |
|
from flask import Flask, request, jsonify |
|
from flask import Response, stream_with_context |
|
|
|
|
|
# Flask application instance hosting the proxy endpoints below.
app = Flask(__name__)

# Shared HTTP session so repeated upstream requests reuse TCP connections.
rq = requests.Session()
|
|
|
# Model identifiers accepted by the DeepInfra chat-completions endpoint;
# served to clients via GET /info so they can pick a valid `model_name`.
model_names = [

    "meta-llama/Meta-Llama-3-70B-Instruct",

    "meta-llama/Meta-Llama-3-8B-Instruct",

    "mistralai/Mixtral-8x22B-Instruct-v0.1",

    "mistralai/Mixtral-8x22B-v0.1",

    "microsoft/WizardLM-2-8x22B",

    "microsoft/WizardLM-2-7B",

    "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",

    "google/gemma-1.1-7b-it",

    "databricks/dbrx-instruct",

    "mistralai/Mixtral-8x7B-Instruct-v0.1",

    "mistralai/Mistral-7B-Instruct-v0.2",

    "meta-llama/Llama-2-70b-chat-hf",

    "cognitivecomputations/dolphin-2.6-mixtral-8x7b",

    "codellama/CodeLlama-70b-Instruct-hf"

]
|
|
|
|
|
|
|
|
|
def DeepinFra_No_stream(Api: str, messages: list, model: str = "meta-llama/Meta-Llama-3-70B-Instruct", max_tokens: int = 512, temperature: float = 0.7):
    """Send one non-streaming chat-completion request to DeepInfra.

    Args:
        Api: Caller-supplied API key.  NOTE(review): the original code never
            attached this to the request; it remains unused here so upstream
            behavior is unchanged -- confirm whether an
            ``Authorization: Bearer <Api>`` header is intended.
        messages: OpenAI-style chat messages (list of role/content dicts).
        model: Model identifier (see module-level ``model_names``).
        max_tokens: Completion length cap forwarded verbatim to the API.
        temperature: Sampling temperature forwarded verbatim to the API.

    Returns:
        The assistant message content on success, otherwise a diagnostic
        string describing what went wrong.
    """
    url = "https://api.deepinfra.com/v1/openai/chat/completions"
    headers = {
        "accept": "text/event-stream",
        # Declare the JSON body explicitly, matching DeepinFra_stream.
        "content-type": "application/json",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
    }

    payload = json.dumps(
        {
            'model': model,
            'messages': messages,
            'temperature': temperature,
            'max_tokens': max_tokens,
            'stop': [],
            'stream': False
        }, separators=(',', ':')
    )

    # The POST gets its own handler: the original bare `except` referenced
    # `result` even when the POST itself raised, turning a network error
    # into a NameError.
    try:
        result = rq.post(url=url, headers=headers, data=payload)
    except requests.RequestException as exc:
        return "Request failed: " + str(exc)

    try:
        return result.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not the expected JSON shape; surface the raw text so the
        # caller can see the upstream error message.
        return "Response content: " + result.text
|
|
|
def DeepinFra_stream(Api: str, messages: list, model: str = "meta-llama/Meta-Llama-3-70B-Instruct", max_tokens: int = 512, temperature: float = 0.7):
    """Stream a chat completion from DeepInfra, yielding content deltas.

    Parses the server-sent-event response line by line and yields each
    ``choices[0].delta.content`` fragment as it arrives.

    Args:
        Api: Caller-supplied API key.  NOTE(review): unused by the original
            request (see DeepinFra_No_stream) -- confirm whether an
            ``Authorization: Bearer <Api>`` header is intended.
        messages: OpenAI-style chat messages (list of role/content dicts).
        model: Model identifier (see module-level ``model_names``).
        max_tokens: Completion length cap forwarded verbatim to the API.
        temperature: Sampling temperature forwarded verbatim to the API.

    Yields:
        str: content fragments, or a single diagnostic string on failure
        (the original `return`ed the diagnostic from inside the generator,
        which silently discarded it).
    """
    url = "https://api.deepinfra.com/v1/openai/chat/completions"
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        'Content-Type': 'application/json',
        'Accept': 'text/event-stream',
    }

    payload = json.dumps(
        {
            'model': model,
            'messages': messages,
            'temperature': temperature,
            'max_tokens': max_tokens,
            'stream': True
        }, separators=(',', ':')
    )

    # Keep the POST in its own handler so a connection failure cannot leave
    # `result` unbound (the original bare except then raised NameError).
    try:
        result = rq.post(url=url, headers=headers, data=payload, stream=True)
    except requests.RequestException as exc:
        yield "Request failed: " + str(exc)
        return

    try:
        for raw in result.iter_lines():
            if not raw:
                continue
            line = raw.decode('utf-8')
            # SSE data frames look like "data: {...}".  The original split
            # IndexError'd on comment/keep-alive lines; skip them instead.
            if not line.startswith('data: '):
                continue
            chunk = line[len('data: '):]
            # Explicit stream terminator sent by OpenAI-compatible APIs.
            if chunk == '[DONE]':
                break
            try:
                event = json.loads(chunk)
            except ValueError:
                continue
            try:
                yield event['choices'][0]['delta']['content']
            except (KeyError, IndexError, TypeError):
                # Frame without a content delta (e.g. finish marker): stop,
                # matching the original behavior.
                break
    except requests.RequestException:
        yield "Response content: " + result.text
|
|
|
|
|
|
|
@app.route("/generate-text-deep", methods=["POST"])
def generate_text():
    """POST endpoint proxying chat completions to DeepInfra.

    Expected JSON body:
        message: chat messages list (forwarded upstream as ``messages``)
        api_key: caller's API key (required)
        model_name / max_tokens / temperature / stream: optional tuning
            fields with the same defaults as the helper functions.

    Returns:
        A streamed response of newline-delimited ``{"response": ...}`` JSON
        objects, or a 400 JSON error when required fields are missing.
    """
    # silent=True: a missing/malformed JSON body yields None instead of
    # raising, so we can answer with a clean 400 rather than a 500
    # (request.json would raise on a wrong Content-Type).
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "Missing required fields"}), 400

    message = data.get("message")
    Api = data.get("api_key")
    model_name = data.get("model_name", "meta-llama/Meta-Llama-3-70B-Instruct")
    max_tokens = data.get("max_tokens", 512)
    temperature = data.get("temperature", 0.7)
    stream = data.get("stream", True)

    if not message or not Api:
        return jsonify({"error": "Missing required fields"}), 400

    def generate_response(stream: bool):
        # Both paths are wrapped in a generator so Flask streams either way;
        # the non-stream path simply yields a single line.
        if stream:
            for response in DeepinFra_stream(Api=Api, messages=message, model=model_name,
                                             max_tokens=max_tokens, temperature=temperature):
                yield json.dumps({"response": response}) + "\n"
        else:
            response = DeepinFra_No_stream(Api=Api, messages=message, model=model_name,
                                           max_tokens=max_tokens, temperature=temperature)
            yield json.dumps({"response": response}) + "\n"

    return Response(stream_with_context(generate_response(stream)), content_type='application/json'), 200
|
|
|
|
|
|
|
@app.route("/info", methods=["GET"])
def get_info():
    """Return the list of supported model identifiers as JSON."""
    payload = {"model_names": model_names}
    return jsonify(payload), 200
|
|
|
|
|
|
|
# Development entry point: run Flask's built-in server with defaults.
if __name__ == "__main__":
    app.run()