"""Small Flask proxy in front of the DeepInfra OpenAI-compatible chat API.

Exposes:
    POST /generate-text-deep  -- forward a chat request (streaming or not)
    GET  /info                -- list the model names this proxy advertises
"""

import uvicorn  # NOTE(review): unused in this file (app.run() is used below); kept in case an external runner imports it
import json

import requests
from flask import Flask, Response, jsonify, request, stream_with_context

app = Flask(__name__)

# One shared session so TCP connections to the upstream API are reused.
rq = requests.Session()

# Upstream endpoint and browser-like UA the original code sent.
_API_URL = "https://api.deepinfra.com/v1/openai/chat/completions"
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
)

# (connect, read) timeout in seconds so a hung upstream cannot block a worker forever.
_TIMEOUT = (10, 300)

# Models advertised by the /info endpoint.
model_names = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "mistralai/Mixtral-8x22B-v0.1",
    "microsoft/WizardLM-2-8x22B",
    "microsoft/WizardLM-2-7B",
    "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
    "google/gemma-1.1-7b-it",
    "databricks/dbrx-instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "meta-llama/Llama-2-70b-chat-hf",
    "cognitivecomputations/dolphin-2.6-mixtral-8x7b",
    "codellama/CodeLlama-70b-Instruct-hf",
]


def _auth_headers(Api: str, extra: dict) -> dict:
    """Build upstream request headers, attaching the bearer token when given.

    The original code accepted ``Api`` but never forwarded it; the route
    requires an api_key, so forwarding it is the evident intent.
    """
    headers = {"user-agent": _USER_AGENT}
    headers.update(extra)
    if Api:
        headers["Authorization"] = "Bearer " + Api
    return headers


def DeepinFra_No_stream(Api: str, messages: list,
                        model: str = "meta-llama/Meta-Llama-3-70B-Instruct",
                        max_tokens: int = 512,
                        temperature: float = 0.7) -> str:
    """Run a non-streaming chat completion and return the reply text.

    Args:
        Api: DeepInfra API key (sent as a Bearer token when non-empty).
        messages: OpenAI-style chat message list.
        model / max_tokens / temperature: forwarded verbatim upstream.

    Returns:
        The assistant message content on success; otherwise a diagnostic
        string (this function never raises to its caller).
    """
    headers = _auth_headers(Api, {"accept": "text/event-stream"})
    payload = json.dumps(
        {
            'model': model,
            'messages': messages,
            'temperature': temperature,
            'max_tokens': max_tokens,
            'stop': [],
            'stream': False,
        },
        separators=(',', ':'),
    )
    # Network failure handled separately: in the original, a failed post left
    # `result` unbound and the bare except then raised NameError.
    try:
        result = rq.post(url=_API_URL, headers=headers, data=payload,
                         timeout=_TIMEOUT)
    except requests.RequestException as exc:
        return "Request failed: " + str(exc)
    try:
        return result.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not the expected JSON shape (e.g. an upstream error page).
        return "Response content: " + result.text


def DeepinFra_stream(Api: str, messages: list,
                     model: str = "meta-llama/Meta-Llama-3-70B-Instruct",
                     max_tokens: int = 512,
                     temperature: float = 0.7):
    """Yield chat-completion text chunks from the upstream SSE stream.

    Yields each non-empty ``delta.content`` string in order.  On transport
    failure, yields a single diagnostic string instead (a generator ``return``
    value would be silently discarded, as it was in the original).
    """
    headers = _auth_headers(Api, {
        'Content-Type': 'application/json',
        'Accept': 'text/event-stream',
    })
    payload = json.dumps(
        {
            'model': model,
            'messages': messages,
            'temperature': temperature,
            'max_tokens': max_tokens,
            'stream': True,
        },
        separators=(',', ':'),
    )
    try:
        result = rq.post(url=_API_URL, headers=headers, data=payload,
                         stream=True, timeout=_TIMEOUT)
    except requests.RequestException as exc:
        yield "Request failed: " + str(exc)
        return

    for raw in result.iter_lines():
        if not raw:
            continue
        line = raw.decode('utf-8')
        if not line.startswith('data: '):
            continue  # SSE comments / other fields
        chunk = line[len('data: '):]
        if chunk.strip() == '[DONE]':
            break  # terminal sentinel is not JSON; the original crashed here
        try:
            event = json.loads(chunk)
            content = event['choices'][0]['delta']['content']
        except (ValueError, KeyError, IndexError, TypeError):
            # Role-only or empty deltas carry no 'content'; skip rather than
            # abort the whole stream (the original broke on the first one).
            continue
        if content:
            yield content


@app.route("/generate-text-deep", methods=["POST"])
def generate_text():
    """Proxy endpoint: validate the JSON body and relay the completion.

    Expects JSON with ``message`` (chat message list) and ``api_key``;
    optional ``model_name``, ``max_tokens``, ``temperature``, ``stream``.
    Responds with newline-delimited ``{"response": ...}`` JSON objects.
    """
    data = request.json
    message = data.get("message")
    Api = data.get("api_key")
    model_name = data.get("model_name", "meta-llama/Meta-Llama-3-70B-Instruct")
    max_tokens = data.get("max_tokens", 512)
    temperature = data.get("temperature", 0.7)
    stream = data.get("stream", True)

    if not message or not Api:
        return jsonify({"error": "Missing required fields"}), 400

    def generate_response(use_stream: bool):
        # Emits one JSON line per chunk (streaming) or a single JSON line.
        if use_stream:
            for part in DeepinFra_stream(Api=Api, messages=message,
                                         model=model_name,
                                         max_tokens=max_tokens,
                                         temperature=temperature):
                yield json.dumps({"response": part}) + "\n"
        else:
            reply = DeepinFra_No_stream(Api=Api, messages=message,
                                        model=model_name,
                                        max_tokens=max_tokens,
                                        temperature=temperature)
            yield json.dumps({"response": reply}) + "\n"

    return Response(stream_with_context(generate_response(stream)),
                    content_type='application/json'), 200


@app.route("/info", methods=["GET"])
def get_info():
    """Return the list of advertised model names."""
    return jsonify({"model_names": model_names}), 200


if __name__ == "__main__":
    app.run()