Spaces:
Runtime error
Runtime error
File size: 6,018 Bytes
315d075 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# ---------------------------------------- IMPORTS ---------------------------------------- #
# import flask and flask_cors to host the api
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
# import the api class
from assets.source import api, non_streamed_format
# import addon
from assets.source.addons import * # here we only use 'create_cloudflare_tunnel' and 'translate' from the addons
# logging module for debugging
import logging
# json module to parse json
from json import loads
# ---------------------------------------- CONFIGURE LOCAL SERVER ---------------------------------------- #
# create flask app
app = Flask(__name__)
app.template_folder = "assets/templates"
# enable cors so browser clients served from other origins can call the api
CORS(app)
# ---------------------------------------- READ FROM CONFIG FILE ---------------------------------------- #
with open("assets/config.json", "r") as f:
    config_file = loads(f.read())
# copy constants over (fall back to sane defaults when a key is missing)
DEBUG: bool = config_file.get("DEBUG", False)
PORT: int = config_file.get("PORT", 5000)
HOST: str = config_file.get("HOST", "0.0.0.0")
# check if user wants to use a global server too
# (use .get for consistency with the constants above: a missing key now
# means "no tunnel" instead of raising KeyError at import time)
if config_file.get("use_global", False):
    # create a cloudflare tunnel to expose the local server publicly
    create_cloudflare_tunnel(PORT)
# ---------------------------------------- LOGGING CONFIG ---------------------------------------- #
# set logging level
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')
# ---------------------------------------- ROUTES ---------------------------------------- #
# chat generation route
@app.route("/chat/completions", methods=["POST"])
def chat():
    """Handle an OpenAI-style chat completion request.

    Reads the JSON request body, optionally runs the addon translations on
    the messages and model name, then either streams SSE chunks back to the
    client or collects the whole response into a single JSON payload,
    depending on the request's "stream" flag.
    """
    # get request data
    data = request.get_json()
    # get messages and model, passing them through the addons when enabled
    messages = message_translation(data["messages"]) if config_file["use_addons"] else data["messages"]
    model = translate(data["model"]) if config_file["use_addons"] else data["model"]
    # generation parameters (OpenAI-compatible names, local defaults)
    max_tokens = data.get("max_tokens", 150)
    top_p = data.get("top_p", 0.99)
    top_k = data.get("top_k", 50)
    temperature = data.get("temperature", 0.6)
    frequency_penalty = data.get("frequency_penalty", 1)
    presence_penalty = data.get("presence_penalty", 1)

    def generate():
        # single place that calls the upstream API; always streamed upstream,
        # even for non-streaming clients (we aggregate below in that case)
        return api.chat(messages,
                        model,
                        stream=True,
                        max_tokens=max_tokens,
                        top_p=top_p,
                        temperature=temperature,
                        frequency_penalty=frequency_penalty,
                        presence_penalty=presence_penalty,
                        top_k=top_k)

    def stream():
        # SSE framing: each chunk terminated by a blank line, then the
        # [DONE] sentinel expected by OpenAI-compatible clients
        for chunk in generate():
            yield chunk + b'\n\n'
        yield b'data: [DONE]'

    # check if user wants to stream
    if data.get("stream"):
        logging.info("Streaming requested for model %s\n", model)
        return app.response_class(stream(), mimetype='text/event-stream')
    # even if not, stream but collect all data to a full string
    logging.info("Non-streaming requested for model %s\n", model)
    full: str = ""
    for chunk in generate():
        try:
            # append this chunk's delta content to the aggregate
            full += loads(chunk.decode("utf-8").removeprefix('data: '))["choices"][0]["delta"]["content"]
        except (ValueError, KeyError, IndexError, TypeError, UnicodeDecodeError):
            # skip malformed/keep-alive chunks and deltas without "content"
            # (e.g. role-only deltas); was a bare `except: pass`, which also
            # hid real bugs such as NameErrors
            pass
    # return full
    return jsonify(non_streamed_format(model, full))
# model-listing route
@app.route("/models", methods=["GET"])
def get_models():
    """Return the upstream API's model list as a JSON response."""
    models = api.get_models()
    return jsonify(models)
# landing page, doubles as a liveness check for the api
@app.route("/", methods=["GET"])
def root():
    """Serve the index page so clients can confirm the API is up."""
    return render_template("index.html")
# ---------------------------------------- ERROR HANDLING ---------------------------------------- #
@app.errorhandler(403)
def forbidden(error):
    """Return a structured JSON body for 403 responses.

    Emitted when the upstream DeepInfra API blocks our request.
    Note: passing several dicts to jsonify serializes them as a JSON array.
    """
    return jsonify(
        {"status": False},
        {'error': [
            {'message': 'Something went wrong, the API was blocked from sending a request to the DeepInfra API. Please try again later.'},
            # fixed typo in the response key: was 'tpye'
            {'type': 'forbidden'},
            {'error': f'{error}'}
        ]},
        {'hint': 'please report issues on the github page'}
    ), 403
@app.errorhandler(500)
def internal_server_error(error):
    """Return a structured JSON body for 500 responses.

    Emitted when an unhandled exception occurs while serving a request.
    Note: passing several dicts to jsonify serializes them as a JSON array.
    """
    return jsonify(
        {"status": False},
        {'error': [
            {'message': 'Something went wrong, the API was unable to complete your request. Please try again later.'},
            # fixed typo in the response key: was 'tpye'
            {'type': 'internal server error'},
            {'error': f'{error}'}
        ]},
        {'hint': 'please report issues on the github page'}
    ), 500
# ---------------------------------------- START API ---------------------------------------- #
# start the api using the HOST/PORT/DEBUG values read from assets/config.json
# (only when run as a script, not when imported e.g. by a WSGI server)
if __name__ == "__main__":
    app.run(debug=DEBUG, port=PORT, host=HOST)
# Path: app.py
|