"""Minimal Flask chat server backed by a local llama.cpp model.

Routes:
    GET  /              -> chat UI rendered from templates/index.html
    POST /api/generate  -> JSON {"message": str} -> {"response": str}
"""
import os

from flask import Flask, jsonify, render_template, request
from llama_cpp import Llama

app = Flask(__name__)

# Model path is overridable via the environment so deployments can swap
# checkpoints without editing code; the default preserves original behavior.
MODEL_PATH = os.environ.get("YUNA_MODEL_PATH", "./yuna-ai-v3-q3_k_m.gguf")

# Loaded once at import time: llama.cpp model loading is expensive, so the
# single instance is shared across requests.
llm = Llama(model_path=MODEL_PATH, verbose=False)


@app.route('/')
def index():
    """Serve the chat front-end."""
    return render_template('index.html')


@app.route('/api/generate', methods=['POST'])
def generate():
    """Generate one short model reply for the posted user message.

    Expects a JSON body of the form {"message": "<user text>"}.
    Returns 400 on a missing/malformed body instead of the original
    unhandled KeyError/TypeError (which surfaced as an HTTP 500).
    """
    payload = request.get_json(silent=True)
    if not payload or not isinstance(payload.get('message'), str):
        return jsonify({'error': "JSON body with a string 'message' field is required"}), 400

    user_message = payload['message']
    output = llm(
        f"Yuki: {user_message}\nYuna:",
        max_tokens=16,
        # Stop on speaker tags or newline so the model answers a single turn.
        stop=["Yuki:", "Yuna:", "\n"],
        echo=False,
    )
    # llama.cpp completions typically begin with a leading space; strip the
    # surrounding whitespace before returning the reply.
    return jsonify({'response': output['choices'][0]['text'].strip()})


if __name__ == '__main__':
    # NOTE(review): 0.0.0.0 binds all network interfaces — confirm this
    # exposure is intended for the deployment environment.
    app.run(host='0.0.0.0', port=7860)