"""Minimal Flask chat API backed by a local llama.cpp GGUF model."""
from flask import Flask, request, jsonify, render_template
from llama_cpp import Llama

app = Flask(__name__)

# Download (or load from the local HF cache) the quantized model once at startup.
llm = Llama.from_pretrained(
    repo_id="yukiarimo/yuna-ai-v3",
    filename="yuna-ai-v3-q3_k_m.gguf",
    verbose=False,
)


@app.route('/')
def index():
    """Serve the chat front-end page."""
    return render_template('index.html')


@app.route('/api/generate', methods=['POST'])
def generate():
    """Generate a short completion for a posted JSON body {"message": "..."}.

    Returns:
        200 with {"response": <generated text>} on success.
        400 with {"error": ...} when the body is not JSON or lacks "message"
        (the original `request.json['message']` raised an unhandled 500 here).
    """
    payload = request.get_json(silent=True)
    if not payload or 'message' not in payload:
        return jsonify({'error': "JSON body with a 'message' field is required"}), 400

    user_message = payload['message']
    output = llm(
        f"Q: {user_message}\nA:",
        max_tokens=32,
        stop=["Q:", "\n"],  # stop before the model invents the next question
        echo=False,
    )
    # llama_cpp returns an OpenAI-style completion dict; expose only the
    # generated text rather than leaking the full metadata (ids, timings, usage).
    text = output['choices'][0]['text'].strip()
    return jsonify({'response': text})


if __name__ == '__main__':
    # NOTE(review): binding 0.0.0.0 exposes the server on all interfaces —
    # confirm this is intended before deploying outside a trusted network.
    app.run(host='0.0.0.0', port=5000)