import os
import threading

import requests
from flask import Flask, render_template

# from openai import OpenAI
#
# client = OpenAI(
#     # This base_url points to the local Llamafile server running on port 8080
#     base_url="http://127.0.0.1:8080/v1",
#     api_key="sk-no-key-required"
# )

API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2"

token = os.getenv('TOKEN')
if not token:
    raise RuntimeError("Set the TOKEN environment variable to a Hugging Face API token")
bearer = "Bearer " + token
headers = {"Authorization": bearer}

app = Flask(__name__)


@app.route('/app')
def server_app():
    # Start the llamafile server in a background thread so this request returns immediately.
    llamafile = threading.Thread(target=threadserver, daemon=True)
    llamafile.start()
    return 'llamafile server starting in a background thread'


@app.route('/')
def server_one():
    sourcesim = "Results"
    s1 = "Results"
    # NB: headertxt exposes the bearer token to the rendered template.
    return render_template("similarity_1.html", sourcetxt=sourcesim, s1=s1, headertxt=bearer)


# @app.route('/chat', methods=['POST'])
# def chat():
#     # Requires `from flask import request, jsonify` and the OpenAI client above.
#     try:
#         user_message = request.json['message']
#         completion = client.chat.completions.create(
#             model="LLaMA_CPP",
#             messages=[
#                 {"role": "system", "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests."},
#                 {"role": "user", "content": user_message}
#             ]
#         )
#         ai_response = completion.choices[0].message.content
#         # NB: replacing '' is a no-op; the stop-token string to strip appears to be missing.
#         ai_response = ai_response.replace('', '').strip()
#         return jsonify({'response': ai_response})
#     except Exception as e:
#         print(f"Error: {str(e)}")
#         return jsonify({'response': f"Sorry, there was an error processing your request: {str(e)}"}), 500


def threadserver():
    # Launch the local llamafile embedding server; os.system blocks for the life of the process.
    os.system('./mxbai-embed-large-v1-f16.llamafile --server --nobrowser')


def query(data):
    # requests is synchronous, so the original async/await wrapper is dropped here.
    response = requests.post(API_URL, headers=headers, json=data)
    return response.json()


if __name__ == '__main__':
    # This guard must come after the function definitions; in the original it ran
    # before threadserver and query were defined, so the /app route raised NameError.
    app.run(debug=True)
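
# --- Illustrative usage sketch (not part of the original app) ---
# The all-MiniLM-L6-v2 endpoint serves the sentence-similarity task; per the
# Hugging Face Inference API docs it takes a source sentence plus candidate
# sentences and returns one similarity score per candidate. The sentences
# below are illustrative placeholders.
#
# payload = {
#     "inputs": {
#         "source_sentence": "That is a happy person",
#         "sentences": [
#             "That is a happy dog",
#             "That is a very happy person",
#         ],
#     }
# }
# scores = query(payload)  # -> list of floats, one per candidate sentence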
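
# If the async `query` of the original was intentional, a working variant needs
# an async HTTP client, since `requests` has no awaitable API. A sketch using
# httpx (an added dependency, assumed installed via `pip install httpx`):
#
# import httpx
#
# async def query_async(data):
#     async with httpx.AsyncClient() as client:
#         resp = await client.post(API_URL, headers=headers, json=data)
#         return resp.json()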