Spaces:

ArabianAI
/

new_space_phi3

Sleeping

File size: 1,004 Bytes

from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
import os 
app = Flask(__name__)

n_gpu_layers = 0
n_batch = 1024


llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096
)
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
print("model size ====> :", file_size.st_size, "bytes")


@app.route('/', methods=['POST'])
def get_skills():
    cv_body = request.json.get('cv_body')

    # Simple inference example
    output = llm(
        f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
        max_tokens=256,  # Generate up to 256 tokens
        stop=["<|end|>"], 
        echo=True,  # Whether to echo the prompt
    )

    return jsonify({'skills': output})

@app.get('/health')
def health():
    return jsonify({'status': 'healthy'})

if __name__ == '__main__':
    app.run()