from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

app = FastAPI()

# Use the Hugging Face Inference API (replace the model name if needed).
# Other models tried before settling on one small enough for the free tier:
#   mistralai/Mistral-7B-Instruct-v0.1   (Mistral 7B)
#   HuggingFaceH4/zephyr-3b
#   serkanarslan/mistral-7b-mini-ft
#   microsoft/phi-2
# You can switch to Phi-2, OpenChat, etc.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Pass the model ID once at construction time; the client resolves the
# Inference API URL itself, so the model does not need to be repeated on
# every call. A token saved via `huggingface-cli login` (or the HF_TOKEN
# environment variable) is picked up automatically.
client = InferenceClient(model=MODEL_NAME)

# Request body schema for the /chat endpoint.
class ChatRequest(BaseModel):
    message: str

@app.post("/chat")
async def chat(request: ChatRequest):
    # Generate a completion for the incoming message, capped at 100 new tokens.
    response = client.text_generation(request.message, max_new_tokens=100)
    return {"response": response}
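
# --- Usage sketch (not part of the service itself) ---
# A minimal way to run this endpoint locally, assuming the file is saved as
# main.py and that uvicorn is installed alongside fastapi (both the filename
# and the uvicorn dependency are assumptions, not stated in the original):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)

# Example request once the server is running (assumes the `requests` package):
#
#   import requests
#   r = requests.post("http://127.0.0.1:8000/chat", json={"message": "Hello!"})
#   print(r.json()["response"])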