from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

app = FastAPI()

# Use the Hugging Face Inference API (replace the model name if needed).
# Other models tried before settling on one small enough for the free tier:
#   mistralai/Mistral-7B-Instruct-v0.1   (Mistral 7B)
#   HuggingFaceH4/zephyr-3b
#   serkanarslan/mistral-7b-mini-ft
#   microsoft/phi-2
# You can switch to Phi-2, OpenChat, etc.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Pass the model ID once at construction time; the client resolves the
# Inference API URL itself, so the model does not need to be repeated on
# every call. A token saved via `huggingface-cli login` (or the HF_TOKEN
# environment variable) is picked up automatically.
client = InferenceClient(model=MODEL_NAME)

# Request body schema for the /chat endpoint.
class ChatRequest(BaseModel):
    message: str

@app.post("/chat")
async def chat(request: ChatRequest):
    # Generate a completion for the incoming message, capped at 100 new tokens.
    response = client.text_generation(request.message, max_new_tokens=100)
    return {"response": response}
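
# --- Usage sketch (not part of the service itself) ---
# A minimal way to run this endpoint locally, assuming the file is saved as
# main.py and that uvicorn is installed alongside fastapi (both the filename
# and the uvicorn dependency are assumptions, not stated in the original):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)

# Example request once the server is running (assumes the `requests` package):
#
#   import requests
#   r = requests.post("http://127.0.0.1:8000/chat", json={"message": "Hello!"})
#   print(r.json()["response"])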