from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

app = FastAPI()

# Use the Hugging Face Inference API (replace the model name if needed).
# An earlier approach loaded a model and tokenizer locally; those
# candidates are kept commented out for reference:
#model_name = "mistralai/Mistral-7B-Instruct-v0.1"  # Mistral 7B
#model_name = "HuggingFaceH4/zephyr-3b"
#model_name = "serkanarslan/mistral-7b-mini-ft"
# A smaller model fits the free tier better:
#model_name = "microsoft/phi-2"
#model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can switch to Phi-2, OpenChat, etc.

# Full Hugging Face Inference API URL for the chosen model.
HF_API_URL = "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0"
client = InferenceClient(HF_API_URL)
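
# NOTE (assumption, not in the original code): the hosted Inference API often
# requires an access token. If requests fail with 401/403, pass one in, e.g.
# read from an environment variable (the variable name HF_TOKEN is illustrative):
#   import os
#   client = InferenceClient(HF_API_URL, token=os.environ["HF_TOKEN"])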

# Request body schema for the chat endpoint.
class ChatRequest(BaseModel):
    message: str
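
# Example request body this schema accepts (illustrative):
#   {"message": "Hello, how are you?"}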

@app.post("/chat")  # route decorator was absent; the "/chat" path is assumed from the function name
async def chat(request: ChatRequest):
    # Forward the user's message to the hosted model. Passing `model` here is
    # redundant with the client-level URL above but kept from the original fix.
    response = client.text_generation(
        request.message,
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        max_new_tokens=100,
    )
    return {"response": response}