"""FastAPI micro-service exposing a /chat endpoint backed by a Hugging Face hosted model."""

import logging
import os

from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI()

# Get the API key securely from the environment variables.
# NOTE(review): if HF_API_KEY is unset this is None and every request will
# fail authentication against the Hugging Face API — confirm the deployment
# (e.g. the Secrets store mentioned here) always sets it.
API_KEY = os.getenv("HF_API_KEY")  # The key name matches the one you set in Secrets
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"

# Initialize Hugging Face Inference Client (shared across requests)
client = InferenceClient(api_key=API_KEY)


class ChatInput(BaseModel):
    """Single chat message accepted by the /chat endpoint."""

    # Role of the message sent to the model (e.g. "user").
    role: str
    # Text content of the message.
    content: str


@app.post("/chat")
async def chat(input_data: ChatInput):
    """Forward one chat message to the hosted model and return its reply.

    Returns:
        ``{"response": <assistant message>}`` on success, or
        ``{"error": <description>}`` — still HTTP 200 — on failure.
        The error-in-body shape is kept for backward compatibility with
        existing clients.
    """
    try:
        # Prepare the single-message conversation for the model.
        messages = [{"role": input_data.role, "content": input_data.content}]

        # Get a completion from the Hugging Face model.
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=500,
        )

        # NOTE(review): this returns the whole message object, not just its
        # text content — confirm clients expect {"role": ..., "content": ...}.
        return {"response": completion.choices[0].message}
    except Exception as e:
        # Boundary handler: log the full traceback so failures are not
        # silently swallowed, then report the error to the caller.
        logger.exception("chat completion failed")
        return {"error": str(e)}