from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = FastAPI()

model_id = "ibleducation/ibl-fordham-7b-mistral"

# Load the tokenizer and model once at startup, caching weights in the current directory.
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=".")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    cache_dir=".",
    low_cpu_mem_usage=True,
)

# Name the pipeline `generator` so it does not shadow the imported `pipeline` factory.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


@app.get("/")
async def root():
    return {"message": "Welcome to the Language Model API"}


@app.get("/generate/")
async def generate_text(prompt: str):
    # Wrap the prompt in Mistral-style instruction tags before generating.
    prompt = f"[INST]{prompt}[/INST]"
    response = generator(prompt)
    # The text-generation pipeline returns a list of dicts, one per generated sequence.
    return {"generated_text": response[0]["generated_text"]}
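
# A minimal way to run the app directly; a sketch assuming this file is saved
# as `main.py` and that uvicorn is installed (`pip install uvicorn`). The
# host/port values below are illustrative defaults, not required by FastAPI.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request against the running server (prompt text is hypothetical):
#   curl "http://127.0.0.1:8000/generate/?prompt=What%20programs%20does%20Fordham%20offer%3F"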