from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = FastAPI()

model_id = "ibleducation/ibl-fordham-7b-mistral"

# Load the tokenizer and model weights, caching them in the working directory.
# device_map="auto" places the weights on a GPU when one is available (this
# requires the accelerate package to be installed).
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=".")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    cache_dir=".",
    low_cpu_mem_usage=True,
)

# Name the object "pipe" so it does not shadow the imported pipeline() factory.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
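
# The pipeline above relies on the model's default generation settings. A
# sketch of explicit decoding parameters, should you want to control output
# length and sampling (the values below are illustrative assumptions, not
# settings tuned for this model):
#
#   pipe = pipeline(
#       "text-generation",
#       model=model,
#       tokenizer=tokenizer,
#       max_new_tokens=256,
#       do_sample=True,
#       temperature=0.7,
#   )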


@app.get("/")
async def root():
    return {"message": "Welcome to the Language Model API"}


@app.get("/generate/")
async def generate_text(prompt: str):
    # Wrap the user prompt in the Mistral instruction format; the closing tag
    # is [/INST], not </INST>.
    prompt = f"<s>[INST] {prompt} [/INST]"
    # The text-generation pipeline returns a list with one dict per sequence.
    response = pipe(prompt)
    return {"generated_text": response[0]["generated_text"]}
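

# A minimal way to serve the app directly; the host and port here are
# assumptions, not values from the original setup. Equivalently, run
# `uvicorn main:app` from the shell, assuming this file is saved as main.py.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is up:
#   curl "http://localhost:8000/generate/?prompt=What%20is%20FastAPI%3F"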