"""Minimal FastAPI service exposing the WizardLM-7B-V1.0-Uncensored GGML model.

POST /inference with ``{"input": "<prompt>"}`` returns the model's completion.
"""

import fastapi
import uvicorn
from ctransformers import AutoModelForCausalLM
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Load the quantized GGML model once at import time; ctransformers downloads
# the weights from the Hugging Face hub on first run. This makes module import
# slow and memory-heavy by design — the model is shared by all requests.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardLM-7B-V1.0-Uncensored-GGML",
    model_file="wizardlm-7b-v1.0-uncensored.ggmlv3.q2_K.bin",
    model_type="llama",
)

app = fastapi.FastAPI(title="WizardLM-7B-V1.0-Uncensored")

# NOTE(review): per the CORS spec, browsers reject a wildcard origin when
# allow_credentials=True, so the credentialed-wildcard combination below is
# effectively dead. Kept as-is for compatibility; restrict allow_origins to
# the real frontend origin(s) before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
async def index():
    """Trivial landing/health-check endpoint."""
    return {"message": "Hello World"}


class InferenceRequest(BaseModel):
    # Prompt text passed verbatim to the language model.
    input: str


@app.post("/inference")
def inference(request: InferenceRequest):
    """Run the model on ``request.input`` and return the generated text.

    Declared as a plain ``def`` (not ``async def``) so FastAPI executes the
    CPU-bound, blocking ``llm(...)`` call in its worker threadpool. The
    original ``async def`` ran generation directly on the event loop, which
    froze every other request for the duration of a completion.
    """
    generated_text = llm(request.input)
    return {"generated_text": generated_text}


if __name__ == "__main__":
    # Bind on all interfaces; no TLS/auth here — front with a reverse proxy.
    uvicorn.run(app, host="0.0.0.0", port=8000)