notus-7B-v1-GGUF / main.py
limcheekin's picture
feat: added GZipMiddleware for gzip response
575e725
raw
history blame
746 Bytes
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
from fastapi.middleware.gzip import GZipMiddleware
import os
# Settings handed to llama_cpp.server's create_app factory.
_settings = Settings(
    model="model/gguf-model.bin",
    n_threads=2,  # set to number of cpu cores
    embedding=True,
)
app = create_app(_settings)

# Enable gzip-compressed responses; minimum_size=1000 is the size threshold
# passed through to the middleware.
app.add_middleware(GZipMiddleware, minimum_size=1000)
# Read the content of index.html once and store it in memory, so the page
# can be served without touching the disk on every request.
# The encoding is spelled out explicitly: without it, open() falls back to
# the platform's locale encoding, which can mis-decode a UTF-8 page.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()
# Landing-page route: answers GET / with the HTML text held in the
# module-level `content` string, sent back as an HTMLResponse.
# Documented with comments rather than a docstring on purpose, since FastAPI
# exposes handler docstrings as the operation description in the API schema.
@app.get("/", response_class=HTMLResponse)
async def read_items():
    # No per-request file access: `content` was read when the module loaded.
    return content
if __name__ == "__main__":
    import uvicorn

    # Bind address and port come from the environment. Both variables are
    # required (a missing one raises KeyError) and PORT must parse as an int.
    bind_host = os.environ["HOST"]
    bind_port = int(os.environ["PORT"])

    uvicorn.run(app, host=bind_host, port=bind_port)