Spaces:
Runtime error
Runtime error
import logging
from typing import Union

from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel
# Request body accepted by the inference handler; both fields are optional.
class InferenceRequest(BaseModel):
    # Prompt text forwarded to the model; None when the client omits it.
    input: Union[str, None] = None
    # Requested completion length in tokens. The declared default is 0
    # (not None), which is the value the handler receives when the field
    # is left out of the request.
    max_tokens: Union[int, None] = 0
# Application object for the web service.
app = FastAPI()

# The model is constructed once at import time and shared by every request
# through this module-level name.
llm = Llama(
    model_path="./models/mistral-7b-openorca.Q4_K_S.gguf",
    n_ctx=8192,
    verbose=False,
)
async def root():
    """Return the fixed greeting payload ``{"message": "Hello World"}``."""
    # NOTE(review): no route decorator is visible on this function in the
    # section under review — confirm how it is registered on ``app``.
    greeting = {"message": "Hello World"}
    return greeting
async def inference(request: InferenceRequest):
    """Run one completion on the module-level ``llm`` for ``request``.

    Args:
        request: Parsed request body. ``request.input`` is the prompt and
            ``request.max_tokens`` the requested completion length.

    Returns:
        Whatever ``llm(...)`` returns on success, or an empty dict when the
        model call raises. Failures are logged with a traceback instead of
        being silently discarded.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # section under review — confirm how it is registered on ``app``.
    log = logging.getLogger(__name__)
    input_text = request.input

    # Fallback used only when the client explicitly sends ``max_tokens: null``.
    # NOTE(review): InferenceRequest.max_tokens defaults to 0, so a request
    # that omits the field passes 0 to the model rather than this 256
    # fallback — confirm that is intended.
    max_tokens = 256
    if request.max_tokens is not None:
        try:
            max_tokens = int(request.max_tokens)
        except (TypeError, ValueError):
            # Keep the fallback, but leave a trace of the rejected value.
            log.warning(
                "ignoring non-integer max_tokens %r; using %d",
                request.max_tokens,
                max_tokens,
            )

    # NOTE(review): this is a synchronous call inside an ``async def``; if it
    # runs on the event loop thread, other requests wait until it finishes —
    # confirm whether it should be offloaded to a worker thread.
    try:
        return llm(
            input_text,
            temperature=0.2,
            top_k=5,
            max_tokens=max_tokens,
            stop=["<|im_end|>"],
        )
    except Exception:
        # Best-effort contract preserved: callers still receive ``{}`` on
        # failure. ``Exception`` (not a bare ``except``) lets task
        # cancellation and interpreter-exit signals propagate.
        log.exception("inference failed")
        return {}