Files changed (1)
  1. main.py +5 -12
main.py CHANGED
@@ -41,30 +41,23 @@ llms = {
          "suffix": "<|endoftext|><|assistant|>"
      }
  }
- 
- #Pydantic object
- class validation(BaseModel):
-     prompt: str
-     llm: str
-     max_tokens: int = 512
-     nctx: int = 2048
  
  
  #Fast API
  app = FastAPI()
  
  @app.post("/llm_on_cpu")
- async def stream(item: validation):
+ async def stream(item: dict):
  
-     model = llms[item.llm]
+     model = llms[item['llm']]
      prefix=model['prefix']
      suffix=model['suffix']
-     nctx = item.nctx if item.nctx is not None else model['nctx']
-     max_tokens = item.max_tokens if item.max_tokens is not None else 512
+     nctx = item['nctx'] if 'nctx' in item else model['nctx']
+     max_tokens = item['max_tokens'] if 'max_tokens' in item else 512
      user="""
      {prompt}"""
  
      llm = Llama(model_path="./code/"+model['file'], n_ctx=model['nctx'], verbose=False, n_threads=8)
  
-     prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
+     prompt = f"{prefix}{user.replace('{prompt}', item['prompt'])}{suffix}"
      return llm(prompt, max_tokens=max_tokens)
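
Since the endpoint now accepts a plain dict instead of the Pydantic validation model, the request body is no longer schema-checked: callers must supply "llm" and "prompt" themselves, while "nctx" and "max_tokens" are optional. A minimal client sketch, assuming the app is served locally with uvicorn on port 8000; the "mistral" key is illustrative and must match an entry in the llms dict:

import requests

# Hypothetical payload: "llm" must be a key in the llms dict,
# "prompt" is required, "nctx" and "max_tokens" are optional.
payload = {
    "llm": "mistral",  # illustrative model key, not taken from the diff
    "prompt": "Write a haiku about CPUs.",
    "max_tokens": 256,
}

# Assumes the server was started with: uvicorn main:app --port 8000
response = requests.post("http://localhost:8000/llm_on_cpu", json=payload)
print(response.json())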
 
41
  "suffix": "<|endoftext|><|assistant|>"
42
  }
43
  }
 
 
 
 
 
 
 
44
 
45
 
46
  #Fast API
47
  app = FastAPI()
48
 
49
  @app.post("/llm_on_cpu")
50
+ async def stream(item: dict):
51
 
52
+ model = llms[item['llm']]
53
  prefix=model['prefix']
54
  suffix=model['suffix']
55
+ nctx = item['nctx'] if 'nctx' is not in item.keys() else model['nctx']
56
+ max_tokens = item['max_tokens'] if 'max_tokens' is not in item.keys() else 512
57
  user="""
58
  {prompt}"""
59
 
60
  llm = Llama(model_path="./code/"+model['file'], n_ctx=model['nctx'], verbose=False, n_threads=8)
61
 
62
+ prompt = f"{prefix}{user.replace('{prompt}', item['prompt'])}{suffix}"
63
  return llm(prompt, max_tokens=max_tokens)
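A note on the fallback logic in the new handler: once the membership tests are corrected, the same defaults read more idiomatically with dict.get, which returns the fallback when the key is absent. A sketch using the names from the handler above; whether the per-request context size should actually reach Llama is left open by the current code, which computes nctx but still passes model['nctx']:

nctx = item.get('nctx', model['nctx'])
max_tokens = item.get('max_tokens', 512)

# If the per-request context size is meant to take effect, pass nctx through
# instead of model['nctx'] (an assumption, not what the handler currently does):
llm = Llama(model_path="./code/" + model['file'], n_ctx=nctx, verbose=False, n_threads=8)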