Update handler.py
handler.py  CHANGED  +2 -1

@@ -8,11 +8,12 @@ MAX_TOKENS=8192
 
 class EndpointHandler():
     def __init__(self, data):
-        self.model = Llama.from_pretrained("lmstudio-ai/gemma-2b-it-GGUF", filename="gemma-2b-it-
+        self.model = Llama.from_pretrained("lmstudio-ai/gemma-2b-it-GGUF", filename="gemma-2b-it-q4_k_m.gguf", n_ctx=8192)
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         args = gem.get_args_or_none(data)
         fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{prompt} <endofturn>\n<startofturn>model"
+        print(args, fmat)
         if not args[0]:
             return {
                 "status": args["status"],