Conrad Lippert-Zajaczkowski committed on
Commit
80ef686
β€’
1 Parent(s): 48f8e5d
handler.py CHANGED
@@ -17,15 +17,16 @@ class EndpointHandler:
17
  def __init__(self, path=""):
18
  # load the model
19
  print('starting to load tokenizer')
20
- tokenizer = LlamaTokenizer.from_pretrained(".", local_files_only=True)
21
  print('loaded tokenizer')
22
  gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
23
  print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
24
  model = LlamaForCausalLM.from_pretrained(
25
- ".",
26
  device_map="auto",
27
  torch_dtype=dtype,
28
- offload_folder="offload"
 
29
  )
30
  gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
31
  print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
 
17
  def __init__(self, path=""):
18
  # load the model
19
  print('starting to load tokenizer')
20
+ tokenizer = LlamaTokenizer.from_pretrained("/repository/orca_tokenizer", local_files_only=True)
21
  print('loaded tokenizer')
22
  gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
23
  print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
24
  model = LlamaForCausalLM.from_pretrained(
25
+ "/repository/pytorch_model",
26
  device_map="auto",
27
  torch_dtype=dtype,
28
+ offload_folder="offload",
29
+ local_files_only=True
30
  )
31
  gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
32
  print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
added_tokens.json β†’ orca_tokenizer/added_tokens.json RENAMED
File without changes
special_tokens_map.json β†’ orca_tokenizer/special_tokens_map.json RENAMED
File without changes
tokenizer.model β†’ orca_tokenizer/tokenizer.model RENAMED
File without changes
tokenizer_config.json β†’ orca_tokenizer/tokenizer_config.json RENAMED
File without changes