future-xy committed
Commit 85e30d4
1 Parent(s): f0ad559

fix generation bugs

src/backend/huggingface_generate_until.py CHANGED
@@ -28,7 +28,10 @@ class HFLMwithChatTemplate(HFLMWithMeasurement):
                 messages = [
                     {"role": "user", "content": f"{input_string}"},
                 ]
-                updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
+                if "dbrx-instruct" in self.model.name_or_path:
+                    updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+                else:
+                    updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
                 updated_strings.append(updated_string)
                 strings = updated_strings[:]
         except:
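
For context, add_generation_prompt=True asks the tokenizer's chat template to append the assistant turn header after the last user message, so decoding starts in the assistant role instead of continuing the user turn. A minimal sketch of the difference, assuming the standard transformers chat-template API (the prompt text and the exact header token are illustrative, not part of this commit):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "databricks/dbrx-instruct", trust_remote_code=True
)
messages = [{"role": "user", "content": "What is a mixture-of-experts model?"}]

# Renders the conversation and stops right after the user turn.
prompt_plain = tokenizer.apply_chat_template(messages, tokenize=False)

# Also appends the assistant header (e.g. "<|im_start|>assistant"), which
# dbrx-instruct needs to begin its reply cleanly.
prompt_ready = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
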
src/backend/manage_requests.py CHANGED
@@ -42,6 +42,9 @@ class EvalRequest:
             # A GPTQ model does not need dtype to be specified,
             # it will be inferred from the config
             pass
+        elif self.precision == "8bit":
+            model_args += ",load_in_8bit=True"
+            model_args += ",trust_remote_code=True"
         else:
            raise Exception(f"Unknown precision {self.precision}.")
         return model_args
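
The new "8bit" branch works because model_args is a comma-separated key=value string that the backend later splits into from_pretrained keyword arguments: load_in_8bit=True requests bitsandbytes 8-bit quantization, and trust_remote_code=True permits custom modeling code. A hedged sketch of that downstream parsing, with parse_model_args as a hypothetical stand-in for the harness's real parser:

def parse_model_args(model_args: str) -> dict:
    """Turn 'pretrained=org/model,load_in_8bit=True' into kwargs."""
    kwargs = {}
    for pair in model_args.split(","):
        key, value = pair.split("=", 1)
        # Coerce the string booleans appended above back to bool.
        if value in ("True", "False"):
            value = value == "True"
        kwargs[key] = value
    return kwargs

print(parse_model_args("pretrained=org/model,load_in_8bit=True,trust_remote_code=True"))
# {'pretrained': 'org/model', 'load_in_8bit': True, 'trust_remote_code': True}
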
src/backend/run_eval_suite.py CHANGED
@@ -48,7 +48,7 @@ def run_evaluation(
     )
     # hf-chat is implemented to use apply_chat_template
     results = evaluator.simple_evaluate(
-        model=eval_request.inference_framework,  # "hf-causal-experimental", # "hf-causal", hf-chat
+        model=eval_request.inference_framework,  # "hf-chat", "moe-infinity"
         model_args=eval_request.get_model_args(),
         tasks=task_names,
         num_fewshot=num_fewshot,
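
The updated comment reflects that inference_framework now selects between the backends this repo actually registers. A hedged sketch of the call after this change, assuming lm-eval-harness's simple_evaluate signature and that "hf-chat" and "moe-infinity" are model names registered by this fork (the concrete argument values below are illustrative):

from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="hf-chat",  # or "moe-infinity", taken from eval_request.inference_framework
    model_args="pretrained=org/model,load_in_8bit=True,trust_remote_code=True",
    tasks=["selfcheckgpt"],
    num_fewshot=0,
)
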
src/backend/tasks/selfcheckgpt/task.py CHANGED
@@ -23,13 +23,14 @@ class SelfCheckGPT(ConfigurableTask):
     def __init__(self):
         super().__init__(config={"metadata": {"version": self.VERSION}})
         # these end tokens are hard coded because of the current limitation of the llm-eval.
-        self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        self.generation_kwargs = {"until": ["<im_end>"], "max_length": 1024}
         self.generation_kwargs_sampling_number = 5  # the number of sampling for self-consistency
         self.generation_kwargs_sampling = {
             "temperature": 0.99,
             "do_sample": True,
-            "until": ["\n\n", "<unk>", "<|im_end|>", "</s>"],
-            "max_length": 512,
+            "until": ["<im_end>", "</s>"],
+            "max_length": 1024,
         }
 
         self.selfcheckgpt_type = os.environ.get("SELFCHECKGPTTYPE", "SelfCheckNLI")
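
The effect of the new kwargs: generation is truncated at the first occurrence of any "until" string, so dropping "\n\n" lets SelfCheckGPT sample full multi-paragraph passages for its consistency check, and max_length: 1024 doubles the token budget. A sketch of the usual stop-string semantics, with trim_at_stop as a hypothetical helper rather than the harness's actual code:

def trim_at_stop(text: str, until: list[str]) -> str:
    """Cut generated text at the earliest occurrence of any stop string."""
    cut = len(text)
    for stop in until:
        idx = text.find(stop)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]

print(trim_at_stop("Paris is the capital.<im_end>leftover", ["<im_end>", "</s>"]))
# Paris is the capital.
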