future-xy committed
Commit 85e30d4
1 Parent(s): f0ad559

fix generation bugs

src/backend/huggingface_generate_until.py CHANGED
@@ -28,7 +28,10 @@ class HFLMwithChatTemplate(HFLMWithMeasurement):
                 messages = [
                     {"role": "user", "content": f"{input_string}"},
                 ]
-                updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
+                if "dbrx-instruct" in self.model.name_or_path:
+                    updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+                else:
+                    updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
                 updated_strings.append(updated_string)
                 strings = updated_strings[:]
         except:
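
For context, add_generation_prompt=True asks the tokenizer's chat template to append the assistant turn header after the last user message, so decoding starts in the assistant role instead of continuing the user turn. A minimal sketch of the difference, assuming the standard transformers chat-template API (the prompt text and the exact header token are illustrative, not part of this commit):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "databricks/dbrx-instruct", trust_remote_code=True
)
messages = [{"role": "user", "content": "What is a mixture-of-experts model?"}]

# Renders the conversation and stops right after the user turn.
prompt_plain = tokenizer.apply_chat_template(messages, tokenize=False)

# Also appends the assistant header (e.g. "<|im_start|>assistant"), which
# dbrx-instruct needs to begin its reply cleanly.
prompt_ready = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
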
src/backend/manage_requests.py CHANGED
@@ -42,6 +42,9 @@ class EvalRequest:
             # A GPTQ model does not need dtype to be specified,
             # it will be inferred from the config
             pass
+        elif self.precision == "8bit":
+            model_args += ",load_in_8bit=True"
+            model_args += ",trust_remote_code=True"
         else:
            raise Exception(f"Unknown precision {self.precision}.")
         return model_args
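
The new "8bit" branch works because model_args is a comma-separated key=value string that the backend later splits into from_pretrained keyword arguments: load_in_8bit=True requests bitsandbytes 8-bit quantization, and trust_remote_code=True permits custom modeling code. A hedged sketch of that downstream parsing, with parse_model_args as a hypothetical stand-in for the harness's real parser:

def parse_model_args(model_args: str) -> dict:
    """Turn 'pretrained=org/model,load_in_8bit=True' into kwargs."""
    kwargs = {}
    for pair in model_args.split(","):
        key, value = pair.split("=", 1)
        # Coerce the string booleans appended above back to bool.
        if value in ("True", "False"):
            value = value == "True"
        kwargs[key] = value
    return kwargs

print(parse_model_args("pretrained=org/model,load_in_8bit=True,trust_remote_code=True"))
# {'pretrained': 'org/model', 'load_in_8bit': True, 'trust_remote_code': True}
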
src/backend/run_eval_suite.py CHANGED
@@ -48,7 +48,7 @@ def run_evaluation(
     )
     # hf-chat is implemented to use apply_chat_template
     results = evaluator.simple_evaluate(
-        model=eval_request.inference_framework,  # "hf-causal-experimental", # "hf-causal", hf-chat
+        model=eval_request.inference_framework,  # "hf-chat", "moe-infinity"
         model_args=eval_request.get_model_args(),
         tasks=task_names,
         num_fewshot=num_fewshot,
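
The updated comment reflects that inference_framework now selects between the backends this repo actually registers. A hedged sketch of the call after this change, assuming lm-eval-harness's simple_evaluate signature and that "hf-chat" and "moe-infinity" are model names registered by this fork (the concrete argument values below are illustrative):

from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="hf-chat",  # or "moe-infinity", taken from eval_request.inference_framework
    model_args="pretrained=org/model,load_in_8bit=True,trust_remote_code=True",
    tasks=["selfcheckgpt"],
    num_fewshot=0,
)
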
src/backend/tasks/selfcheckgpt/task.py CHANGED
@@ -23,13 +23,14 @@ class SelfCheckGPT(ConfigurableTask):
     def __init__(self):
         super().__init__(config={"metadata": {"version": self.VERSION}})
         # these end tokens are hard coded because of the current limitation of the llm-eval.
-        self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        self.generation_kwargs = {"until": ["<im_end>"], "max_length": 1024}
         self.generation_kwargs_sampling_number = 5  # the number of sampling for self-consistency
         self.generation_kwargs_sampling = {
             "temperature": 0.99,
             "do_sample": True,
-            "until": ["\n\n", "<unk>", "<|im_end|>", "</s>"],
-            "max_length": 512,
+            "until": ["<im_end>", "</s>"],
+            "max_length": 1024,
         }
 
         self.selfcheckgpt_type = os.environ.get("SELFCHECKGPTTYPE", "SelfCheckNLI")
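
The effect of the new kwargs: generation is truncated at the first occurrence of any "until" string, so dropping "\n\n" lets SelfCheckGPT sample full multi-paragraph passages for its consistency check, and max_length: 1024 doubles the token budget. A sketch of the usual stop-string semantics, with trim_at_stop as a hypothetical helper rather than the harness's actual code:

def trim_at_stop(text: str, until: list[str]) -> str:
    """Cut generated text at the earliest occurrence of any stop string."""
    cut = len(text)
    for stop in until:
        idx = text.find(stop)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]

print(trim_at_stop("Paris is the capital.<im_end>leftover", ["<im_end>", "</s>"]))
# Paris is the capital.
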