chivier committed on
Commit
515ea1a
·
1 Parent(s): b077d7d

sync from github

Browse files
src/backend/tasks/arena_hard/task.py CHANGED
@@ -72,7 +72,7 @@ class ArenaHard(ConfigurableTask):
72
  super().__init__(config={"metadata": {"version": self.VERSION}})
73
  # these end tokens are hard coded because of the current limitation of the llm-eval.
74
  # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
75
- self.generation_kwargs = {"until": ["</s>", "<|im_end|>"], "max_length": 4096}
76
  # self.generation_kwargs_sampling_number = 5 # the number of sampling for self-consistency
77
  # self.generation_kwargs_sampling = {
78
  # "temperature": 0.99,
 
72
  super().__init__(config={"metadata": {"version": self.VERSION}})
73
  # these end tokens are hard coded because of the current limitation of the llm-eval.
74
  # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
75
+ self.generation_kwargs = {"until": ["</s>", "<|im_end|>"], "max_gen_toks": 4096}
76
  # self.generation_kwargs_sampling_number = 5 # the number of sampling for self-consistency
77
  # self.generation_kwargs_sampling = {
78
  # "temperature": 0.99,
src/display/utils.py CHANGED
@@ -188,6 +188,7 @@ class InferenceFramework(Enum):
188
  HF_Chat = ModelDetails("hf-chat")
189
  VLLM = ModelDetails("vllm_moe")
190
  TRTLLM = ModelDetails("tensorrt_llm")
 
191
  Unknown = ModelDetails("?")
192
 
193
  def to_str(self):
@@ -203,6 +204,8 @@ class InferenceFramework(Enum):
203
  return InferenceFramework.HF_Chat
204
  if inference_framework in ["vllm_moe"]:
205
  return InferenceFramework.VLLM
 
 
206
  return InferenceFramework.Unknown
207
 
208
  class GPUType(Enum):
 
188
  HF_Chat = ModelDetails("hf-chat")
189
  VLLM = ModelDetails("vllm_moe")
190
  TRTLLM = ModelDetails("tensorrt_llm")
191
+ VLLM_FIX = ModelDetails("vllm_moe_fixbs")
192
  Unknown = ModelDetails("?")
193
 
194
  def to_str(self):
 
204
  return InferenceFramework.HF_Chat
205
  if inference_framework in ["vllm_moe"]:
206
  return InferenceFramework.VLLM
207
+ if inference_framework in ["vllm_moe_fixbs"]:
208
+ return InferenceFramework.VLLM_FIX
209
  return InferenceFramework.Unknown
210
 
211
  class GPUType(Enum):