IFEvalTR

Runtime error

App Files Files Community

Clémentine committed on Jan 3

Commit

90fa47e

•

1 Parent(s): 3df8919

Incorrectly tagged merges are now flagged

Browse files

Files changed (3) hide show

src/leaderboard/filter_models.py +10 -3
src/leaderboard/read_evals.py +31 -10
src/submission/check_validity.py +1 -2

src/leaderboard/filter_models.py CHANGED Viewed

@@ -40,6 +40,7 @@ FLAGGED_MODELS = {
     "rwitz2/pee": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
     "zyh3826 / GML-Mistral-merged-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/503",
     "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
 }
 # Models which have been requested by orgs to not be submitted on the leaderboard
@@ -53,10 +54,16 @@ DO_NOT_SUBMIT_MODELS = [
 def flag_models(leaderboard_data: list[dict]):
     for model_data in leaderboard_data:
-        if model_data["model_name_for_query"] in FLAGGED_MODELS:
-            issue_num = FLAGGED_MODELS[model_data["model_name_for_query"]].split("/")[-1]
             issue_link = model_hyperlink(
-                FLAGGED_MODELS[model_data["model_name_for_query"]],
                 f"See discussion #{issue_num}",
             )
             model_data[

     "rwitz2/pee": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
     "zyh3826 / GML-Mistral-merged-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/503",
     "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
+    "merged": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
 }
 # Models which have been requested by orgs to not be submitted on the leaderboard
 def flag_models(leaderboard_data: list[dict]):
     for model_data in leaderboard_data:
+        # Merges are flagged automatically
+        if model_data[AutoEvalColumn.flagged.name] == True:
+            flag_key = "merged"
+        else:
+            flag_key = model_data["model_name_for_query"]
+        if flag_key in FLAGGED_MODELS:
+            issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
             issue_link = model_hyperlink(
+                FLAGGED_MODELS[flag_key],
                 f"See discussion #{issue_num}",
             )
             model_data[

src/leaderboard/read_evals.py CHANGED Viewed

@@ -11,7 +11,7 @@ from huggingface_hub import ModelCard
 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
-from src.submission.check_validity import is_model_on_hub
 @dataclass
@@ -32,7 +32,8 @@ class EvalResult:
     num_params: int = 0
     date: str = "" # submission date of request file
     still_on_hub: bool = False
-    merge: bool = False
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -60,11 +61,6 @@ class EvalResult:
             result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
-        try:
-            merge = any(t in ["merge", "mergedlm"] for t in ModelCard.load(full_model).data.tags)
-        except Exception:
-            merge = False
         still_on_hub, error, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
@@ -74,6 +70,28 @@ class EvalResult:
             if architectures:
                 architecture = ";".join(architectures)
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -112,7 +130,8 @@ class EvalResult:
             revision= config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture,
-            merge=merge
         )
     def update_with_request_file(self, requests_path):
@@ -138,8 +157,8 @@ class EvalResult:
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.merged.name: self.merge,
-            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
@@ -150,6 +169,8 @@ class EvalResult:
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
         for task in Tasks:

 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
+from src.submission.check_validity import is_model_on_hub, check_model_card
 @dataclass
     num_params: int = 0
     date: str = "" # submission date of request file
     still_on_hub: bool = False
+    is_merge: bool = False
+    flagged: bool = False
     @classmethod
     def init_from_json_file(self, json_filepath):
             result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
         still_on_hub, error, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
             if architectures:
                 architecture = ";".join(architectures)
+        # If the model doesn't have a model card or a license, we consider it's deleted
+        if still_on_hub:
+            try:
+                if check_model_card(full_model)[0] is False:
+                    still_on_hub = False
+            except Exception:
+                still_on_hub = False
+        # Check if the model is a merge
+        is_merge_from_metadata = False
+        flagged = False
+        if still_on_hub:
+            model_card = ModelCard.load(full_model)
+            if model_card.data.tags:
+                is_merge_from_metadata = "merge" in model_card.data.tags
+            merge_keywords = ["mergekit", "merged model", "merge model"]
+            # If the model is a merge but not saying it in the metadata, we flag it
+            is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
+            flagged = is_merge_from_model_card and not is_merge_from_metadata
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
             revision= config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture,
+            is_merge=is_merge_from_metadata,
+            flagged=flagged,
         )
     def update_with_request_file(self, requests_path):
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
+            AutoEvalColumn.merged.name: self.is_merge,
+            AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol, # + "🥦" if self.is_merge,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.flagged.name: self.flagged
         }
         for task in Tasks:

src/submission/check_validity.py CHANGED Viewed

@@ -8,7 +8,6 @@ import huggingface_hub
 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig, AutoTokenizer
-from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
 from src.envs import HAS_HIGHER_RATE_LIMIT
@@ -39,7 +38,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
         if test_tokenizer:
             try:
                 tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)

 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig, AutoTokenizer
 from src.envs import HAS_HIGHER_RATE_LIMIT
 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     try:
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
         if test_tokenizer:
             try:
                 tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)