Clémentine committed on
Commit
ead4c96
·
1 Parent(s): bde3a6f

Fix tokenizer checks - much simpler

Browse files
Files changed (1) hide show
  1. src/submission/check_validity.py +5 -11
src/submission/check_validity.py CHANGED
@@ -7,7 +7,7 @@ from datetime import datetime, timedelta, timezone
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
9
  from huggingface_hub.hf_api import ModelInfo
10
- from transformers import AutoConfig
11
  from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
12
 
13
  from src.envs import HAS_HIGHER_RATE_LIMIT
@@ -41,18 +41,12 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
41
  try:
42
  config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
43
  if test_tokenizer:
44
- tokenizer_config = get_tokenizer_config(model_name)
45
- if tokenizer_config not in [None, {}]:
46
- tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
47
- else:
48
- tokenizer_class_candidate = config.tokenizer_class
49
-
50
-
51
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
52
- if tokenizer_class is None:
53
  return (
54
  False,
55
- f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
56
  None
57
  )
58
  return True, None, config
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
9
  from huggingface_hub.hf_api import ModelInfo
10
+ from transformers import AutoConfig, AutoTokenizer
11
  from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
12
 
13
  from src.envs import HAS_HIGHER_RATE_LIMIT
 
41
  try:
42
  config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
43
  if test_tokenizer:
44
+ try:
45
+ AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
46
+ except ValueError as e:
 
 
 
 
 
 
47
  return (
48
  False,
49
+ f"uses a tokenizer which is not in a transformers release: {e}",
50
  None
51
  )
52
  return True, None, config