Clémentine
committed on
Commit
•
ead4c96
1
Parent(s):
bde3a6f
Fix tokenizer checks - much simpler
Browse files
src/submission/check_validity.py
CHANGED
@@ -7,7 +7,7 @@ from datetime import datetime, timedelta, timezone
|
|
7 |
import huggingface_hub
|
8 |
from huggingface_hub import ModelCard
|
9 |
from huggingface_hub.hf_api import ModelInfo
|
10 |
-
from transformers import AutoConfig
|
11 |
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
|
12 |
|
13 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
@@ -41,18 +41,12 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
|
|
41 |
try:
|
42 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
43 |
if test_tokenizer:
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
else:
|
48 |
-
tokenizer_class_candidate = config.tokenizer_class
|
49 |
-
|
50 |
-
|
51 |
-
tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
|
52 |
-
if tokenizer_class is None:
|
53 |
return (
|
54 |
False,
|
55 |
-
f"uses
|
56 |
None
|
57 |
)
|
58 |
return True, None, config
|
|
|
7 |
import huggingface_hub
|
8 |
from huggingface_hub import ModelCard
|
9 |
from huggingface_hub.hf_api import ModelInfo
|
10 |
+
from transformers import AutoConfig, AutoTokenizer
|
11 |
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
|
12 |
|
13 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
|
|
41 |
try:
|
42 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
43 |
if test_tokenizer:
|
44 |
+
try:
|
45 |
+
AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
46 |
+
except ValueError as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
return (
|
48 |
False,
|
49 |
+
f"uses a tokenizer which is not in a transformers release: {e}",
|
50 |
None
|
51 |
)
|
52 |
return True, None, config
|