pierreguillou
committed on
Commit
•
aac3ac3
1
Parent(s):
6f42898
Update files/functions.py
Browse files
- files/functions.py +14 -7
files/functions.py
CHANGED
@@ -52,15 +52,21 @@ import pytesseract
|
|
52 |
|
53 |
## model / feature extractor / tokenizer
|
54 |
|
55 |
-
from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
|
56 |
-
|
57 |
import torch
|
58 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
59 |
|
60 |
-
# model
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
# feature extractor
|
66 |
from transformers import LayoutLMv2FeatureExtractor
|
@@ -68,7 +74,8 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
|
|
68 |
|
69 |
# tokenizer
|
70 |
from transformers import AutoTokenizer
|
71 |
-
|
|
|
72 |
|
73 |
## Key parameters
|
74 |
|
|
|
52 |
|
53 |
## model / feature extractor / tokenizer
|
54 |
|
|
|
|
|
55 |
import torch
|
56 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
57 |
|
58 |
+
# model 1
|
59 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
60 |
+
model_id = "pierreguillou/lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-linelevel-ml384"
|
61 |
+
tokenizer1 = AutoTokenizer.from_pretrained(model_id)
|
62 |
+
model1 = AutoModelForTokenClassification.from_pretrained(model_id);
|
63 |
+
model1.to(device);
|
64 |
+
|
65 |
+
from transformers import LayoutLMv2ForTokenClassification
|
66 |
+
# model 2
|
67 |
+
model_id = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-linelevel-ml384"
|
68 |
+
model2 = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
|
69 |
+
model2.to(device);
|
70 |
|
71 |
# feature extractor
|
72 |
from transformers import LayoutLMv2FeatureExtractor
|
|
|
74 |
|
75 |
# tokenizer
|
76 |
from transformers import AutoTokenizer
|
77 |
+
tokenizer_id = "xlm-roberta-base"
|
78 |
+
tokenizer2 = AutoTokenizer.from_pretrained(tokenizer_id)
|
79 |
|
80 |
## Key parameters
|
81 |
|