pierreguillou committed on
Commit
9c8291e
1 Parent(s): f49e93c

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +20 -20
files/functions.py CHANGED
@@ -50,26 +50,6 @@ print(os.popen(f'cat /etc/issue').read())
50
  print(os.popen(f'apt search tesseract').read())
51
  import pytesseract
52
 
53
- ## model / feature extractor / tokenizer
54
-
55
- from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
56
-
57
- import torch
58
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
59
-
60
- # model
61
- # tokenizer = LayoutXLMTokenizerFast.from_pretrained(model_id)
62
- model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
63
- model.to(device);
64
-
65
- # feature extractor
66
- from transformers import LayoutLMv2FeatureExtractor
67
- feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
68
-
69
- # tokenizer
70
- from transformers import AutoTokenizer
71
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
72
-
73
  ## Key parameters
74
 
75
  # categories colors
@@ -155,6 +135,26 @@ for lang_t, langcode_t in zip(langs_t,langscode_t):
155
 
156
  langdetect2Tesseract = {v:k for k,v in Tesseract2langdetect.items()}
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  ## General
159
 
160
  # get text and bounding boxes from an image
 
50
  print(os.popen(f'apt search tesseract').read())
51
  import pytesseract
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  ## Key parameters
54
 
55
  # categories colors
 
135
 
136
  langdetect2Tesseract = {v:k for k,v in Tesseract2langdetect.items()}
137
 
138
+ ## model / feature extractor / tokenizer
139
+
140
+ from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
141
+
142
+ import torch
143
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
144
+
145
+ # model
146
+ # tokenizer = LayoutXLMTokenizerFast.from_pretrained(model_id)
147
+ model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
148
+ model.to(device);
149
+
150
+ # feature extractor
151
+ from transformers import LayoutLMv2FeatureExtractor
152
+ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
153
+
154
+ # tokenizer
155
+ from transformers import AutoTokenizer
156
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
157
+
158
  ## General
159
 
160
  # get text and bounding boxes from an image