Norm committed
Commit: 5dcf050
Parent: c97be5f

Update README.md

Files changed (1): README.md (+16, -10)
README.md CHANGED
@@ -9,6 +9,7 @@ license: afl-3.0

**A Quick Example**
```python
+ import torch
from networks.modeling_erine_layout import ErnieLayoutConfig, ErnieLayoutForQuestionAnswering
from networks.feature_extractor import ErnieFeatureExtractor
from networks.tokenizer import ErnieLayoutTokenizer
@@ -16,30 +17,33 @@ from networks.model_util import ernie_qa_tokenize, prepare_context_info
from PIL import Image


- pretrain_torch_model_or_path = "path/to/pretrained-model"
+ pretrain_torch_model_or_path = "path/to/pretrained/mode"
+ doc_imag_path = "path/to/doc/image"
+
+ device = torch.device("cuda:0")

# initialize tokenizer
tokenizer = ErnieLayoutTokenizer.from_pretrained(pretrained_model_name_or_path=pretrain_torch_model_or_path)
context = ['This is an example document', 'All ocr boxes are inserted into this list']
- layout = [[381, 91, 505, 115], [738, 96, 804, 122]]
+ layout = [[381, 91, 505, 115], [738, 96, 804, 122]] # all boxes are resized between 0 - 1000

- # intialize feature extractor
+ # initialize feature extractor
feature_extractor = ErnieFeatureExtractor()

# Tokenize context & questions
- context_encodings, = prepare_context_info(tokenizer, context, layout)
+ context_encodings = prepare_context_info(tokenizer, context, layout, add_special_tokens=False)
question = "what is it?"
tokenized_res = ernie_qa_tokenize(tokenizer, question, context_encodings)
+ tokenized_res['input_ids'] = torch.tensor([tokenized_res['input_ids']]).to(device)
+ tokenized_res['bbox'] = torch.tensor([tokenized_res['bbox']]).to(device)

# answer start && end index
- tokenized_res['start_positions'] = 6
- tokenized_res['end_positions'] = 12
+ tokenized_res['start_positions'] = torch.tensor([6]).to(device)
+ tokenized_res['end_positions'] = torch.tensor([12]).to(device)

- # open the image of the document
- pil_image = Image.open("/path/to/image").convert("RGB")

- # Process image
- tokenized_res['pixel_values'] = feature_extractor(pil_image)
+ # open the image of the document and process image
+ tokenized_res['pixel_values'] = feature_extractor(Image.open(doc_imag_path).convert("RGB")).unsqueeze(0).to(device)


# initialize config
@@ -51,7 +55,9 @@ model = ErnieLayoutForQuestionAnswering.from_pretrained(
    pretrained_model_name_or_path=pretrain_torch_model_or_path,
    config=config,
)
+ model.to(device)

output = model(**tokenized_res)

+
```
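
The updated snippet stops at the forward pass. For readers trying it end to end, here is a minimal decoding sketch, assuming `output` exposes `start_logits` and `end_logits` as standard Hugging Face question-answering heads do, and that `ErnieLayoutTokenizer` supports the usual `decode` API; neither is shown in this commit.

```python
# Minimal sketch (not part of the commit): decode the predicted answer span.
# ASSUMES `output` has `start_logits` / `end_logits`, like standard
# Hugging Face QA model outputs.
start_idx = int(output.start_logits.argmax(dim=-1))
end_idx = int(output.end_logits.argmax(dim=-1))

# Map the predicted token span back to text with the same tokenizer.
answer_ids = tokenized_res['input_ids'][0][start_idx:end_idx + 1]
print(tokenizer.decode(answer_ids, skip_special_tokens=True))
```

Since the example hard-codes `cuda:0`, CPU-only readers could instead guard the device with `torch.device("cuda:0" if torch.cuda.is_available() else "cpu")`.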