Spaces: Build error
Update DiT_Extractor/sentence_extractor.py
Browse files
DiT_Extractor/sentence_extractor.py
CHANGED
@@ -118,7 +118,7 @@ def format_output_contexts(sections_per_page):
|
|
118 |
whitespaced_text = ' '.join([word[0] for word in word_section])
|
119 |
words_info = []
|
120 |
for word in word_section:
|
121 |
-
words_info.append({'word_text
|
122 |
|
123 |
context_row = {'text':text_section, 'whitespaced_text':whitespaced_text, 'page_idx':int(page_idx), 'words_info':words_info}
|
124 |
context_id = 'context_{0}'.format(len(all_contexts))
|
|
|
118 |
whitespaced_text = ' '.join([word[0] for word in word_section])
|
119 |
words_info = []
|
120 |
for word in word_section:
|
121 |
+
words_info.append({'word_text':word[0], 'char_indices':word[1], 'word_bbox':word[2]})
|
122 |
|
123 |
context_row = {'text':text_section, 'whitespaced_text':whitespaced_text, 'page_idx':int(page_idx), 'words_info':words_info}
|
124 |
context_id = 'context_{0}'.format(len(all_contexts))
|