emanuelaboros committed
Commit d0242b2 · verified · 1 Parent(s): f6fd959

Update generic_ner.py

Files changed (1):
  1. generic_ner.py (+1 -30)
generic_ner.py CHANGED

@@ -111,7 +111,7 @@ def get_entities(tokens, tags, confidences, text):
                 "score": np.average(confidences[idx : idx + len(subtree)])
                 * 100,
                 "index": (idx, idx + len(subtree)),
-                "word": original_string,
+                "word": text[entity_start_position:entity_end_position],  # original_string
                 "start": entity_start_position,
                 "end": entity_end_position,
             }
@@ -242,35 +242,6 @@ class MultitaskTokenClassificationPipeline(Pipeline):
         outputs = self.model(input_ids, attention_mask)
         return outputs, text_sentences, text
 
-    # def _forward(self, inputs):
-    #     inputs, text_sentences, text = inputs
-    #     all_logits = {}
-    #
-    #     for i in range(len(text_sentences)):
-    #         print(inputs["input_ids"][i].shape)
-    #         input_ids = torch.tensor([inputs["input_ids"][i]], dtype=torch.long).to(
-    #             self.model.device
-    #         )
-    #         attention_mask = torch.tensor(
-    #             [inputs["attention_mask"][i]], dtype=torch.long
-    #         ).to(self.model.device)
-    #
-    #         with torch.no_grad():
-    #             outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
-    #
-    #         # Accumulate logits for each task
-    #         if not all_logits:
-    #             all_logits = {task: logits for task, logits in outputs.logits.items()}
-    #         else:
-    #             for task in all_logits:
-    #                 all_logits[task] = torch.cat(
-    #                     (all_logits[task], outputs.logits[task]), dim=1
-    #                 )
-    #
-    #         # Replace outputs.logits with accumulated logits
-    #         outputs.logits = all_logits
-    #
-    #     return outputs, text_sentences, text
 
     def postprocess(self, outputs, **kwargs):
         """
 