|
|
|
|
|
""" |
|
Data Scientist.: Dr.Eddy Giusepe Chirinos Isidro |
|
|
|
Objetivo: Neste script utilizamos um modelo pré-treinado para extrair |
|
Entidades e usamos o pacote logging do python para registrar |
|
nossos LOGs. |
|
""" |
|
import logging
import os

from transformers import pipeline
|
|
|
class EntityRecognizer:
    """Named-entity recognizer built on a Hugging Face ``"ner"`` pipeline.

    On construction the instance loads the model into ``self.model`` and
    configures ``self.logger`` so that every recognition result is appended
    to the file named by ``LOG_FILE``.
    """

    # Model used when the caller does not specify one.
    DEFAULT_MODEL = "Babelscape/wikineural-multilingual-ner"
    # File (relative to the current working directory) that receives the logs.
    LOG_FILE = "reconhecimento_de_entidade.log"

    def __init__(self, model_name=DEFAULT_MODEL):
        """Load the model and configure the logger.

        Args:
            model_name: Model identifier passed to ``pipeline`` as both the
                model and the tokenizer.
        """
        self.model = self.load_model(model_name)
        self.logger = self.setup_logger()

    def load_model(self, model_name=DEFAULT_MODEL):
        """Build and return the ``"ner"`` pipeline for ``model_name``."""
        return pipeline("ner", model=model_name, tokenizer=model_name)

    def setup_logger(self):
        """Return the module logger, writing INFO records to ``LOG_FILE``.

        The logger is shared by every instance (it is looked up by module
        name), so the file handler is attached only if an equivalent one is
        not already present. Without that guard each new instance would add
        another handler and every message would be written multiple times.

        Returns:
            The configured ``logging.Logger``.
        """
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # FileHandler stores its target as an absolute path in baseFilename,
        # so compare against the absolute form of the log file name.
        log_path = os.path.abspath(self.LOG_FILE)
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == log_path
            for handler in logger.handlers
        )

        if not already_attached:
            # Explicit UTF-8: recognized entities may contain characters the
            # platform's default encoding cannot represent.
            file_handler = logging.FileHandler(log_path, encoding="utf-8")
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            )
            logger.addHandler(file_handler)

        return logger

    def recognize_entities(self, text):
        """Run the model on ``text`` and return ``(word, tag)`` pairs.

        Args:
            text: Input passed unchanged to ``self.model``.

        Returns:
            A list of ``(word, entity)`` tuples, one per prediction returned
            by the model, in the model's order. Each prediction must provide
            the ``'word'`` and ``'entity'`` keys. The list is also logged at
            INFO level.
        """
        predictions = self.model(text)
        recognized_entities = [
            (prediction['word'], prediction['entity'])
            for prediction in predictions
        ]

        # Lazy %-formatting: the message is only rendered if it is emitted.
        self.logger.info("Entidades reconhecidas: %s", recognized_entities)

        return recognized_entities

    @staticmethod
    def _store_entity(result, tokens, entity_type):
        """Record the entity formed by ``tokens`` in ``result`` if it is valid."""
        entity = " ".join(tokens)
        if entity_type is not None and entity:
            result[entity] = entity_type

    def process_classification_result(self, tokens_and_tags):
        """Group BIO-tagged tokens into a mapping of entity text to type.

        A ``B-<TYPE>`` tag opens a new entity; following ``I-`` tokens are
        appended to it, separated by single spaces (the type after ``I-`` is
        not compared with the open entity's type). Any other tag, such as
        ``O``, closes the open entity, so a later ``I-`` token is not glued
        onto an entity that already ended. ``I-`` tokens with no open entity
        are ignored.

        Args:
            tokens_and_tags: Iterable of ``(token, tag)`` pairs with string
                tags.

        Returns:
            A dict mapping each entity's text to its type (the tag without
            the ``B-`` prefix). If the same text occurs more than once, the
            last occurrence wins.
        """
        result = {}
        current_type = None
        current_tokens = []

        for token, tag in tokens_and_tags:
            if tag.startswith("B-"):
                self._store_entity(result, current_tokens, current_type)
                current_type = tag[2:]
                current_tokens = [token]
            elif tag.startswith("I-"):
                # Only extend an entity that is actually open.
                if current_type is not None:
                    current_tokens.append(token)
            else:
                # Non-entity tag: the open entity (if any) ends here.
                self._store_entity(result, current_tokens, current_type)
                current_type = None
                current_tokens = []

        # Flush the entity still open when the input ends.
        self._store_entity(result, current_tokens, current_type)

        return result
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: recognize entities in a sample sentence, print the raw
    # (word, tag) pairs, then print the entities grouped by type.
    sample_sentence = "Eddy e Karina compraram uns tênis na loja Nike."
    recognizer = EntityRecognizer()

    token_tags = recognizer.recognize_entities(sample_sentence)
    print(token_tags)
    print("🤗🤗🤗")

    grouped = recognizer.process_classification_result(token_tags)

    # Word-piece continuations are joined as " ##piece"; dropping that
    # marker merges the pieces back into a single word.
    merged = {}
    for entity_text, entity_label in grouped.items():
        merged[entity_text.replace(" ##", "")] = entity_label
    print(merged)
|
|