"""Gradio demo: part-of-speech tagging for Portuguese text.

The text is split into words with spaCy, each word is tagged by a
token-classification model, and the result is shown both as highlighted
text and as a token/tag table.
"""
import logging
import os  # noqa: F401  (kept: other tooling may rely on it being imported)
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Tuple

import gradio as gr
import pandas as pd
import spacy
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

SPACY_MODEL = "pt_core_news_sm"
POS_MODEL = "Emanuel/bertimbau-base-pos"

try:
    nlp = spacy.load(SPACY_MODEL)
except OSError:
    # spaCy raises OSError when the pipeline package is not installed.
    # Download it with the interpreter that is running this script so the
    # package lands in the right environment; fail loudly if that fails.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", SPACY_MODEL],
        check=True,
    )
    nlp = spacy.load(SPACY_MODEL)

model = AutoModelForTokenClassification.from_pretrained(POS_MODEL)
tokenizer = AutoTokenizer.from_pretrained(POS_MODEL)

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


def predict(text, nlp, logger=None) -> Tuple[List[str], List[str]]:
    """Tag every word of ``text`` with a part-of-speech label.

    Args:
        text: Raw input sentence(s). ``None`` or an empty string is allowed.
        nlp: spaCy pipeline used to split ``text`` into words.
        logger: Optional logger for progress messages; nothing is logged
            when it is ``None``.

    Returns:
        A ``(tokens, labels)`` pair of equal-length lists. Whitespace-only
        spaCy tokens and words the model tokenizer could not encode (or that
        were cut off by truncation) are omitted from both lists.
    """
    doc = nlp(text or "")
    # Whitespace tokens produce no word pieces, which would leave the labels
    # shorter than (and misaligned with) the token list.
    tokens = [token.text for token in doc if not token.is_space]

    if logger is not None:
        logger.info("Starting predictions for sentence: %s", text)

    if not tokens:
        return [], []

    encoding = tokenizer(
        tokens,
        return_tensors="pt",
        is_split_into_words=True,
        return_offsets_mapping=True,
        truncation=True,
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(
            input_ids=encoding["input_ids"],
            attention_mask=encoding["attention_mask"],
        )

    predicted_ids = output.logits[0].argmax(dim=-1).tolist()
    offsets = encoding["offset_mapping"][0]
    id2label = model.config.id2label

    # A word's label is the prediction for its first word piece. word_ids()
    # maps each position to its source word (None for special tokens).
    label_by_word = {}
    for position, word_index in enumerate(encoding.word_ids(batch_index=0)):
        if word_index is None or word_index in label_by_word:
            continue
        label = id2label[predicted_ids[position]]
        label_by_word[word_index] = label
        if logger is not None:
            logger.info(
                "%s, %s, %s", offsets[position], tokens[word_index], label
            )

    tagged_tokens = []
    labels = []
    for word_index, token in enumerate(tokens):
        if word_index not in label_by_word:
            if logger is not None:
                logger.warning("No prediction for token %r; skipping", token)
            continue
        tagged_tokens.append(token)
        labels.append(label_by_word[word_index])

    return tagged_tokens, labels


def text_analysis(text) -> Tuple[List[Tuple[str, Optional[str]]], pd.DataFrame]:
    """Run POS tagging on ``text`` and shape the result for the UI.

    Returns:
        ``(pos_tokens, pos_count)`` where ``pos_tokens`` is a list of
        ``(text, label)`` pairs for ``gr.HighlightedText`` (each token is
        followed by an unlabeled single space) and ``pos_count`` is a
        DataFrame with the columns ``token`` and ``etiqueta``.
    """
    tokens, labels = predict(text, nlp, logger)
    pos_count = pd.DataFrame(
        {
            "token": tokens,
            "etiqueta": labels,
        }
    )

    pos_tokens = []
    for token, label in zip(tokens, labels):
        pos_tokens.extend([(token, label), (" ", None)])

    return pos_tokens, pos_count


# Static page assets, resolved relative to the working directory.
css = Path("style.css").read_text(encoding="utf-8")
top_html = Path("top.html").read_text(encoding="utf-8")
bottom_html = Path("bottom.html").read_text(encoding="utf-8")

with gr.Blocks(css=css) as demo:
    gr.HTML(top_html)
    text = gr.Textbox(placeholder="Insira um texto...", label="Texto de entrada")
    output_highlighted = gr.HighlightedText()
    output_df = gr.Dataframe()
    submit_btn = gr.Button("Enviar")
    submit_btn.click(
        fn=text_analysis,
        inputs=text,
        outputs=[output_highlighted, output_df],
    )
    examples = gr.Examples(
        examples=[
            [
                "A população não poderia ter acesso a relatórios que explicassem, por exemplo, os motivos exatos de atrasos em obras de linhas e estações."
            ],
            ["Filme 'Star Wars : Os Últimos Jedi' ganha trailer definitivo; assista."],
        ],
        inputs=[text],
        label="Exemplos",
    )
    gr.HTML(bottom_html)

demo.launch()