Spaces:
Build error
Build error
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer | |
import gradio as gr | |
from spacy import displacy | |
# Tokenizer and model are both loaded from the same checkpoint identifier.
_CHECKPOINT = "lirondos/anglicisms-spanish-mbert"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT)

# Token-classification pipeline that `infer` calls on the raw input text.
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
# Sample of the dict shape handed to displaCy in manual mode: the text, a
# list of character-offset entity spans, and an optional title.
# NOTE(review): nothing in the visible code reads this name — confirm it is
# unused before removing it.
diplacy_dict_template = dict(
    text="But Google is starting from behind.",
    ents=[dict(start=4, end=10, label="ORG")],
    title=None,
)
# Background gradient displaCy uses for each tag highlighted by the demo.
_TAG_COLORS = {
    "B-ENG": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
    "I-ENG": "linear-gradient(90deg, #99bfff, #a57cf0)",
    "B-OTHER": "linear-gradient(90deg, #79d0a5, #f6e395)",
    "I-OTHER": "linear-gradient(90deg, #f79a76, #fb6d6d)",
}

# displaCy render options: the labels to display and their colors. Built once
# at import time because neither value depends on the request.
_RENDER_OPTIONS = {"ents": list(_TAG_COLORS), "colors": _TAG_COLORS}


def infer(input_text):
    """Tag *input_text* with the ``nlp`` pipeline and return highlighted HTML.

    Each prediction returned by ``nlp`` is converted into a displaCy
    manual-mode entity (``start``/``end`` offsets copied as-is, ``label``
    taken from the prediction's ``entity`` field), and the text is rendered
    with the ``ent`` style.

    Args:
        input_text: The text to analyse.

    Returns:
        The markup produced by ``displacy.render`` with ``page=True`` and
        ``manual=True``.
    """
    spans = [
        {
            "start": prediction["start"],
            "end": prediction["end"],
            "label": prediction["entity"],
        }
        for prediction in nlp(input_text)
    ]
    render_input = {"text": input_text, "ents": spans, "title": None}
    return displacy.render(
        render_input,
        style="ent",
        page=True,
        manual=True,
        options=_RENDER_OPTIONS,
    )
# Markdown text passed to gr.Interface as the demo's description.
description = """This space is a demo for the paper Detecting Unassimilated Borrowings in Spanish:
[An Annotated Corpus and Approaches to Modeling](https://arxiv.org/pdf/2203.16169.pdf)
The goal of the underlying model is to detect foreign words, e.g. anglicisms, in spanish texts.
In general it has two types of tags for foreign words: *ENG* and *OTHER*. The authors used BIO-tagging,
which is why in practice you will see a *B-* or *I-* in front of the tags.
"""
# Sentences passed to the interface as its `examples`.
_example_sentences = [
    "Buscamos data scientist para proyecto de machine learning.",
    "Las fake news sobre la celebrity se reprodujeron por los 'mass media' en prime time.",
    "Me gusta el cine noir y el anime.",
]

# Text-in / HTML-out interface around `infer`.
demo = gr.Interface(
    fn=infer,
    inputs=gr.Text(),
    outputs=gr.HTML(),
    examples=_example_sentences,
    title="Borrowing Detection Español",
    description=description,
)

demo.launch()