import gradio as gr

from app_data import examples
from app_details import title, description, article

from transformers import (
    AutoModelForTokenClassification,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)
from sentence_transformers import SentenceTransformer, util

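# Each helper below loads its JurisBERT model from the Hugging Face Hub on every call;
# transformers caches the downloaded files locally, so only the first call per model downloads weights.
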
def get_entities(example):
    """Run the JurisBERT NER model and return (text, label) pairs for gr.HighlightedText."""
    model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(model_name)

    token_classifier = pipeline(
        "token-classification", aggregation_strategy="simple", model=model, tokenizer=tokenizer
    )
    # Classify the lower-cased text; the character offsets remain valid for the original string.
    results = token_classifier(example.lower())

    output = []
    prev_end = 0
    for item in results:
        # Unlabelled text between the previous entity (or the start of the text) and this entity.
        if item["start"] > prev_end:
            output.append((example[prev_end:item["start"]], None))
        # The entity span itself, tagged with its aggregated entity group.
        output.append((example[item["start"]:item["end"]], item["entity_group"]))
        prev_end = item["end"]

    # Unlabelled text after the last entity.
    if prev_end < len(example):
        output.append((example[prev_end:], None))

    return output

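# Illustration only (hypothetical label names): for an input such as "la corte resolvió ...",
# get_entities could return [("la ", None), ("corte", "ORG"), (" resolvió ...", None)],
# i.e. the list of (text, label) tuples that gr.HighlightedText renders.
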
def clasifica_sistema_universal(example):
    """Classify the text against the universal-system international treaties model."""
    model_name = "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    # By default the pipeline returns only the top-scoring label.
    results = text_classifier(example)

    # gr.Label expects a {label: confidence} mapping.
    return {i["label"]: float(i["score"]) for i in results}

def clasifica_conv_americana(example):
    """Classify the text against the American Convention on Human Rights articles model."""
    model_name = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)

    return {i["label"]: float(i["score"]) for i in results}

def similitud(example, example2):
    """Return the cosine similarity between the two texts as a percentage (0-100)."""
    model = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")

    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)

    cosine_scores = util.cos_sim(embeddings1, embeddings2)

    return float(cosine_scores[0]) * 100

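# Example of the scaling: a cosine similarity of 0.87 between the two embeddings is returned as 87.0.
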
def process(example, example2):
    """Run NER, both classifiers and the similarity score over the two input texts."""
    entidades = get_entities(example)
    class_sistema_universal = clasifica_sistema_universal(example)
    class_conv_americana = clasifica_conv_americana(example)

    score_similitud = similitud(example, example2)

    entidades2 = get_entities(example2)
    class_sistema_universal2 = clasifica_sistema_universal(example2)
    class_conv_americana2 = clasifica_conv_americana(example2)

    return (
        entidades,
        class_sistema_universal,
        class_conv_americana,
        score_similitud,
        entidades2,
        class_sistema_universal2,
        class_conv_americana2,
    )

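# The seven return values of process map one-to-one onto the `outputs` list passed to gr.Interface below.
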
input_sen = gr.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.Textbox(lines=10, label="Texto a comparar:")

output_hgtxt = gr.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.Label(label="Clasificación modelo convención americana:")

output_txt = gr.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")

output_hgtxt2 = gr.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.Label(label="Clasificación modelo convención americana:")

iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[output_hgtxt, output_lbl1, output_lbl2, output_txt, output_hgtxt2, output_lbl3, output_lbl4],
    examples=examples,
    title=title,
    description=description,
    article=article,
)

iface.launch()