# Hugging Face file-viewer residue (not part of the program), kept as comments:
# GPalomeque's picture
# Upload app.py
# 1738721
# raw
# history blame
# 4.98 kB
import gradio as gr
from app_data import examples
from app_details import title, description, article
from transformers import AutoModelForTokenClassification,AutoModelForSequenceClassification, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, util
def _build_highlight_segments(text, entities):
    """Turn entity offsets into contiguous ``(substring, label)`` segments.

    Args:
        text: The string the ``start``/``end`` offsets are applied to.
        entities: Iterable of dicts with ``"start"``, ``"end"`` and
            ``"entity_group"`` keys, ordered by position.

    Returns:
        A list of ``(substring, label)`` tuples. Entity spans carry their
        ``entity_group``; the text before, between and after entities is
        emitted with ``None`` as label. Empty substrings are never emitted.
    """
    segments = []
    cursor = 0  # index of the first character not yet emitted
    for entity in entities:
        start, end = entity["start"], entity["end"]
        if start > cursor:
            # Unlabelled text between the previous entity (or the beginning
            # of the text) and this entity.
            segments.append((text[cursor:start], None))
        segments.append((text[start:end], entity["entity_group"]))
        cursor = end
    if cursor < len(text):
        # Trailing unlabelled text; this is the whole text when no entity
        # was found.
        segments.append((text[cursor:], None))
    return segments


def get_entities(example):
    """Run the NER model on ``example`` and return highlight segments.

    Args:
        example: Text to analyse.

    Returns:
        A list of ``(substring, label)`` tuples covering ``example``, where
        ``label`` is the entity group reported by the token-classification
        pipeline, or ``None`` for text outside any entity.
    """
    # NOTE(review): tokenizer and model are loaded on every call; consider
    # caching them if request latency matters.
    model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    token_classifier = pipeline(
        "token-classification",
        aggregation_strategy="simple",
        model=model,
        tokenizer=tokenizer,
    )
    # The model is fed the lower-cased text, but the returned offsets are
    # used to slice the original text so the output keeps its casing.
    # This assumes str.lower() does not change the string length.
    results = token_classifier(example.lower())
    return _build_highlight_segments(example, results)
def clasifica_sistema_universal(example):
    """Classify ``example`` with the "sistema universal" treaties model.

    Args:
        example: Text to classify.

    Returns:
        A dict mapping each label returned by the text-classification
        pipeline to its score as a ``float``.
    """
    model_name = "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    # Scores are converted to plain floats before being returned.
    return {result["label"]: float(result["score"]) for result in results}
def clasifica_conv_americana(example):
    """Classify ``example`` with the American Convention articles model.

    Args:
        example: Text to classify.

    Returns:
        A dict mapping each label returned by the text-classification
        pipeline to its score as a ``float``.
    """
    checkpoint = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    net = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    classifier = pipeline("text-classification", model=net, tokenizer=tok)

    scores = {}
    for prediction in classifier(example):
        scores[prediction["label"]] = float(prediction["score"])
    return scores
def similitud(example, example2):
    """Return the cosine similarity of two texts, scaled by 100.

    Args:
        example: First text.
        example2: Second text.

    Returns:
        The cosine similarity between the sentence embeddings of both
        texts, multiplied by 100, as a ``float``.
    """
    encoder = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")
    # Embed both inputs as tensors, first text first.
    first_vec = encoder.encode(example, convert_to_tensor=True)
    second_vec = encoder.encode(example2, convert_to_tensor=True)
    # Cosine similarity between the two embeddings.
    similarity = util.cos_sim(first_vec, second_vec)
    percentage = float(similarity[0]) * 100
    return percentage
def process(example, example2):
    """Analyse both texts and compute their similarity.

    Args:
        example: Text to analyse.
        example2: Text to compare against.

    Returns:
        A 7-tuple: entities, "sistema universal" classification and
        "convención americana" classification for ``example``; the
        similarity score between both texts; then the same three results
        for ``example2``.
    """

    def _analyse(text):
        # Tuple elements are evaluated left to right: entities first, then
        # the two classifiers.
        return (
            get_entities(text),
            clasifica_sistema_universal(text),
            clasifica_conv_americana(text),
        )

    first_results = _analyse(example)
    similarity = similitud(example, example2)
    second_results = _analyse(example2)
    return (*first_results, similarity, *second_results)
# Input components: the text to analyse and the text to compare it with.
input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")

# Results for the analysed text.
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# Similarity result.
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")

# Results for the text being compared.
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# The order of `outputs` must match the order of the tuple returned by
# `process`.
iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[
        output_hgtxt,
        output_lbl1,
        output_lbl2,
        output_txt,
        output_hgtxt2,
        output_lbl3,
        output_lbl4,
    ],
    examples=examples,
    title=title,
    description=description,
    article=article,
)
iface.launch()