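# Gradio demo: analyzes two Spanish legal texts with JurisBERT models
# (named-entity recognition and treaty/article classification) and scores
# the semantic similarity between them with a sentence-transformer model.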
import gradio as gr
from app_data import examples
from app_details import title, description, article
from transformers import AutoModelForTokenClassification, AutoModelForSequenceClassification, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, util
def get_entities(example):
    """Run the JurisBERT NER pipeline and return (text span, entity label) pairs
    suitable for gr.HighlightedText: entity spans carry their group, gaps carry None."""
    model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    token_classifier = pipeline("token-classification", aggregation_strategy="simple", model=model, tokenizer=tokenizer)
    # Run NER on the lowercased text; the character offsets still index the original string.
    results = token_classifier(example.lower())

    output = []
    for i, item in enumerate(results):
        # Unlabeled text before the first entity.
        if i == 0 and item["start"] > 0:
            output.append((example[0:item["start"]], None))
        # The entity span itself, labeled with its entity group.
        output.append((example[item["start"]:item["end"]], item["entity_group"]))
        # Unlabeled gap between this entity and the next one.
        if i + 1 < len(results):
            next_item = results[i + 1]
            if item["end"] != next_item["start"]:
                output.append((example[item["end"]:next_item["start"]], None))
    # Unlabeled text after the last entity.
    if results and results[-1]["end"] < len(example):
        output.append((example[results[-1]["end"]:], None))
    return output
def clasifica_sistema_universal(example):
    """Classify the text with the universal-system international treaties model."""
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    # Map each predicted label to its score, as expected by gr.Label.
    return {i["label"]: float(i["score"]) for i in results}
def clasifica_conv_americana(example):
    """Classify the text with the American Convention on Human Rights articles model."""
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh")
    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh")
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    return {i["label"]: float(i["score"]) for i in results}
def similitud(example, example2):
    """Return the cosine similarity between the two texts as a percentage."""
    model = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")
    # Compute an embedding for each text
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)
    # Compute the cosine similarity between the embeddings
    cosine_scores = util.cos_sim(embeddings1, embeddings2)
    return float(cosine_scores[0]) * 100
def process(example, example2):
    # Analyze the first text
    entidades = get_entities(example)
    class_sistema_universal = clasifica_sistema_universal(example)
    class_conv_americana = clasifica_conv_americana(example)
    # Similarity between both texts
    score_similitud = similitud(example, example2)
    # Analyze the second text
    entidades2 = get_entities(example2)
    class_sistema_universal2 = clasifica_sistema_universal(example2)
    class_conv_americana2 = clasifica_conv_americana(example2)
    return entidades, class_sistema_universal, class_conv_americana, score_similitud, entidades2, class_sistema_universal2, class_conv_americana2
#### Inputs
input_sen = gr.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.Textbox(lines=10, label="Texto a comparar:")
#### Results for the text to analyze
output_hgtxt = gr.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.Label(label="Clasificación modelo convención americana:")
#### Similarity result
output_txt = gr.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")
#### Results for the text to compare
output_hgtxt2 = gr.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.Label(label="Clasificación modelo convención americana:")

iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[output_hgtxt, output_lbl1, output_lbl2, output_txt, output_hgtxt2, output_lbl3, output_lbl4],
    examples=examples,
    title=title,
    description=description,
    article=article,
)
iface.launch()