import gradio as gr
from app_data import examples
from app_details import title, description, article
from transformers import AutoModelForTokenClassification, AutoModelForSequenceClassification, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, util
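# Gradio demo that compares two Spanish legal texts: it runs JurisBERT-based NER and two
# text classifiers (universal human-rights system treaties and the American Convention on
# Human Rights) on each text, and reports their sentence-embedding similarity.
# `app_data` and `app_details` are local modules of this Space that provide the examples
# and the page copy (title, description, article).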
def get_entities(example):
    """Run the JurisBERT NER pipeline and return (text span, label) pairs for gr.HighlightedText."""
    model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    token_classifier = pipeline("token-classification", aggregation_strategy="simple", model=model, tokenizer=tokenizer)
    results = token_classifier(example.lower())
    output = []
    item = None
    for i, item in enumerate(results):
        next_item = results[i + 1] if i + 1 < len(results) else None
        # Text before the first detected entity is emitted without a label.
        if i == 0 and item["start"] > 0:
            output.append((example[0:item["start"]], None))
        # The entity span itself, tagged with its entity group.
        output.append((example[item["start"]:item["end"]], item["entity_group"]))
        # Check the stretch between the current entity and the next one.
        if next_item is not None and item["end"] != next_item["start"]:
            output.append((example[item["end"]:next_item["start"]], None))
    # Trailing text after the last entity.
    if item is not None and item["end"] < len(example):
        output.append((example[item["end"]:len(example)], None))
    return output
def clasifica_sistema_universal(example):
    """Classify the text with the universal human-rights system treaty model; returns {label: score} for gr.Label."""
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    return {i["label"]: float(i["score"]) for i in results}
def clasifica_conv_americana(example):
    """Classify the text with the American Convention on Human Rights article model; returns {label: score} for gr.Label."""
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh")
    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh")
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    return {i["label"]: float(i["score"]) for i in results}
def similitud(example, example2):
    """Sentence-embedding cosine similarity between the two texts, expressed as a percentage."""
    model = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")
    # Compute an embedding for each text
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)
    # Compute the cosine similarity between them
    cosine_scores = util.cos_sim(embeddings1, embeddings2)
    return float(cosine_scores[0]) * 100
def process(example, example2):
    """Run NER and both classifiers on each text, plus the similarity score between them."""
    entidades = get_entities(example)
    class_sistema_universal = clasifica_sistema_universal(example)
    class_conv_americana = clasifica_conv_americana(example)
    score_similitud = similitud(example, example2)
    entidades2 = get_entities(example2)
    class_sistema_universal2 = clasifica_sistema_universal(example2)
    class_conv_americana2 = clasifica_conv_americana(example2)
    return entidades, class_sistema_universal, class_conv_americana, score_similitud, entidades2, class_sistema_universal2, class_conv_americana2
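# Note: the tuple returned by process() must line up positionally with the `outputs` list
# passed to gr.Interface below: entities, universal-system label and American Convention
# label for the first text, then the similarity score, then the same three results for the
# second text.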
input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")
#### Results for the text to analyze:
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")
#### Similarity result
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")
#### Results for the text to compare:
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")
iface = gr.Interface(fn=process, inputs=[input_sen, input_sen2], outputs=[output_hgtxt, output_lbl1, output_lbl2, output_txt, output_hgtxt2, output_lbl3, output_lbl4], examples=examples, title=title, description=description, article=article)
iface.launch()