"""Gradio demo that scores the semantic similarity of two Spanish texts.

The user picks one of several fine-tuned sentence-transformers checkpoints
and types two texts; the app returns the cosine similarity between their
embeddings.
"""
import functools
import json

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, InputExample, util

# Column header shared by the result DataFrame and the Dataframe output widget.
RESULT_COLUMN = "Similitud Semántica"


@functools.lru_cache(maxsize=1)
def _load_model(checkpoint):
    """Return the SentenceTransformer for *checkpoint*, reusing the last one loaded.

    The cache is bounded to a single entry so that repeated requests against
    the same model skip the load, without keeping every selectable model
    resident in memory at once.
    """
    return SentenceTransformer(checkpoint)


def Main(Modelo, Texto1, Texto2):
    """Compare two texts with the selected model (Gradio callback).

    Args:
        Modelo: Model name chosen in the dropdown; it is embedded into the
            checkpoint id ``jfarray/Model_<Modelo>_50_Epochs``.
        Texto1: First text.
        Texto2: Second text.

    Returns:
        A two-item list ``[error, result]``: ``error`` is an empty string on
        success or a description of the failure, and ``result`` is a
        DataFrame with one ``"Similitud Semántica"`` column (empty on
        failure).
    """
    try:
        data_test = [InputExample(guid="", texts=[Texto1, Texto2], label=0)]
        modelResult = TestModel('jfarray/Model_' + Modelo + '_50_Epochs', data_test)
    except Exception as e:
        # This is the UI boundary: report the failure instead of crashing.
        # The outputs must still be valid for their widgets, so the Label
        # gets a string (not the exception object) and the Dataframe gets an
        # empty frame (not a bare string).
        return [f"{type(e).__name__}: {e}", pd.DataFrame(columns=[RESULT_COLUMN])]
    return ["", modelResult]


def TestModel(checkpoint, data):
    """Score each text pair in *data* with the model stored at *checkpoint*.

    Args:
        checkpoint: Model name or path accepted by ``SentenceTransformer``.
        data: Sequence of ``InputExample`` objects whose ``texts`` hold the
            two sentences to compare.

    Returns:
        A DataFrame with one ``"Similitud Semántica"`` column containing the
        cosine similarity of each pair, rounded to 3 decimals, in input order.
    """
    if not data:
        # Nothing to encode; skip the model load entirely.
        return pd.DataFrame(columns=[RESULT_COLUMN])

    model = _load_model(checkpoint)

    sentences1 = [example.texts[0] for example in data]
    sentences2 = [example.texts[1] for example in data]

    # Compute embeddings for both lists.
    embeddings1 = model.encode(sentences1, convert_to_tensor=True)
    embeddings2 = model.encode(sentences2, convert_to_tensor=True)

    # cos_sim returns the full pairwise matrix; entry [i][i] is the
    # similarity between the two texts of example i.
    cosine_scores = util.cos_sim(embeddings1, embeddings2)
    scores = [round(cosine_scores[i][i].item(), 3) for i in range(len(data))]

    return pd.DataFrame({RESULT_COLUMN: scores})


Modelos = gr.inputs.Dropdown([
    "dccuchile_bert-base-spanish-wwm-uncased",
    "bert-base-multilingual-uncased",
    "all-distilroberta-v1",
    "paraphrase-multilingual-mpnet-base-v2",
    "paraphrase-multilingual-MiniLM-L12-v2",
    "distiluse-base-multilingual-cased-v1",
])
# NOTE: Opciones is defined but not passed to the interface below.
Opciones = gr.inputs.Radio(["Comparar Textos", "Procesar Fichero"])
Text1Input = gr.inputs.Textbox(lines=10, placeholder="Escriba el texto aqui ...")
Text2Input = gr.inputs.Textbox(lines=10, placeholder="Escriba el otro texto aqui ...")
LabelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="")
DataFrameOutput = gr.outputs.Dataframe(
    headers=["Similitud Semántica"],
    max_rows=20,
    max_cols=None,
    overflow_row_behaviour="paginate",
    type="pandas",
    label="Resultado",
)

iface = gr.Interface(
    fn=Main,
    inputs=[Modelos, Text1Input, Text2Input],
    outputs=[LabelOutput, DataFrameOutput],
    title="Similitud Semántica de textos en Español de tamaño medio (200-250 palabras)",
)

iface.launch(share=False, enable_queue=True, show_error=True)