import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Ensemble of Portuguese sentence-similarity models fine-tuned on ASSIN / ASSIN2.
model_list = [
    "ruanchaves/mdeberta-v3-base-assin2-similarity",
    "ruanchaves/bert-base-portuguese-cased-assin2-similarity",
    "ruanchaves/bert-large-portuguese-cased-assin2-similarity",
    "ruanchaves/mdeberta-v3-base-assin-similarity",
    "ruanchaves/bert-base-portuguese-cased-assin-similarity",
    "ruanchaves/bert-large-portuguese-cased-assin-similarity",
]

# Load every tokenizer/model pair once at startup.
model_array = []
for model_name in model_list:
    row = {}
    row["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
    row["model"] = AutoModelForSequenceClassification.from_pretrained(model_name)
    model_array.append(row)


def similarity(s1, s2):
    scores = []
    for row in model_array:
        tokenizer = row["tokenizer"]
        model = row["model"]
        # Encode the two texts as a single sentence pair. (The original call
        # built a batch of two pairs, (s1, s2) and (s1, s1), and discarded
        # the second; only the (s1, s2) pair is needed.)
        model_input = tokenizer(s1, s2, padding=True, return_tensors="pt")
        with torch.no_grad():
            output = model(**model_input)
        # Each model is a single-label regressor, so the logits tensor has
        # shape (1, 1); extract the scalar similarity score.
        score = output.logits[0][0].item()
        scores.append(score)
    # Average the scores across all models in the ensemble.
    return sum(scores) / len(scores)


# gr.inputs / gr.outputs were removed in Gradio 3+; use gr.Textbox directly.
inputs = [
    gr.Textbox(label="Text 1"),
    gr.Textbox(label="Text 2"),
]
outputs = gr.Textbox(label="Similarity Score")

demo = gr.Interface(
    fn=similarity,
    inputs=inputs,
    outputs=outputs,
    title="Semantic Similarity",
    description="Calculates semantic similarity between two pieces of text using multiple pre-trained models.",
    examples=[
        [
            "A quem é atribuida a invenção do ábaco?",
            "A primeira ferramenta conhecida para a computação foi o ábaco, cuja invenção é atribuída a habitantes da Mesopotâmia, em torno de 2700–2300 a.C..",
        ],
        ["I love pizza", "Pizza is my favorite food"],
        ["I hate cats", "I love dogs"],
    ],
)

demo.launch()
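
# A minimal sanity check, assuming you run this file locally: calling
# similarity() directly exercises the ensemble without the UI. The exact
# numbers depend on the downloaded checkpoints, but since the ASSIN datasets
# use a 1-5 relatedness scale, near-paraphrases should score noticeably
# higher than unrelated sentences. Uncomment to try (and comment out
# demo.launch() above so the script does not block first):
#
# print(similarity("I love pizza", "Pizza is my favorite food"))  # expected: high
# print(similarity("I hate cats", "I love dogs"))                 # expected: lower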