|
import gradio as gr |
|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
# Checkpoint identifier handed to both `from_pretrained` calls. Keeping it in
# one place guarantees the tokenizer and the classifier are always loaded from
# the same checkpoint.
MODEL_NAME = "VietTung04/results"

# Both objects are created once at import time and reused by every call to
# `is_paraphrased`.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
|
|
import torch |
|
# Seed PyTorch's global random number generator with a fixed value so that
# any random operations start from the same state on every run.
torch.manual_seed(seed=42)
|
def is_paraphrased(sentence1: str, sentence2: str) -> list:
    """Classify whether two sentences are paraphrases of each other.

    The sentences are tokenized together as a pair (truncated to at most 128
    tokens), passed through the module-level ``model``, and the class with
    the highest score is reported.

    Args:
        sentence1: First sentence of the pair.
        sentence2: Second sentence of the pair.

    Returns:
        A one-element list containing a dict with two keys:
        ``'label'`` is ``'Paraphrased'`` when class index 1 has the highest
        score and ``'Not paraphrased'`` otherwise; ``'score'`` is the sigmoid
        of the winning class's logit, as a Python float.
    """
    encoding = tokenizer(
        sentence1,
        sentence2,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )

    # Inference only: disabling autograd avoids building a gradient graph on
    # every request, which saves memory and time without changing the output.
    with torch.no_grad():
        logits = model(**encoding).logits

    # One sentence pair per call, so drop just the leading batch dimension.
    scores = torch.sigmoid(logits.squeeze(0).cpu())
    # NOTE(review): sigmoid scores each class independently, so the per-class
    # scores need not sum to 1. If the checkpoint is a single-label
    # classifier, softmax may be what is intended — confirm before changing,
    # since it would alter the reported score.
    best = int(torch.argmax(scores))

    return [{
        "label": "Paraphrased" if best == 1 else "Not paraphrased",
        "score": scores[best].item(),
    }]
|
|
|
# Web UI: two free-text inputs are passed to `is_paraphrased`, and its return
# value is rendered as JSON.
iface = gr.Interface(
    fn=is_paraphrased,
    inputs=["text", "text"],
    outputs=["json"],
    title="Paraphrase Identification",
)

# Start serving the interface; `inline=False` asks Gradio not to embed the UI
# inline (e.g. as a notebook iframe).
iface.launch(inline=False)