|
import gradio as gr |
|
import torch |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
|
|
|
|
# Checkpoint identifier shared by the tokenizer and the model below.
_CHECKPOINT = "salti/arabic-t5-small-question-paraphrasing"

# Fast tokenizer for the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, use_fast=True)

# Load the seq2seq weights, then switch the module to evaluation mode.
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model.eval()

# Prefix prepended to every question before tokenization
# (Arabic for "Rephrase: ").
prompt = "أعد صياغة: "
|
|
|
|
|
@torch.inference_mode()
def paraphrase(question, num_beams, encoder_no_repeat_ngram_size):
    """Generate a paraphrase of ``question`` with the loaded seq2seq model.

    Args:
        question: Question text. The module-level ``prompt`` prefix is
            prepended to it before tokenization.
        num_beams: Beam count forwarded to ``model.generate``.
        encoder_no_repeat_ngram_size: Forwarded to ``model.generate`` under
            the keyword of the same name.

    Returns:
        The generated token ids decoded to a string, with special tokens
        skipped.
    """
    input_ids = tokenizer(prompt + question, return_tensors="pt").input_ids

    # The values come from UI sliders and may arrive as floats (e.g. 5.0);
    # coerce them to ints before handing them to generate(). For values that
    # are already ints this is a no-op.
    generated = model.generate(
        input_ids,
        num_beams=int(num_beams),
        encoder_no_repeat_ngram_size=int(encoder_no_repeat_ngram_size),
    )

    # A single question is encoded, so the output holds exactly one sequence.
    # Select that row explicitly instead of squeeze(), which would also
    # collapse the sequence axis if it ever had length 1.
    token_ids = generated[0].tolist()
    return tokenizer.decode(token_ids, skip_special_tokens=True)
|
|
|
|
|
# --- Input components -------------------------------------------------------

# Free-text box for the question to paraphrase.
question = gr.inputs.Textbox(label="اكتب سؤالاً باللغة العربية")

# Slider for the beam size (1..10 in steps of 1, starting at 1).
num_beams = gr.inputs.Slider(
    minimum=1,
    maximum=10,
    step=1,
    default=1,
    label="Beam size",
)

# Slider for the n-gram size setting (0..10 in steps of 1, starting at 3).
encoder_no_repeat_ngram_size = gr.inputs.Slider(
    minimum=0,
    maximum=10,
    step=1,
    default=3,
    label="Ngrams of this size won't be copied from the input (forces more diverse outputs)",
)

# --- Output component -------------------------------------------------------

# Text box that displays the paraphrased question.
outputs = gr.outputs.Textbox(label="السؤال بصيغة مختلفة")
|
|
|
# Example rows: [question text, 5, 3]. Every row uses the same two numeric
# settings, so only the question text varies.
examples = [
    [sample_question, 5, 3]
    for sample_question in (
        "متى تم اختراع الكتابة؟",
        "ما عدد حروف اللغة العربية؟",
        "ما هو الذكاء الصنعي؟",
    )
]
|
|
|
# Wire the paraphrasing function to its input and output components.
iface = gr.Interface(
    paraphrase,
    [question, num_beams, encoder_no_repeat_ngram_size],
    outputs,
    title="Arabic question paraphrasing",
    theme="huggingface",
    examples=examples,
)

# Start the web app.
iface.launch()
|
|