File size: 1,800 Bytes
a2b4a62 8961013 a2b4a62 1ef9f8e a2b4a62 1ef9f8e a2b4a62 1ef9f8e a2b4a62 1ef9f8e a2b4a62 1ef9f8e 426cfa6 1ef9f8e a2b4a62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Hub identifier shared by the tokenizer and the model so the two cannot
# drift apart.
_CHECKPOINT = "salti/arabic-t5-small-question-paraphrasing"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, use_fast=True)

model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
# eval() returns the module itself, so calling it as a separate statement
# leaves `model` bound to the same object as the chained form.
model.eval()

# Task prefix that paraphrase() prepends to every question
# (Arabic, roughly "Rephrase: ").
prompt = "أعد صياغة: "
@torch.inference_mode()
def paraphrase(question, num_beams, encoder_no_repeat_ngram_size):
    """Return a model-generated paraphrase of ``question``.

    The module-level ``prompt`` is prepended to ``question``, the result is
    tokenized, ``model.generate`` is run on it, and the first generated
    sequence is decoded back to text with special tokens skipped.

    Args:
        question: The question text to paraphrase.
        num_beams: Beam count forwarded to ``model.generate``.
        encoder_no_repeat_ngram_size: Forwarded to ``model.generate`` under
            the same keyword.

    Returns:
        The decoded paraphrase as a string.
    """
    # Both values are fed from UI sliders. Coerce them so that a float such
    # as 5.0 is still usable as a count by generate().
    # NOTE(review): whether sliders deliver int or float depends on the
    # installed Gradio version -- confirm.
    num_beams = int(num_beams)
    encoder_no_repeat_ngram_size = int(encoder_no_repeat_ngram_size)

    input_ids = tokenizer(prompt + question, return_tensors="pt").input_ids
    generated = model.generate(
        input_ids,
        num_beams=num_beams,
        encoder_no_repeat_ngram_size=encoder_no_repeat_ngram_size,
    )
    # Only one question is encoded, so take the first row explicitly.
    # Unlike squeeze(), indexing never collapses a length-1 sequence into a
    # 0-d array.
    first_sequence = generated[0].cpu().numpy()
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
# Input widgets. Label text (Arabic): "Write a question in Arabic".
question = gr.inputs.Textbox(label="اكتب سؤالاً باللغة العربية")

num_beams = gr.inputs.Slider(
    minimum=1,
    maximum=10,
    step=1,
    default=1,
    label="Beam size",
)

encoder_no_repeat_ngram_size = gr.inputs.Slider(
    minimum=0,
    maximum=10,
    step=1,
    default=3,
    label="N-grams of this size won't be copied from the input (forces more diverse outputs)",
)

# Output widget. Label text (Arabic): "The question in a different form".
outputs = gr.outputs.Textbox(label="السؤال بصيغة مختلفة")
# Sample questions offered in the demo. Every example uses the same slider
# settings, so the rows are built from the question text alone.
_EXAMPLE_QUESTIONS = (
    "متى تم اختراع الكتابة؟",
    "ما عدد حروف اللغة العربية؟",
    "ما هو الذكاء الصنعي؟",
)

# Each row is [question text, beam size, n-gram size].
examples = [[text, 5, 3] for text in _EXAMPLE_QUESTIONS]
# Widgets in the same order as paraphrase()'s parameters.
_demo_inputs = [question, num_beams, encoder_no_repeat_ngram_size]

# Wire the paraphrasing function to its widgets and example rows.
iface = gr.Interface(
    title="Arabic question paraphrasing",
    theme="huggingface",
    fn=paraphrase,
    inputs=_demo_inputs,
    outputs=outputs,
    examples=examples,
)

# Start the demo as soon as the script runs.
iface.launch()
|