# Spaces: Running
from functools import lru_cache

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Function to Initialize the Model
def init_model(model_name="humarin/chatgpt_paraphraser_on_T5_base"):
    """Load the tokenizer and seq2seq model used for paraphrasing.

    Args:
        model_name: Checkpoint identifier handed to both ``from_pretrained``
            calls. Defaults to the checkpoint this file previously
            hard-coded, so existing zero-argument callers are unaffected.

    Returns:
        A ``(para_tokenizer, para_model)`` tuple.
    """
    # Both objects are loaded from the same checkpoint name so they cannot
    # drift apart if the checkpoint is ever changed.
    para_tokenizer = AutoTokenizer.from_pretrained(model_name)
    para_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return para_tokenizer, para_model
# Function to Paraphrase the Text
def paraphrase(
    question,
    para_tokenizer,
    para_model,
    num_beams=10,
    num_beam_groups=10,
    num_return_sequences=10,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=64,
):
    """Generate paraphrases of ``question`` with the given tokenizer and model.

    The text is prefixed with ``"paraphrase: "``, tokenized, passed to
    ``para_model.generate`` and the generated sequences are decoded back to
    strings with special tokens removed.

    Args:
        question: Text to paraphrase.
        para_tokenizer: Callable tokenizer whose result exposes ``input_ids``
            and which provides ``batch_decode``.
        para_model: Model object providing ``generate``.
        num_beams: Forwarded to ``generate``.
        num_beam_groups: Forwarded to ``generate``.
        num_return_sequences: Forwarded to ``generate``.
        repetition_penalty: Forwarded to ``generate``.
        diversity_penalty: Forwarded to ``generate``.
        no_repeat_ngram_size: Forwarded to ``generate``.
        temperature: Forwarded to ``generate``.
            NOTE(review): no ``do_sample`` flag is passed here; confirm
            whether temperature has any effect with these settings.
        max_length: Used both as the tokenizer truncation limit for the
            input and as ``max_length`` for generation.

    Returns:
        Whatever ``para_tokenizer.batch_decode`` returns for the generated
        sequences (presumably a list of strings; verify against the
        tokenizer in use).
    """
    input_ids = para_tokenizer(
        f"paraphrase: {question}",
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids

    outputs = para_model.generate(
        input_ids,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
        num_beam_groups=num_beam_groups,
        max_length=max_length,
        diversity_penalty=diversity_penalty,
    )

    return para_tokenizer.batch_decode(outputs, skip_special_tokens=True)
@lru_cache(maxsize=1)
def _load_paraphraser():
    """Load the tokenizer/model pair once and reuse it on later calls."""
    return init_model()


def generate_paraphrase(question):
    """Return paraphrases of ``question`` using the default model.

    The tokenizer and model are loaded on the first call and cached, so
    repeated calls do not reload the checkpoint each time.

    Args:
        question: Text to paraphrase.

    Returns:
        The result of ``paraphrase`` called with its default generation
        settings.
    """
    para_tokenizer, para_model = _load_paraphraser()
    return paraphrase(question, para_tokenizer, para_model)
# Demo run: paraphrase a sample sentence and print the results.
print(
    generate_paraphrase(
        "Billie Eilish, Charli XCX and Lorde are among a group of young female pop stars who are revealing, in their music, the pressure they have felt to look thin in a time of especially punishing beauty standards."
    )
)