Spaces:

jgyasu
/

aiisc-watermarking-model

Running

aiisc-watermarking-model / paraphraser.py

Update paraphraser.py

49cecf8 verified 7 months ago

1.61 kB

	import functools

	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# Function to Initialize the Model
	def init_model():
	    """Load the paraphrasing tokenizer and seq2seq model.

	    Both objects are created with ``from_pretrained`` from the same
	    checkpoint name, "humarin/chatgpt_paraphraser_on_T5_base".

	    Returns:
	        A ``(tokenizer, model)`` tuple, in that order.
	    """
	    # Single source for the checkpoint name so the two loads cannot drift.
	    checkpoint = "humarin/chatgpt_paraphraser_on_T5_base"
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
	    return tokenizer, model

	# Function to Paraphrase the Text
	def paraphrase(
	    question,
	    para_tokenizer,
	    para_model,
	    num_beams=10,
	    num_beam_groups=10,
	    num_return_sequences=10,
	    repetition_penalty=10.0,
	    diversity_penalty=3.0,
	    no_repeat_ngram_size=2,
	    temperature=0.7,
	    max_length=64,
	):
	    """Generate paraphrases of ``question`` with the given tokenizer and model.

	    The text is prefixed with "paraphrase: ", tokenized, passed to
	    ``para_model.generate`` and the generated sequences are decoded with
	    ``para_tokenizer.batch_decode``.

	    Args:
	        question: Text to paraphrase; interpolated into the prompt string.
	        para_tokenizer: Callable tokenizer whose result exposes ``input_ids``
	            and which provides ``batch_decode``.
	        para_model: Object providing ``generate``.
	        num_beams, num_beam_groups, num_return_sequences, repetition_penalty,
	        diversity_penalty, no_repeat_ngram_size, temperature: Forwarded
	            unchanged as keyword arguments to ``para_model.generate``.
	        max_length: Used twice: as the tokenizer's truncation limit for the
	            input and as ``max_length`` for generation.

	    Returns:
	        Whatever ``para_tokenizer.batch_decode`` returns for the generated
	        sequences (special tokens skipped) - presumably a list of strings.
	    """
	    prompt = f'paraphrase: {question}'
	    encoded = para_tokenizer(
	        prompt,
	        return_tensors="pt",
	        padding="longest",
	        max_length=max_length,
	        truncation=True,
	    )

	    # NOTE(review): ``temperature`` is forwarded together with beam-search
	    # settings and no ``do_sample`` flag is set here; whether it has any
	    # effect depends on the generation backend - confirm.
	    generation_options = {
	        "temperature": temperature,
	        "repetition_penalty": repetition_penalty,
	        "num_return_sequences": num_return_sequences,
	        "no_repeat_ngram_size": no_repeat_ngram_size,
	        "num_beams": num_beams,
	        "num_beam_groups": num_beam_groups,
	        "max_length": max_length,
	        "diversity_penalty": diversity_penalty,
	    }
	    generated = para_model.generate(encoded.input_ids, **generation_options)

	    return para_tokenizer.batch_decode(generated, skip_special_tokens=True)

	@functools.lru_cache(maxsize=1)
	def _load_paraphraser():
	    """Return the ``(tokenizer, model)`` pair, calling ``init_model`` only once."""
	    return init_model()


	def generate_paraphrase(question):
	    """Paraphrase ``question`` using the default model and generation settings.

	    The tokenizer and model are loaded on the first call and reused on
	    later calls, instead of being re-initialised for every request.

	    Args:
	        question: Text to paraphrase.

	    Returns:
	        The result of ``paraphrase(question, tokenizer, model)`` with that
	        function's default generation parameters.
	    """
	    para_tokenizer, para_model = _load_paraphraser()
	    return paraphrase(question, para_tokenizer, para_model)

	# Demo: paraphrase one sample sentence and print the result.
	# NOTE(review): this is a top-level statement, so it also runs whenever the
	# module is imported, not only when it is executed as a script - confirm
	# that importers want this, or guard it with `if __name__ == "__main__":`.
	print(
	    generate_paraphrase(
	        "Billie Eilish, Charli XCX and Lorde are among a group of young female pop stars who are revealing, in their music, the pressure they have felt to look thin in a time of especially punishing beauty standards."
	    )
	)