Spaces:
Running
Running
File size: 3,612 Bytes
2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 436c4c1 2471de4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Function to Initialize the Model
def init_model():
    """Load and return the (tokenizer, model) pair for the T5 paraphraser.

    The Hugging Face objects are created once and memoized on the function
    itself, so repeated callers (e.g. ``generate_paraphrase``) do not
    re-read the large model checkpoint from disk on every call.

    Returns:
        tuple: ``(para_tokenizer, para_model)`` — an ``AutoTokenizer`` and an
        ``AutoModelForSeq2SeqLM`` for ``humarin/chatgpt_paraphraser_on_T5_base``.
    """
    cached = getattr(init_model, "_cached", None)
    if cached is None:
        para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
        para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
        cached = (para_tokenizer, para_model)
        init_model._cached = cached  # cache survives across calls
    return cached
# Function to Paraphrase the Text
def paraphrase(question, para_tokenizer, para_model, num_beams=10, num_beam_groups=10, num_return_sequences=10, repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2, temperature=0.7, max_length=64):
    """Produce diverse paraphrases of *question* via diverse beam search.

    The question is prefixed with the model's ``paraphrase:`` task tag,
    tokenized, and decoded with ``num_beam_groups`` beam groups so the
    ``num_return_sequences`` outputs differ from one another.

    Args:
        question: Text to paraphrase.
        para_tokenizer: Tokenizer returned by ``init_model``.
        para_model: Seq2seq model returned by ``init_model``.
        num_beams / num_beam_groups / num_return_sequences: Beam-search shape.
        repetition_penalty / diversity_penalty / no_repeat_ngram_size /
        temperature: Generation-quality knobs passed through to ``generate``.
        max_length: Token cap for both the encoded input and each output.

    Returns:
        list[str]: The decoded paraphrases, special tokens stripped.
    """
    encoded = para_tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )
    generated = para_model.generate(
        encoded.input_ids,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
        num_beam_groups=num_beam_groups,
        max_length=max_length,
        diversity_penalty=diversity_penalty,
    )
    return para_tokenizer.batch_decode(generated, skip_special_tokens=True)
def generate_paraphrase(question):
    """Convenience wrapper: load the model pair and paraphrase *question*.

    Returns:
        list[str]: Paraphrases produced by ``paraphrase`` with its defaults.
    """
    tokenizer, model = init_model()
    return paraphrase(question, tokenizer, model)
# print(generate_paraphrase("Donald Trump said at a campaign rally event in Wilkes-Barre, Pennsylvania, that there has “never been a more dangerous time 5since the Holocaust” to be Jewish in the United States."))
'''
NOTE: The docstring below describes the commented-out OpenAI implementation
further down, not the active T5 code above:
Accepts a sentence or list of sentences and returns a list of all their
paraphrases using GPT-4.
'''
# from openai import OpenAI
# from dotenv import load_dotenv
# load_dotenv()
# import os
# key = os.getenv("OPENAI_API_KEY")
# # Initialize the OpenAI client
# client = OpenAI(
# api_key=key # Replace with your actual API key
# )
# # Function to paraphrase sentences using GPT-4
# def generate_paraphrase(sentences, model="gpt-4o", num_paraphrases=10, max_tokens=150, temperature=0.7):
# # Ensure sentences is a list even if a single sentence is passed
# if isinstance(sentences, str):
# sentences = [sentences]
# paraphrased_sentences_list = []
# for sentence in sentences:
# full_prompt = f"Paraphrase the following text: '{sentence}'"
# try:
# chat_completion = client.chat.completions.create(
# messages=[
# {
# "role": "user",
# "content": full_prompt,
# }
# ],
# model=model,
# max_tokens=max_tokens,
# temperature=temperature,
# n=num_paraphrases # Number of paraphrased sentences to generate
# )
# # Extract the paraphrased sentences from the response
# paraphrased_sentences = [choice.message.content.strip() for choice in chat_completion.choices]
# # Append paraphrased sentences to the list
# paraphrased_sentences_list.extend(paraphrased_sentences)
# except Exception as e:
# print(f"Error paraphrasing sentence '{sentence}': {e}")
# return paraphrased_sentences_list
# result = generate_paraphrase("Mayor Eric Adams did not attend the first candidate forum for the New York City mayoral race, but his record — and the criminal charges he faces — received plenty of attention on Saturday from the Democrats who are running to unseat him.")
# print(len(result)) |