# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# # Function to Initialize the Model | |
# def init_model(): | |
# para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") | |
# para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") | |
# return para_tokenizer, para_model | |
# # Function to Paraphrase the Text | |
# def paraphrase(question, para_tokenizer, para_model, num_beams=10, num_beam_groups=10, num_return_sequences=10, repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2, temperature=0.7, max_length=64): | |
# input_ids = para_tokenizer( | |
# f'paraphrase: {question}', | |
# return_tensors="pt", padding="longest", | |
# max_length=max_length, | |
# truncation=True, | |
# ).input_ids | |
# outputs = para_model.generate( | |
# input_ids, temperature=temperature, repetition_penalty=repetition_penalty, | |
# num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size, | |
# num_beams=num_beams, num_beam_groups=num_beam_groups, | |
# max_length=max_length, diversity_penalty=diversity_penalty | |
# ) | |
# res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True) | |
# return res | |
# def generate_paraphrase(question): | |
# para_tokenizer, para_model = init_model() | |
# res = paraphrase(question, para_tokenizer, para_model) | |
# return res | |
# print(generate_paraphrase("Donald Trump said at a campaign rally event in Wilkes-Barre, Pennsylvania, that there has “never been a more dangerous time 5since the Holocaust” to be Jewish in the United States.")) | |
'''
Accepts a sentence or a list of sentences and returns a list of all their paraphrases,
generated with an OpenAI chat model (GPT-4o by default).
'''
from openai import OpenAI | |
from dotenv import load_dotenv | |
# Load variables from a local .env file into the process environment
load_dotenv()
import os

# The API key is taken from the environment rather than hard-coded in the source
key = os.environ.get("OPENAI_API_KEY")

# Module-level OpenAI client that generate_paraphrase sends its requests through
client = OpenAI(api_key=key)
# Function to paraphrase sentences using an OpenAI chat model
def generate_paraphrase(sentences, model="gpt-4o", num_paraphrases=10, max_tokens=150, temperature=0.7):
    """Return paraphrases of one or more sentences from an OpenAI chat model.

    One chat-completion request is issued per input sentence through the
    module-level ``client``; each request asks for ``num_paraphrases``
    alternative completions.

    Args:
        sentences: A single sentence (``str``) or an iterable of sentences.
        model: Name of the chat-completions model to call.
        num_paraphrases: Number of completions requested per sentence
            (forwarded as ``n``).
        max_tokens: Token limit forwarded to the request.
        temperature: Sampling temperature forwarded to the request.

    Returns:
        A flat list of stripped paraphrase strings for all sentences, in
        input order. A sentence whose request fails contributes nothing:
        the error is printed and processing moves on to the next sentence.
    """
    # Ensure sentences is a list even if a single sentence is passed
    if isinstance(sentences, str):
        sentences = [sentences]

    paraphrased_sentences_list = []
    for sentence in sentences:
        full_prompt = f"Paraphrase the following text: '{sentence}'"
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": full_prompt,
                    }
                ],
                model=model,
                max_tokens=max_tokens,
                temperature=temperature,
                n=num_paraphrases,  # Number of paraphrased sentences to generate
            )
            # A choice's message content is nullable in the API response.
            # Skip such choices so one empty choice does not raise on
            # .strip() and throw away the other paraphrases of this sentence.
            paraphrased_sentences_list.extend(
                choice.message.content.strip()
                for choice in chat_completion.choices
                if choice.message.content is not None
            )
        except Exception as e:
            # Best-effort: report the failure and continue with the next sentence
            print(f"Error paraphrasing sentence '{sentence}': {e}")
    return paraphrased_sentences_list
# Demo run: paraphrase one sample sentence and report how many variants came back.
# NOTE(review): this executes at import time and issues a real API request.
result = generate_paraphrase(
    "Mayor Eric Adams did not attend the first candidate forum for the "
    "New York City mayoral race, but his record — and the criminal charges "
    "he faces — received plenty of attention on Saturday from the Democrats "
    "who are running to unseat him."
)
print(len(result))