File size: 3,612 Bytes
2471de4
436c4c1
2471de4
 
 
 
 
436c4c1
2471de4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436c4c1
2471de4
 
 
 
436c4c1
 
 
 
 
 
 
2471de4
 
 
 
436c4c1
2471de4
436c4c1
2471de4
 
 
 
436c4c1
2471de4
 
 
 
 
436c4c1
2471de4
436c4c1
2471de4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436c4c1
2471de4
436c4c1
2471de4
436c4c1
2471de4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Function to Initialize the Model
def init_model(model_name="humarin/chatgpt_paraphraser_on_T5_base"):
    """Load the tokenizer and seq2seq model used for paraphrasing.

    Args:
        model_name: Checkpoint identifier handed to both ``from_pretrained``
            calls. Defaults to the checkpoint this module was written
            against, so calling ``init_model()`` with no arguments behaves
            as before.

    Returns:
        A ``(para_tokenizer, para_model)`` tuple.
    """
    # One name feeds both loads so the tokenizer and the model always come
    # from the same checkpoint.
    para_tokenizer = AutoTokenizer.from_pretrained(model_name)
    para_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return para_tokenizer, para_model

# Function to Paraphrase the Text
def paraphrase(question, para_tokenizer, para_model, num_beams=10, num_beam_groups=10, num_return_sequences=10, repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2, temperature=0.7, max_length=64):
    """Generate paraphrases of ``question`` with the given tokenizer and model.

    The text is prefixed with ``"paraphrase: "``, tokenized (truncated to
    ``max_length``), passed to ``para_model.generate`` and the generated
    sequences are decoded with special tokens skipped.

    Args:
        question: Text to paraphrase; interpolated into the prompt.
        para_tokenizer: Callable tokenizer that also provides ``batch_decode``.
        para_model: Model object providing ``generate``.
        num_beams, num_beam_groups, num_return_sequences, repetition_penalty,
        diversity_penalty, no_repeat_ngram_size, temperature: Forwarded
            unchanged as keyword arguments to ``para_model.generate``.
        max_length: Used both as the tokenizer truncation length and as the
            ``max_length`` argument of ``generate``.

    Returns:
        The result of ``para_tokenizer.batch_decode`` on the generated output.
    """
    prompt = f'paraphrase: {question}'
    tokenizer_options = {
        "return_tensors": "pt",
        "padding": "longest",
        "max_length": max_length,
        "truncation": True,
    }
    encoded = para_tokenizer(prompt, **tokenizer_options)
    prompt_ids = encoded.input_ids

    # NOTE(review): temperature is forwarded although no sampling flag is set
    # in this call — confirm it actually influences generation.
    generation_options = {
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        "num_return_sequences": num_return_sequences,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "num_beams": num_beams,
        "num_beam_groups": num_beam_groups,
        "max_length": max_length,
        "diversity_penalty": diversity_penalty,
    }
    generated = para_model.generate(prompt_ids, **generation_options)

    return para_tokenizer.batch_decode(generated, skip_special_tokens=True)

# Tokenizer/model pair loaded on first use and reused by later calls.
_PARAPHRASER = None


def generate_paraphrase(question):
    """Paraphrase ``question`` using the default model and generation settings.

    The tokenizer and model are obtained from ``init_model()`` on the first
    call and kept in a module-level variable, so repeated calls do not reload
    them each time.

    Args:
        question: Text to paraphrase.

    Returns:
        Whatever ``paraphrase`` returns for ``question`` with its default
        keyword arguments.
    """
    global _PARAPHRASER
    if _PARAPHRASER is None:
        # Loading is the expensive step; do it once and remember the result.
        _PARAPHRASER = init_model()
    para_tokenizer, para_model = _PARAPHRASER
    return paraphrase(question, para_tokenizer, para_model)

# print(generate_paraphrase("Donald Trump said at a campaign rally event in Wilkes-Barre, Pennsylvania, that there has “never been a more dangerous time since the Holocaust” to be Jewish in the United States."))

'''
Accepts a sentence or list of sentences and returns a list of all their paraphrases using GPT-4.
'''

# from openai import OpenAI
# from dotenv import load_dotenv
# load_dotenv()
# import os

# key = os.getenv("OPENAI_API_KEY")

# # Initialize the OpenAI client
# client = OpenAI(
#     api_key=key  # Replace with your actual API key
# )

# # Function to paraphrase sentences using GPT-4
# def generate_paraphrase(sentences, model="gpt-4o", num_paraphrases=10, max_tokens=150, temperature=0.7):
#     # Ensure sentences is a list even if a single sentence is passed
#     if isinstance(sentences, str):
#         sentences = [sentences]

#     paraphrased_sentences_list = []

#     for sentence in sentences:
#         full_prompt = f"Paraphrase the following text: '{sentence}'"
#         try:
#             chat_completion = client.chat.completions.create(
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": full_prompt,
#                     }
#                 ],
#                 model=model,
#                 max_tokens=max_tokens,
#                 temperature=temperature,
#                 n=num_paraphrases  # Number of paraphrased sentences to generate
#             )
#             # Extract the paraphrased sentences from the response
#             paraphrased_sentences = [choice.message.content.strip() for choice in chat_completion.choices]
#             # Append paraphrased sentences to the list
#             paraphrased_sentences_list.extend(paraphrased_sentences)
#         except Exception as e:
#             print(f"Error paraphrasing sentence '{sentence}': {e}")
    
#     return paraphrased_sentences_list

# result = generate_paraphrase("Mayor Eric Adams did not attend the first candidate forum for the New York City mayoral race, but his record — and the criminal charges he faces — received plenty of attention on Saturday from the Democrats who are running to unseat him.")

# print(len(result))