"""
Rewrite this python script to use the OpenAI API key for ChatGPT4, and not Google Gemini Pro API key:  #print("import os")import os#print("from docx import Document")#from docx import Document # to open and interact with Microsoft Word documents (.docx and potentially .doc files).#print("import PyPDF2  # Import PyPDF2 library for PDFs")#import PyPDF2  # Import PyPDF2 library for PDFs#print("from bs4 import BeautifulSoup")#from bs4 import BeautifulSoup  # Import BeautifulSoup for HTML parsing#print("import pyth")#import pyth # for extracting plain text from .rtf files.#print("import datetime")import datetimeprint("import google.generativeai as genai")import google.generativeai as genai
print("Configure API key (replace with your actual key)")#API_KEY = "YOUR_API_KEY"GOOGLE_API_KEY = "AI______________FU"  # it's recommended to use double quotes (") around the API key value.print(f"GOOGLE_API_KEY = {GOOGLE_API_KEY}")genai.configure(api_key=GOOGLE_API_KEY)
global MODEL_NAME  # Declare MODEL_NAME as globalglobal NumDocs_Flag, main_index
############# LIST OF AVAILABLE MODELS code direct from Google ######################for m in genai.list_models():  #if 'generateContent' in m.supported_generation_methods:    #print(m.name)##################### INDEXED LIST OF AVAILABLE MODELS code generated by Gemini (Bard) ################available_models = []for i, m in enumerate(genai.list_models()):   if 'generateContent' in m.supported_generation_methods:       available_models.append((i + 1, m.name))  # Add model with index

############################  ACTIVATE SELECTED MODEL ################################

######################### FOLDERS ######################################
print("Define folders")
input_documents_folder = "input_documents_folder"
output_responses_folder = "output_responses_folder"
log_folder = "log_folder"

# Create input_documents_folder if it doesn't exist
os.makedirs(output_responses_folder, exist_ok=True)

# Create output_responses_folder if it doesn't exist
os.makedirs(output_responses_folder, exist_ok=True)

# Create log folder if it doesn't exist
os.makedirs(log_folder, exist_ok=True)

#################### Pre-initialize API variables -response- and -response.text- ######################
# Initialize response and response.text (filled by the API) with a placeholder (not recommended)
#response = None # triggers an error: AttributeError: 'NoneType' object has no attribute 'text'
#response = ""   # triggers an error: AttributeError: 'str' object has no attribute 'text'
#response.text = "" #(empty string)

###############  Needs Simplification  #############
print("Manufacture a fake API to define response and response.text before real API call to Gemini.")
class FakeResponse:
  def __init__(self, text):
    self.text = text

def get_response_from_api():
  # Simulate an API call (no actual network interaction)
  #fake_response = FakeResponse("This is fake API response text.")
  fake_response = FakeResponse(" ") # use this in final to eliminate.
  return fake_response

# Example usage
response = get_response_from_api()

if response is not None:
  response_text = response.text + "\n"   # to align with the User:, Assistant: separate-lines motif below.
  #print(f" Fake Response = {response_text}")  # Prints "This is fake API response text."
else:
  print("No response received from Fake API")
###################################### def load_and_select_input_documents #################################
def load_and_select_input_documents():
    #Loads filenames, prints an indexed list, prompts user for selection, and loads text sequentially.

    # Get document filenames
    supported_extensions = (".pdf", ".html", ".mhtml", ".txt", ".py", ".rtf", ".docx", ".doc")
    filenames = [filename for filename in os.listdir(input_documents_folder)
              if filename.lower().endswith(supported_extensions)]

    # Print numbered list of filenames
    print("Available Documents:")
    for i, filename in enumerate(filenames):
        print(f"{i+1}. {filename}")
    print("")
    print(f"MODEL_NAME = {MODEL_NAME}")  # reminder of the selected model type.

    #print("Get user input for document selection")
    while True:
        selected_input = input("Select document numbers (comma-separated): ")
        try:
            # Check for zero selection (exit to main)
            if selected_input == '0':
                print("User has selected 0 documents. Return to main loop with empty combined_text, and NumDocs_Flag = 0")  # NumDocs_Flag is global, indicating zero documents in use, to bypass inapplicables.  not yet implemented.
                global NumDocs_Flag, main_index
                NumDocs_Flag = 0  # Set the global NumDocs_Flag for future use (if applicable)
                combined_text = ""  # empty. no documents in combined text
                main_index = 0
                output_filename = f"FreePrompt{main_index + 1}"
                return combined_text, output_filename  # exit to main with these values

            selected_indices = [int(x) - 1 for x in selected_input.split(",")]  # Adjust for 0-based indexing
            if all(0 <= index < len(filenames) for index in selected_indices):
                # create a list of selected_indices plus one
                selected_indices_plus_one = [index + 1 for index in selected_indices]
                # Print the list using f-string for formatting
                print(f"Documents {selected_indices_plus_one} have been selected")
                break
            else:
                print("Invalid selection. Please enter comma-separated numbers within the available range.")
        except ValueError:
            print("Invalid input. Please enter comma-separated numbers.")

    #print("Load text from selected documents in specified order")
    combined_text = ""
    for index in selected_indices:
        filename = filenames[index]
        filepath = os.path.join(input_documents_folder, filename)
        if filename.lower().endswith(".docx"):  # Handle .docx files
            #print("from docx import Document")
            from docx import Document # to open and interact with Microsoft Word documents (.docx and potentially .doc files).
            try:
              document = Document(filepath)
              #document = docx.Document(filepath)  # use only if import docx  # Imports the entire docx library
              combined_text += f"[Document #{index + 1} = {filename}]\n"
              for paragraph in document.paragraphs:
                combined_text += f"{paragraph.text}\n"
            except Exception as e:
              print(f"Error processing {filename}: {e}")  # Handle potential errors

        #elif filename.lower().endswith(".doc"):  # TYPICALLY CRASHES. Consider using a different library for .doc files
            #print(f"Cannot handle .doc files directly. Consider using a library like python-docx2txt for .doc files.")
        elif filename.lower().endswith(".doc"):  # Handle .doc files
            #print("from docx import Document")
            from docx import Document # to open and interact with Microsoft Word documents (.docx and potentially .doc files).
            try:
              document = Document(filepath)
              combined_text += f"[Document #{index + 1} = {filename}]\n"  # Insert document name once
              for paragraph in document.paragraphs:
                combined_text = combined_text + f"[{paragraph.text}\n"
                combined_text += f"{paragraph.text}\n"  # Append paragraph text
            except Exception as e:
              print(f"Error processing {filename}: {e}")
              print(f"Attempting to extract text with Textract...")
              import textract  # Import Textract only if needed
              text = textract.process(filepath).decode('utf-8')
              combined_text += f"[Document #{index + 1} = {filename} (Textract)]\n"  # Indicate Textract usage
              combined_text += text  # Append Textract-extracted text

        elif filename.lower().endswith(".pdf"):  # Handle .pdf files
            #print("import PyPDF2  # Import PyPDF2 library for PDFs")  # libraries like PyPDF2, Camelot, or PDFMiner.six can extract text from PDF documents.
            import PyPDF2  # Import PyPDF2 library for PDFs
            try:
              with open(filepath, 'rb') as pdf_file:  # Open PDF in binary mode
                pdf_reader = PyPDF2.PdfReader(pdf_file)
                for page_num in range(len(pdf_reader.pages)):  # Iterate through pages
                  page_obj = pdf_reader.pages[page_num]
                  text = page_obj.extract_text()  # Extract text from each page
                  combined_text += f"[Document #{index + 1} = {filename} - Page {page_num + 1}]\n{text}\n"
            except Exception as e:
              print(f"Error processing {filename}: {e}")  # Handle potential errors

        elif filename.lower().endswith(".html") or filename.lower().endswith(".mhtml"):  # Handle .html and .mhtml pages
          #print("from bs4 import BeautifulSoup")
          from bs4 import BeautifulSoup  # Import BeautifulSoup for HTML parsing
          try:
            #with open(filepath, "r") as html_file:  # Open HTML file in read mode; to open the file directly in text mode without BeautifulSoup, use 'r'
              #soup = BeautifulSoup(html_file, "html.parser")  # Parse HTML structure
            with open(filepath, 'rb') as f:
              soup = BeautifulSoup(f, 'html.parser')
            combined_text += f"[Document #{index + 1} = {filename}]\n"  # Insert document name
            for paragraph in soup.find_all("p"):  # Find all <p> elements (paragraphs)
              combined_text += f"{paragraph.get_text(strip=True)}\n"  # Extract text, strip whitespace
          except Exception as e:
            print(f"Error processing {filename}: {e}")  # Handle potential errors

        elif filename.lower().endswith(".rtf"):  # Handle .rtf files
          print("import pyth")
          import pyth # for extracting plain text from .rtf files.
          try:
            with open(filepath, "r") as rtf_file:
              rtf_content = rtf_file.read()
              text = pyth.decode(rtf_content)  # Extract text using pyth; assumes you've installed pyth using pip install pyth.
              combined_text += f"[Document #{index + 1} = {filename}]\n{text}\n"
          except Exception as e:
            print(f"Error processing {filename}: {e}")

        else:
            try:  # Handle other text files (e.g., .txt) with default encoding
                with open(filepath, 'r', encoding='utf-8') as f:
                    combined_text += f.read() + "\n\n"
            except UnicodeDecodeError as e:
                print(f"Error decoding {filename} with 'utf-8' encoding: {e}")

    # Generate output filename based on selected filenames
    #output_filename = "_".join([filenames[i] for i in selected_indices])
    # Ensure .txt extension for output filename
    output_filename = f"_".join([filenames[i] for i in selected_indices]) #+ ".txt"
    # limit length of output_filename:
    max_filename_length = 40
    # Truncate only if necessary
    if len(output_filename) > max_filename_length:
        output_filename = output_filename[:max_filename_length] + "__.txt"

    token_count = model.count_tokens(combined_text)
    print(f"Number of tokens in combined_text: {token_count}")
    return combined_text, output_filename

          model = genai.GenerativeModel('gemini-pro-vision')
          #MODEL_NAME = "gemini-pro"
          #model = genai.GenerativeModel(MODEL_NAME)
          combined_text = f"imagename.jpg"
          img = PIL.Image.open('image.jpg')
          #response = model.generate_content(img)
          #To provide both text and images in a prompt, pass a list containing the strings and images:
          response = model.generate_content(["Write a short, engaging blog post based on this picture. It should include a description of the meal in the photo and talk about my journey meal prepping.", img], stream=True)
          response.resolve()  # only required because stream=True
          to_markdown(response.text)

########################################## def Construct_Prompt_and_Response #####################################
def Construct_Prompt_and_Response(instructions, combined_text):
#def Construct_Prompt_and_Print_Log_Response(combined_text, MODEL_NAME, log_folder, output_responses_folder):
  # receives instructions and combined_text, sends them as prompt to the generative model,
  # prints and logs the response, and saves the prompt (instructions only) and response to an output/log file.

  # Construct the prompt using both instructions and combined_text
  prompt = f"{instructions}: {combined_text}"
  # Send prompt to model to obtain a response
  response = model.generate_content(prompt)
  #response.text = f"Gemini AI: {response.text}\n"
  # Bard says: the error "can't set attribute 'text'" means response.text is effectively read-only;
  # the GenerateContentResponse object manages that property internally, so modifying it directly
  # can lead to unexpected behavior or errors.
  #response.text = "Gemini AI: " + response.text + "\n"  # \n is for separation when combined below. Assume the GenerateContentResponse object has a property named text that holds the actual generated content.
  # Return response
  return response

#################### def Log_response_and_Save_Instructions_and_Response_to_output_file ##########################
def Log_response_and_Save_Instructions_and_Response_to_output_file(instructions, response, output_filename, main_index):
  #print(f"Gemini Response:\n{response.text}")  # for debugging only.
  print("")  # space
  # Use the globally defined folders
  global log_folder, output_responses_folder

  today = datetime.date.today().strftime("%Y-%m-%d")
  #print(f"Get today's date: {today}")
  log_file = os.path.join(log_folder, f"{today}.log")

  # Extract input_filename without ".txt"
  input_files = output_filename.replace(".txt", "")

  # Write instructions and response to the all-day log file for today
  with open(log_file, 'a') as f:
    f.write(f"Today's date: {today}\n")
    f.write(f"Input Files: {input_files}\n")
    f.write(f"Instructions #{main_index + 1}: {instructions}\n")
    f.write(f"Response #{main_index + 1}: {response.text}\n\n")

  # Log instructions and response to named output file
  #output_filename =  # now defined in def load_documents():
  output_path = os.path.join(output_responses_folder, output_filename)
  with open(output_path, 'w') as f:
    f.write(f"Today's date: {today}\n")
    f.write(f"Input Files: {input_files}\n")
    #f.write(f"Instructions: {instructions}\n")
    #f.write(f"Response: {response.text}")
    f.write(f"Instructions #{main_index + 1}: {instructions}\n")
    f.write(f"Response #{main_index + 1}: {response.text}\n\n")

###################################### def MAIN #################################
def main():
  # Main function to load documents, prompt the user, and generate a response, log and save.
  global response, main_index  # to avoid error: UnboundLocalError: local variable 'response' referenced before assignment
  #load and select documents.docx
  combined_text, output_filename = load_and_select_input_documents()

  # Prompt user for the number of iterations
  #num_iterations = int(input("Enter the number of iterations: "))
  while True:
    try:
      num_iterations_str = input("Enter the number of iterations: ")
      num_iterations = int(num_iterations_str)
      # If execution reaches here, the input was a valid integer
      break  # Exit the loop
    except ValueError:
      print("Invalid input. Please enter a positive integer.")
  # Proceed with further code using num_iterations
  # Prompt user for instructions
  instructions = input("Enter instructions for Gemini model (e.g., summarize, explain key concepts, combine): ")
  print("")  # space

  for main_index in range(num_iterations):
    # Construct prompt, get response, and log/save
    response = Construct_Prompt_and_Response(instructions, combined_text)

    # Print instructions and response
    print(f"Instructions #{main_index + 1}: {instructions}")
    # Use 'is None' to check for null response.text and print prompt feedback if applicable
    if response.text is None:
        print(f"Response prompt_feedback = {response.prompt_feedback}")
    else:
        print(f"Response #{main_index + 1}: {response.text}")
    if not response.text:  # Check if response.text is empty
        safety_ratings = response.candidates[0].safety_ratings  # Access safety ratings from first candidate
        if any(rating.rating == "BLOCK" for rating in safety_ratings):
            print(f"Response blocked due to safety concerns: {safety_ratings}")
        else:
            print("An error occurred while processing the prompt. Please try again.")
    Log_response_and_Save_Instructions_and_Response_to_output_file(instructions, response, output_filename, main_index)
    # Re-prompt for instructions for the next iteration (if needed)
    if main_index + 1 < num_iterations:
      new_instructions = input("Enter instructions for the next iteration (or press Enter to continue with same instructions): ")
      if new_instructions:
        instructions = new_instructions  # Assuming you want to use new instructions
  ############## After Documents, after last "iteration", free form prompts without documents:

Converting the script to use the OpenAI API for ChatGPT4

Changes that need to be made:

OpenAI API Integration:
- import openai is added to import the OpenAI library.
- openai.api_key is set to your OpenAI API key.
- The construct_prompt_and_response function now uses openai.ChatCompletion.create with the gpt-4 model (you can adjust this) to get the response from ChatGPT4.

Removed Gemini-specific code:
- All code related to the Google Gemini Pro API, model selection, and configuration is removed.

Things to note:
- You'll need to replace "YOUR_OPENAI_API_KEY" with your actual OpenAI API key.
- You can experiment with different OpenAI models besides gpt-4 (for example, gpt-3.5-turbo) to explore various capabilities and performance characteristics.
- Adjust the max_tokens parameter in the openai.ChatCompletion.create call to control the length of the generated response.
- The script maintains the functionality of loading and processing documents, handling user prompts, and logging responses.

Here's the outline of the Python script rewritten to use the OpenAI API for ChatGPT4 instead of the Google Gemini Pro API:
""""
import os
from docx import Document
import PyPDF2
from bs4 import BeautifulSoup
import pyth
import datetime
import openai

# Set your OpenAI API key
openai.api_key = "YOUR_OPENAI_API_KEY"
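# Optional, hedged sketch: read the key from the OPENAI_API_KEY environment variable
# if it is set, so the key does not have to live in the source file (assumes you have
# exported OPENAI_API_KEY in your shell; otherwise the placeholder above is kept).
openai.api_key = os.environ.get("OPENAI_API_KEY", openai.api_key)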

# Function to load and select input documents (unchanged, apart from the original's
# Gemini-specific model.count_tokens() call, which needs an OpenAI-side replacement;
# see the token-count sketch below)
def load_and_select_input_documents():
    ...  # (code remains the same as before)
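
# Hedged sketch (an addition, not part of the original script): the original function
# ended by calling Gemini's model.count_tokens(); for the OpenAI version, one option is
# the tiktoken package, assuming it is installed (pip install tiktoken):
def count_tokens_for_gpt4(text):
    import tiktoken  # local import so the rest of the script runs without tiktoken
    encoding = tiktoken.encoding_for_model("gpt-4")
    return len(encoding.encode(text))  # approximate token count for the gpt-4 tokenizer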

# Function to construct prompt and get response from ChatGPT4
def construct_prompt_and_response(instructions, combined_text):
    prompt = f"{instructions}: {combined_text}"
    response = openai.ChatCompletion.create(
        model="gpt-4",  # You can experiment with different models, e.g. "gpt-3.5-turbo"
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1024,  # Adjust as needed to control the length of the reply
        n=1,
        temperature=0.7,
    )
    return response.choices[0].message.content.strip()
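
# Hedged sketch (an addition): the API can raise transient errors such as rate limits,
# so a small retry wrapper can make long multi-iteration runs more robust. The backoff
# values are arbitrary assumptions, not OpenAI recommendations.
def construct_prompt_and_response_with_retry(instructions, combined_text, retries=3):
    import time
    for attempt in range(retries):
        try:
            return construct_prompt_and_response(instructions, combined_text)
        except openai.error.RateLimitError:
            wait_seconds = 2 ** attempt  # simple exponential backoff: 1, 2, 4 seconds
            print(f"Rate limited; retrying in {wait_seconds} s...")
            time.sleep(wait_seconds)
    raise RuntimeError("OpenAI request failed after repeated rate-limit errors")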

# Function to log response and save to output file (same structure as before, except
# that `response` is now a plain string, so the file writes use response rather than
# response.text; see the sketch below)
def log_response_and_save_instructions_and_response_to_output_file(instructions, response, output_filename, main_index):
    ...  # (code remains the same as before, adjusted as described above)
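
# Hedged sketch (an addition) of the adjusted body, reusing the folder names the
# original script defined at module level (log_folder, output_responses_folder):
def _log_and_save_sketch(instructions, response, output_filename, main_index,
                         log_folder="log_folder", output_responses_folder="output_responses_folder"):
    today = datetime.date.today().strftime("%Y-%m-%d")
    input_files = output_filename.replace(".txt", "")
    # Append this iteration to the all-day log file
    with open(os.path.join(log_folder, f"{today}.log"), 'a') as f:
        f.write(f"Today's date: {today}\n")
        f.write(f"Input Files: {input_files}\n")
        f.write(f"Instructions #{main_index + 1}: {instructions}\n")
        f.write(f"Response #{main_index + 1}: {response}\n\n")
    # Write the named output file for this run
    with open(os.path.join(output_responses_folder, output_filename), 'w') as f:
        f.write(f"Today's date: {today}\n")
        f.write(f"Input Files: {input_files}\n")
        f.write(f"Instructions #{main_index + 1}: {instructions}\n")
        f.write(f"Response #{main_index + 1}: {response}\n\n")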

# Main function
def main():
    global main_index
    combined_text, output_filename = load_and_select_input_documents()

    while True:
        try:
            num_iterations_str = input("Enter the number of iterations: ")
            num_iterations = int(num_iterations_str)
            break
        except ValueError:
            print("Invalid input. Please enter a positive integer.")

    instructions = input("Enter instructions for ChatGPT4 model (e.g., summarize, explain key concepts, combine): ")
    print("")

    for main_index in range(num_iterations):
        response = construct_prompt_and_response(instructions, combined_text)
        print(f"Instructions #{main_index + 1}: {instructions}")
        print(f"Response #{main_index + 1}: {response}")
        log_response_and_save_instructions_and_response_to_output_file(instructions, response, output_filename, main_index)

        if main_index + 1 < num_iterations:
            new_instructions = input("Enter instructions for the next iteration (or press Enter to continue with same instructions): ")
            if new_instructions:
                instructions = new_instructions

    # Free-form prompts after document processing
    combined_text = response  # Carry forward the last response
    print("        Further prompts for ChatGPT4 model (without documents):")
    print("")

    while True:
        main_index += 1
        print(f"END OF CURRENT RESPONSE. The main_index = {main_index}")
        instructions = "User: " + input(f"Prompt #{main_index + 1} ")
        response = construct_prompt_and_response(instructions, combined_text)
        print("")
        print(f"Response #{main_index + 1}: {response}")
        print("")
        combined_text += f"{instructions}.\n Assistant: {response}\n"

if __name__ == "__main__":
    main()