Chat-With-PDF.py / STUB = Convert Chat.py for OpenAI API Key
MartialTerran's picture
Create STUB = Convert Chat.py for OpenAI API Key
ed2841c verified
raw
history blame contribute delete
No virus
21.6 kB
"""
Rewrite this python script to use the OpenAI API key for ChatGPT4, and not Google Gemini Pro API key: #print("import os")import os#print("from docx import Document")#from docx import Document # to open and interact with Microsoft Word documents (.docx and potentially .doc files).#print("import PyPDF2 # Import PyPDF2 library for PDFs")#import PyPDF2 # Import PyPDF2 library for PDFs#print("from bs4 import BeautifulSoup")#from bs4 import BeautifulSoup # Import BeautifulSoup for HTML parsing#print("import pyth")#import pyth # for extracting plain text from .rtf files.#print("import datetime")import datetimeprint("import google.generativeai as genai")import google.generativeai as genai
print("Configure API key (replace with your actual key)")#API_KEY = "YOUR_API_KEY"GOOGLE_API_KEY = "AI______________FU" # it's recommended to use double quotes (") around the API key value.print(f"GOOGLE_API_KEY = {GOOGLE_API_KEY}")genai.configure(api_key=GOOGLE_API_KEY)
global MODEL_NAME # Declare MODEL_NAME as globalglobal NumDocs_Flag, main_index
############# LIST OF AVAILABLE MODELS code direct from Google ######################for m in genai.list_models(): #if 'generateContent' in m.supported_generation_methods: #print(m.name)##################### INDEXED LIST OF AVAILABLE MODELS code generated by Gemini (Bard) ################available_models = []for i, m in enumerate(genai.list_models()): if 'generateContent' in m.supported_generation_methods: available_models.append((i + 1, m.name)) # Add model with index
############################ ACTIVATE SELECTED MODEL ################################
######################### FOLDERS ######################################print("Define folders")input_documents_folder = "input_documents_folder"output_responses_folder = "output_responses_folder"log_folder = "log_folder"
# Create input_documents_folder if it doesn't existos.makedirs(output_responses_folder, exist_ok=True)
# Create output_responses_folder if it doesn't existos.makedirs(output_responses_folder, exist_ok=True)
# Create log folder if it doesn't existos.makedirs(log_folder, exist_ok=True)
#################### Pre-initialize API variables -response- and -response.text- ######################3# Initialize response and response.text (filled by the API) with a placeholder (not recommended)#response = None # triggers an error: AttributeError: 'NoneType' object has no attribute 'text'#response = "" # triggers an error: AttributeError: 'str' object has no attribute 'text'#response.text = "" #(empty string)
## ############### Needs Simplification #############print("Manufacture a fake API to define response and response.text before real API call to Gemini.")class FakeResponse: def __init__(self, text): self.text = text
def get_response_from_api(): # Simulate an API call (no actual network interaction) #fake_response = FakeResponse("This is fake API response text.") fake_response = FakeResponse(" ") # use this in final to eliminate. return fake_response
# Example usageresponse = get_response_from_api()
if response is not None: response_text = response.text + "\n" # to allign with the User:, Assistant: separeate-lines, motif below. #print(f" Fake Response = {response_text}") # Prints "This is fake API response text."else: print("No response received from Fake API")
###################################### def load_and_select_input_documents #################################def load_and_select_input_documents(): #Loads filenames, prints an indexed list, prompts user for selection, and loads text sequentially.
# Get document filenames supported_extensions = (".pdf", ".html", ".mhtml", ".txt", ".py", ".rtf", ".docx", ".doc") filenames = [filename for filename in os.listdir(input_documents_folder) if filename.lower().endswith(supported_extensions)]
# Print numbered list of filenames print("Available Documents:") for i, filename in enumerate(filenames): print(f"{i+1}. {filename}") print("") print(f"MODEL_NAME = {MODEL_NAME}") # remined of selected model type.
#print("Get user input for document selection") while True: selected_input = input("Select document numbers (comma-separated): ") try: # Check for zero selection (exit to main) if selected_input == '0': print("User has selected 0 documents. Return to main loop with emptly combined_text, and NumDocs_Flag = 0") # NumDocs_Flag is global for indicating that zero documents in use, and bypass inapplicables. not yet implemented. global NumDocs_Flag, main_index NumDocs_Flag = 0 # Set the global NumDocs_Flag for future use (if applicable) combined_text = "" # empty. no documents in combined text main_index =0 output_filename =f"FreePrompt{main_index + 1}" return combined_text, output_filename #exit to main with these values
selected_indices = [int(x) - 1 for x in selected_input.split(",")] # Adjust for 0-based indexing if all(0 <= index < len(filenames) for index in selected_indices): # create a list of selected_indices plus one selected_indices_plus_one = [index + 1 for index in selected_indices] # Print the list using f-string for formatting print(f"Documents {selected_indices_plus_one} have been selected") break else: print("Invalid selection. Please enter comma-separated numbers within the available range.") except ValueError: print("Invalid input. Please enter comma-separated numbers.")
#print("Load text from selected documents in specified order") combined_text = "" for index in selected_indices: filename = filenames[index] filepath = os.path.join(input_documents_folder, filename) if filename.lower().endswith(".docx"): # Handle .docx files #print("from docx import Document") from docx import Document # to open and interact with Microsoft Word documents (.docx and potentially .doc files). try: document = Document(filepath) #document = docx.Document(filepath) use only if import docx # Imports the entire docx library combined_text += f"[Document #{index + 1} = {filename}]\n" for paragraph in document.paragraphs: combined_text += f"{paragraph.text}\n" except Exception as e: print(f"Error processing {filename}: {e}") # Handle potential errors
#elif filename.lower().endswith(".doc"): # TYPICALLY CRASHES. Consider using a different library for .doc files #print(f"Cannot handle .doc files directly. Consider using a library like python-docx2txt for .doc files.") elif filename.lower().endswith(".doc"): # Handle .doc files #print("from docx import Document") from docx import Document # to open and interact with Microsoft Word documents (.docx and potentially .doc files). try: document = Document(filepath) combined_text += f"[Document #{index + 1} = {filename}]\n" # Insert document name once for paragraph in document.paragraphs: combined_text = combined_text + f"[{paragraph.text}\n" combined_text += f"{paragraph.text}\n" # Append paragraph text except Exception as e: print(f"Error processing {filename}: {e}") print(f"Attempting to extract text with Textract...") import textract # Import Textract only if needed text = textract.process(filepath).decode('utf-8') combined_text += f"[Document #{index + 1} = {filename} (Textract)]\n" # Indicate Textract usage combined_text += text # Append extract-extracted text
elif filename.lower().endswith(".pdf"): # Handle .pdf files #print("import PyPDF2 # Import PyPDF2 library for PDFs") #libraries like PyPDF2, Camelot, or PDFMiner.six can extract text from PDF documents. import PyPDF2 # Import PyPDF2 library for PDFs try: with open(filepath, 'rb') as pdf_file: # Open PDF in binary mode pdf_reader = PyPDF2.PdfReader(pdf_file) for page_num in range(len(pdf_reader.pages)): # Iterate through pages page_obj = pdf_reader.pages[page_num] text = page_obj.extract_text() # Extract text from each page combined_text += f"[Document #{index + 1} = {filename} - Page {page_num + 1}]\n{text}\n" except Exception as e: print(f"Error processing {filename}: {e}") # Handle potential errors
elif filename.lower().endswith(".html") or filename.lower().endswith(".mhtml"): # Handle .html and .mhtml pages #print("from bs4 import BeautifulSoup") from bs4 import BeautifulSoup # Import BeautifulSoup for HTML parsing try: #with open(filepath, "r") as html_file: # Open HTML file in read mode #to open the file directly in text mode without BeautifulSoup, use 'r' #soup = BeautifulSoup(html_file, "html.parser") # Parse HTML structure with open(filepath, 'rb') as f: soup = BeautifulSoup(f, 'html.parser') combined_text += f"[Document #{index + 1} = {filename}]\n" # Insert document name for paragraph in soup.find_all("p"): # Find all <p> elements (paragraphs) combined_text += f"{paragraph.get_text(strip=True)}\n" # Extract text, strip whitespace except Exception as e: print(f"Error processing {filename}: {e}") # Handle potential errors
elif filename.lower().endswith(".rtf"): # Handle .rtf files print("import pyth") import pyth # for extracting plain text from .rtf files. try: with open(filepath, "r") as rtf_file: rtf_content = rtf_file.read() text = pyth.decode(rtf_content) # Extract text using pyth assumes you've installed pyth using pip install pyth. combined_text += f"[Document #{index + 1} = {filename}]\n{text}\n" except Exception as e: print(f"Error processing {filename}: {e}")
else: try: # Handle other text files (e.g., .txt) with default encoding with open(filepath, 'r', encoding='utf-8') as f: combined_text += f.read() + "\n\n" except UnicodeDecodeError as e: print(f"Error decoding {filename} with 'utf-8' encoding: {e}")
# Generate output filename based on selected filenames #output_filename = "_".join([filenames[i] for i in selected_indices]) # Ensure .txt extension for output filename output_filename = f"_".join([filenames[i] for i in selected_indices]) #+ ".txt" #limit length of output_filename: max_filename_length = 40 # Truncate only if necessary if len(output_filename) > max_filename_length: output_filename = output_filename[:max_filename_length] + "__.txt"
token_count = model.count_tokens(combined_text) print(f"Number of tokens in combined_text: {token_count}") return combined_text, output_filename
model = genai.GenerativeModel('gemini-pro-vision') #MODEL_NAME = "gemini-pro" #model = genai.GenerativeModel(MODEL_NAME) combined_text = f"imagename.jpg" img = PIL.Image.open('image.jpg') #response = model.generate_content(img) #To provide both text and images in a prompt, pass a list containing the strings and images: response = model.generate_content(["Write a short, engaging blog post based on this picture. It should include a description of the meal in the photo and talk about my journey meal prepping.", img], stream=True) response.resolve() # only required because stream=True to_markdown(response.text)
########################################## def Construct_Prompt_and_Response #####################################def Construct_Prompt_and_Response(instructions, combined_text):#def Construct_Prompt_and_Print_Log_Response(combined_text, MODEL_NAME, log_folder, output_responses_folder): #receives instructions and combined_text, sends them as prompt to the generative model, #prints and logs the response, and saves the prompt (instructions only) and response to an output/log file.
# Construct the prompt using both instructions and combined_text prompt = f"{instructions}: {combined_text}" # Send prompt to model to obtain a response response = model.generate_content(prompt) #response.text = f"Gemini AI: {response.text}\n" # Bard says: The error message "can't set attribute 'text'" in the line response.text = f"Gemini AI: {response.text}\n" indicates that the object referred to by response doesn't allow modifying its text property. The error message implies that the text property of the response object is not designed to be changed directly in your code. It might be a read-only property or have internal logic that manages its content. Libraries or models often return objects with specific structures and properties to manage their data internally. Modifying these properties directly within your code can lead to unexpected behavior or errors. #response.text = "Gemini AI: " + response.text + "\n" # line n is for separation when combined below. #Assume the GenerateContentResponse object has a property named text that holds the actual generated content. # Return response return response
#################### def Log_response_and_Save_Instructions_and_Response_to_output_file ##########################def Log_response_and_Save_Instructions_and_Response_to_output_file(instructions, response, output_filename, main_index): #print(f"Gemini Response:\n{response.text}") # for debugging only. print("") # space # Use the globally defined folders global log_folder, output_responses_folder
today = datetime.date.today().strftime("%Y-%m-%d") #print(f"Get today's date: {today}") log_file = os.path.join(log_folder, f"{today}.log")
# Extract input_filename without ".txt" input_files = output_filename.replace(".txt", "")
# Write instructions and response to All-Day Today log file with open(log_file, 'a') as f: f.write(f"Today's date: {today}\n") f.write(f"Input Files: {input_files}\n") f.write(f"Instructions #{main_index + 1}: {instructions}\n") f.write(f"Response #{main_index + 1}: {response.text}\n\n")
# Log instructions and response to named output file #output_filename = # now defined in def load_documents(): output_path = os.path.join(output_responses_folder, output_filename) with open(output_path, 'w') as f: f.write(f"Today's date: {today}\n") f.write(f"Input Files: {input_files}\n") #f.write(f"Instructions: {instructions}\n") #f.write(f"Response: {response.text}") f.write(f"Instructions #{main_index + 1}: {instructions}\n") f.write(f"Response #{main_index + 1}: {response.text}\n\n")
###################################### def MAIN #################################def main(): #Main function to load documents, prompt the user, and generate a response, log and save. global response, main_index # to avoid error: UnboundLocalError: local variable 'response' referenced before assignment #load and select documents.docx combined_text, output_filename = load_and_select_input_documents()
# Prompt user for the number of iterations #num_iterations = int(input("Enter the number of iterations: ")) while True: try: num_iterations_str = input("Enter the number of iterations: ") num_iterations = int(num_iterations_str) # If execution reaches here, the input was a valid integer break # Exit the loop except ValueError: print("Invalid input. Please enter a positive integer.")
# Proceed with further code using num_iterations
# Prompt user for instructions instructions = input("Enter instructions for Gemini model (e.g., summarize, explain key concepts, combine): ") print("") # space
for main_index in range(num_iterations): # Construct prompt, get response, and log/save response = Construct_Prompt_and_Response(instructions, combined_text)
# Print instructions and response print(f"Instructions #{main_index + 1}: {instructions}") # Use 'is None' to check for null response.text and print prompt feedback if applicable if response.text is None: print(f"Response prompt_feedback = {response.prompt_feedback}") else: print(f"Response #{main_index + 1}: {response.text}") if not response.text: # Check if response.text is empty safety_ratings = response.candidates[0].safety_ratings # Access safety ratings from first candidate if any(rating.rating == "BLOCK" for rating in safety_ratings): print(f"Response blocked due to safety concerns: {safety_ratings}") else: print("An error occurred while processing the prompt. Please try again.")
Log_response_and_Save_Instructions_and_Response_to_output_file(instructions, response, output_filename, main_index)
# Re-prompt for instructions for the next iteration (if needed) if main_index + 1 < num_iterations: new_instructions = input("Enter instructions for the next iteration (or press Enter to continue with same instructions): ") if new_instructions: instructions = new_instructions # Assuming you want to use new instructions
############## After Documents, after last "iteration", free form prompts without documents:
Converting the script to use OpenAI API for ChatGPT4
Changes need to be Made:
OpenAI API Integration:
import openai is added to import the OpenAI library.
openai.api_key is set to your OpenAI API key.
The construct_prompt_and_response function now uses openai.Completion.create with the text-davinci-003 engine (you can adjust this) to get the response from ChatGPT4.
Removed Gemini Specific Code:
All code related to Google Gemini Pro API, model selection, and configuration is removed.
Things to Note:
You'll need to replace "YOUR_OPENAI_API_KEY" with your actual OpenAI API key.
You can experiment with different OpenAI engines besides text-davinci-003 to explore various capabilities and performance characteristics.
Adjust the max_tokens parameter in the openai.Completion.create call to control the length of the generated response.
The script maintains the functionality of loading and processing documents, handling user prompts, and logging responses.
Here's the outline of Python script rewritten to utilize the OpenAI API for ChatGPT4 instead of the Google Gemini Pro API:
""""
import os
from docx import Document
import PyPDF2
from bs4 import BeautifulSoup
import pyth
import datetime
import openai
# Set your OpenAI API key
openai.api_key = "YOUR_OPENAI_API_KEY"
# Function to load and select input documents (unchanged)
def load_and_select_input_documents():
# ... (code remains the same as before)
# Function to construct prompt and get response from ChatGPT4
def construct_prompt_and_response(instructions, combined_text):
prompt = f"{instructions}: {combined_text}"
response = openai.Completion.create(
engine="text-davinci-003", # You can experiment with different engines
prompt=prompt,
max_tokens=1024, # Adjust as needed
n=1,
stop=None,
temperature=0.7,
)
return response.choices[0].text.strip()
# Function to log response and save to output file (unchanged)
def log_response_and_save_instructions_and_response_to_output_file(instructions, response, output_filename, main_index):
# ... (code remains the same as before)
# Main function
def main():
global main_index
combined_text, output_filename = load_and_select_input_documents()
while True:
try:
num_iterations_str = input("Enter the number of iterations: ")
num_iterations = int(num_iterations_str)
break
except ValueError:
print("Invalid input. Please enter a positive integer.")
instructions = input("Enter instructions for ChatGPT4 model (e.g., summarize, explain key concepts, combine): ")
print("")
for main_index in range(num_iterations):
response = construct_prompt_and_response(instructions, combined_text)
print(f"Instructions #{main_index + 1}: {instructions}")
print(f"Response #{main_index + 1}: {response}")
log_response_and_save_instructions_and_response_to_output_file(instructions, response, output_filename, main_index)
if main_index + 1 < num_iterations:
new_instructions = input("Enter instructions for the next iteration (or press Enter to continue with same instructions): ")
if new_instructions:
instructions = new_instructions
# Free-form prompts after document processing
combined_text = response # Carry forward the last response
print(" Further prompts for ChatGPT4 model (without documents):")
print("")
while True:
main_index += 1
print(f"END OF CURRENT RESPONSE. The main_index = {main_index}")
instructions = "User: " + input(f"Prompt #{main_index + 1} ")
response = construct_prompt_and_response(instructions, combined_text)
print("")
print(f"Response #{main_index + 1}: {response}")
print("")
combined_text += f"{instructions}.\n Assistant: {response}\n"
if __name__ == "__main__":
main()