# Convert PDF files to per-page PNG images and transcribe every page with an
# OpenAI vision model.

import base64
import os
import time

import pdf2image
from openai import OpenAI

from config import openai_api

client = OpenAI(api_key=openai_api)

# Base64 text of the first bytes of the 8-byte PNG file signature
# (89 50 4E 47 0D 0A 1A 0A); used to label PNG payloads correctly.
_PNG_BASE64_PREFIX = "iVBORw0KGg"


def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as base64 text."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def _guess_image_mime(b64_image):
    """Return "image/png" for base64 PNG data, otherwise "image/jpeg"."""
    if isinstance(b64_image, str) and b64_image.startswith(_PNG_BASE64_PREFIX):
        return "image/png"
    # Anything that is not recognisably PNG keeps the label the code has
    # always sent.
    return "image/jpeg"


def pdf_to_image(path_to_pdf, get_output_in_code=False):
    """Render every page of a PDF to a PNG file next to the PDF.

    Pages are written to ``<pdf path without extension>/png/<i>_page.png``.

    Args:
        path_to_pdf: Path of the PDF file to convert.
        get_output_in_code: When true, return the rendered pages.

    Returns:
        ``(images, paths_to_img)`` when *get_output_in_code* is true, where
        *images* is the list returned by ``pdf2image.convert_from_path`` and
        *paths_to_img* the list of PNG paths written; otherwise ``None``.
    """
    print("Converting pdf to img")
    start_time = time.perf_counter()
    images = pdf2image.convert_from_path(path_to_pdf, dpi=100)
    execution_time = time.perf_counter() - start_time
    print("Conversion complete")
    print("Execution time: {:.2f} seconds".format(execution_time))

    # Strip only the trailing extension; str.replace(".pdf", "") would also
    # remove ".pdf" from directory names earlier in the path.
    base_dir = os.path.splitext(path_to_pdf)[0]
    save_path = base_dir + "/png/"
    print("Creating repository to store images")
    # makedirs creates the intermediate <base_dir> directory as well.
    os.makedirs(save_path, exist_ok=True)
    print("Directory created : ", save_path)

    paths_to_img = []
    for i, image in enumerate(images):
        # Build the path once so the logged, saved and returned paths agree.
        page_path = f"{save_path}{i}_page.png"
        print(f"saving page {i} in {page_path}")
        image.save(page_path, "PNG")
        paths_to_img.append(page_path)

    if get_output_in_code:
        return images, paths_to_img
    return None


def pdfs_folder_to_images(input_path, get_output_in_code=False):
    """Convert every ``.pdf`` file found under *input_path* to PNG pages.

    The directory tree is walked recursively with ``os.walk``.

    Args:
        input_path: Root directory to search.
        get_output_in_code: When true, collect and return the results.

    Returns:
        A dict mapping each PDF path to the ``(images, paths_to_img)`` tuple
        from :func:`pdf_to_image` when *get_output_in_code* is true;
        otherwise ``None``.
    """
    images = {}
    for root, _dirs, files in os.walk(input_path):
        for file in files:
            if not file.endswith('.pdf'):
                continue
            pdf_path = os.path.join(root, file)
            print("FILE IS ", pdf_path)
            if get_output_in_code:
                images[pdf_path] = pdf_to_image(pdf_path, get_output_in_code=True)
            else:
                pdf_to_image(pdf_path)

    if get_output_in_code:
        return images
    return None


def img_to_txt(img):
    """Send a base64-encoded image to ``gpt-4o`` and return its reply text.

    Args:
        img: Base64 text of the image, as produced by :func:`encode_image`.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    # Label the payload with its real type: pages produced by pdf_to_image
    # are PNG, not JPEG.
    mime_type = _guess_image_mime(img)
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "Your task is to transcribe and explain in English every single thing from screenshots sent by users"
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{img}",
                        }
                    }
                ]
            }
        ],
        temperature=1,
        max_tokens=1999,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return completion.choices[0].message.content


def img_to_txt_gemini(img):
    """Placeholder for a Gemini-based transcription; always returns ""."""
    return ""


def process_pdf_hq(path, get_output_in_code=True):
    """Transcribe every page of every PDF under *path*.

    Each PDF is rendered to PNG pages, each page is sent to
    :func:`img_to_txt`, and the reply is written to its own text file in
    ``<path>/extracted_pdf``.

    Args:
        path: Root directory containing the PDF files.
        get_output_in_code: When true, return the transcriptions.

    Returns:
        A dict mapping each PDF path to the list of its page transcriptions
        (in page order) when *get_output_in_code* is true; otherwise ``None``.
    """
    converted_pdf_router = pdfs_folder_to_images(path, get_output_in_code=True)
    path_extracted_pdf = os.path.join(path, "extracted_pdf")
    os.makedirs(path_extracted_pdf, exist_ok=True)

    content_extracted = {}
    for link, (_images, page_paths) in converted_pdf_router.items():
        print("Working on ", link)
        page_texts = []
        content_extracted[link] = page_texts
        for img_path in page_paths:
            print("Processing subimage")
            base64_image = encode_image(img_path)
            # The API may return no text content; write an empty file rather
            # than crash on fil.write(None).
            content = img_to_txt(base64_image) or ""
            print(img_path)
            page_texts.append(content)

            # Flatten the image path into a single file name.
            safe_name = img_path.replace('/', '_').replace('.', '_')
            out_file = os.path.join(path_extracted_pdf, f"PDF_FILE_{safe_name}.txt")
            # Explicit UTF-8 so non-ASCII transcriptions do not depend on the
            # platform's default encoding.
            with open(out_file, "w", encoding="utf-8") as fil:
                fil.write(content)

    if get_output_in_code:
        return content_extracted
    return None