# Spaces:
# Sleeping
# Sleeping
import base64
import os
import time

import pdf2image
from openai import OpenAI

from config import openai_api

# Module-level OpenAI client used by img_to_txt; the API key is the
# `openai_api` value imported from `config`.
client = OpenAI(api_key=openai_api)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode; the base64 output is decoded as UTF-8,
    so the result is a ``str`` rather than ``bytes``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def pdf_to_image(path_to_pdf, get_output_in_code=False):
    """Render every page of a PDF to a PNG file in a folder next to the PDF.

    Pages are rasterised with ``pdf2image.convert_from_path`` at 100 DPI and
    saved as ``<pdf path without extension>/png/<page index>_page.png``, with
    page indices starting at 0. Progress and timing are printed to stdout.

    Args:
        path_to_pdf: Path of the PDF file to convert.
        get_output_in_code: When true, return the rendered pages and their
            file paths; otherwise only write the files.

    Returns:
        ``(images, paths_to_img)`` when ``get_output_in_code`` is true, where
        ``images`` is the list returned by ``convert_from_path`` and
        ``paths_to_img`` holds the saved PNG paths in page order; ``None``
        otherwise.
    """
    print("Converting pdf to img")
    start_time = time.time()
    images = pdf2image.convert_from_path(path_to_pdf, dpi=100)
    execution_time = time.time() - start_time
    print("Conversion complete")
    print("Execution time: {:.2f} seconds".format(execution_time))

    # Strip only the trailing extension. str.replace(".pdf", "") would also
    # rewrite ".pdf" appearing elsewhere in the path and would leave an
    # upper-case ".PDF" untouched, so the output directory name would collide
    # with the PDF file itself.
    base_dir, _extension = os.path.splitext(path_to_pdf)
    save_path = os.path.join(base_dir, "png")

    print("Creating repository to store images")
    # makedirs creates base_dir as an intermediate directory when needed.
    os.makedirs(save_path, exist_ok=True)
    print("Directory created : ", save_path)

    paths_to_img = []
    for i, image in enumerate(images):
        # Build the path once so the returned path is exactly the one the
        # page was written to (no doubled separator).
        page_path = os.path.join(save_path, f"{i}_page.png")
        print(f"saving page {i} in {page_path}")
        image.save(page_path, "PNG")
        paths_to_img.append(page_path)

    if get_output_in_code:
        return images, paths_to_img
    return None
def pdfs_folder_to_images(input_path, get_output_in_code=False):
    """Convert every ``.pdf`` file found under ``input_path`` to PNG pages.

    The directory tree is walked with ``os.walk`` and ``pdf_to_image`` is
    called for each file whose name ends with ``.pdf`` (the match is
    case-sensitive).

    Args:
        input_path: Root directory to search.
        get_output_in_code: When true, collect and return each PDF's
            conversion result.

    Returns:
        When ``get_output_in_code`` is true, a dict mapping each PDF path to
        the value returned by ``pdf_to_image(..., get_output_in_code=True)``
        (empty when no PDF is found); otherwise ``None``.
    """
    images = {}
    for root, _dirs, files in os.walk(input_path):
        for filename in files:
            if not filename.endswith('.pdf'):
                continue
            # Join once and reuse for logging, conversion and the dict key.
            pdf_path = os.path.join(root, filename)
            print("FILE IS ", pdf_path)
            if get_output_in_code:
                images[pdf_path] = pdf_to_image(pdf_path, get_output_in_code=True)
            else:
                pdf_to_image(pdf_path)
    if get_output_in_code:
        return images
    return None
def img_to_txt(img):
    """Ask the ``gpt-4o`` chat model to transcribe and explain an image.

    The image is sent inline as a base64 data URL together with a system
    prompt asking for an English transcription and explanation.

    Args:
        img: Base64-encoded image data as a ``str`` (for example the value
            returned by ``encode_image``).

    Returns:
        The ``content`` of the first choice's message in the API response,
        returned unchanged.
    """
    # Label the data URL with the real image format. PNG data always starts
    # with the signature bytes 89 50 4E 47 0D 0A 1A 0A, whose base64 form
    # begins with "iVBORw0KGgo"; anything else keeps the previous
    # "image/jpeg" label.
    mime_type = "image/png" if img.startswith("iVBORw0KGgo") else "image/jpeg"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "Your task is to transcribe and explain in English every single thing from screenshots sent by users",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{img}",
                        },
                    }
                ],
            },
        ],
        temperature=1,
        max_tokens=1999,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response.choices[0].message.content
def img_to_txt_gemini(img):
    """Return an empty string; ``img`` is accepted but not used.

    NOTE(review): presumably a stub for a Gemini-backed counterpart of
    ``img_to_txt`` -- confirm before relying on it.
    """
    empty_text = ""
    return empty_text
def process_pdf_hq(path, get_output_in_code=True):
    """Convert every PDF under ``path`` to page images and transcribe them.

    PDFs are rendered through ``pdfs_folder_to_images``. Each page image is
    base64-encoded with ``encode_image``, sent to ``img_to_txt``, and the
    returned text is written to
    ``<path>/extracted_pdf/PDF_FILE_<flattened image path>.txt``, where the
    flattened image path has every ``/`` and ``.`` replaced by ``_``.

    Args:
        path: Directory searched for PDFs; the ``extracted_pdf`` output
            directory is created inside it.
        get_output_in_code: When true (the default), return the collected
            text.

    Returns:
        When ``get_output_in_code`` is true, a dict mapping each PDF path to
        the list of per-page texts in page order; otherwise ``None``.
    """
    converted_pdf_router = pdfs_folder_to_images(path, get_output_in_code=True)
    path_extracted_pdf = os.path.join(path, "extracted_pdf")
    os.makedirs(path_extracted_pdf, exist_ok=True)

    content_extracted = {}
    # Each value is the (images, paths_to_img) pair from pdf_to_image; only
    # the saved file paths are needed here.
    for link, (_pages, page_paths) in converted_pdf_router.items():
        print("Working on ", link)
        page_texts = []
        content_extracted[link] = page_texts
        for img_path in page_paths:
            print("Processing subimage")
            base64_image = encode_image(img_path)
            content = img_to_txt(base64_image)
            print(img_path)
            # img_to_txt passes the message content through unchanged; if it
            # is None, store an empty string so the write below cannot fail
            # and the per-page list stays aligned with the pages.
            if content is None:
                content = ""
            page_texts.append(content)

            flat_name = img_path.replace('/', '_').replace('.', '_')
            output_file = os.path.join(path_extracted_pdf, f"PDF_FILE_{flat_name}.txt")
            # Explicit UTF-8: the default encoding is locale-dependent and
            # can reject non-ASCII characters in the transcribed text.
            with open(output_file, "w", encoding="utf-8") as fil:
                fil.write(content)

    if get_output_in_code:
        return content_extracted
    return None