File size: 3,492 Bytes
411ca77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from openai import OpenAI
import pdf2image
import base64
import os
import time
from config import openai_api

# Module-level OpenAI client used by img_to_txt; the API key is read from the
# local ``config`` module (``openai_api``).
client = OpenAI(api_key=openai_api)

    

def encode_image(image_path):
    """Return the content of the file at *image_path* as a base64 string.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8 so
    the caller receives a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')



def pdf_to_image(path_to_pdf, get_output_in_code=False):
    """Render every page of a PDF to a PNG file stored next to the PDF.

    Pages are written to ``<path_to_pdf without extension>/png/<i>_page.png``
    where ``i`` is the zero-based page index. Progress is reported with
    ``print``.

    Args:
        path_to_pdf: Path of the PDF file to convert.
        get_output_in_code: When true, return the rendered pages and the
            paths they were saved to; otherwise nothing is returned.

    Returns:
        ``(images, paths_to_img)`` when ``get_output_in_code`` is true, where
        ``images`` is the list returned by ``pdf2image.convert_from_path`` and
        ``paths_to_img`` holds the saved PNG paths in page order; ``None``
        otherwise.
    """
    print("Converting pdf to img")
    start_time = time.time()
    images = pdf2image.convert_from_path(path_to_pdf, dpi=100)
    execution_time = time.time() - start_time
    print("Conversion complete")
    print("Execution time: {:.2f} seconds".format(execution_time))

    # Strip only the trailing extension: str.replace(".pdf", "") would also
    # remove ".pdf" appearing anywhere else in the path and would leave an
    # upper-case ".PDF" suffix in place.
    base_dir = os.path.splitext(path_to_pdf)[0]
    save_path = os.path.join(base_dir, "png")
    print("Creating repository to store images")
    # makedirs creates the intermediate ``base_dir`` as well.
    os.makedirs(save_path, exist_ok=True)
    print("Directory created : ", save_path)

    paths_to_img = []
    for i, image in enumerate(images):
        # Build the path once so the reported path is exactly the saved one.
        page_path = os.path.join(save_path, f"{i}_page.png")
        print(f"saving page {i} in {page_path}")
        image.save(page_path, "PNG")
        paths_to_img.append(page_path)

    if get_output_in_code:
        return images, paths_to_img
    return None

def pdfs_folder_to_images(input_path, get_output_in_code = False):
    """Convert every ``.pdf`` file found under *input_path* to page images.

    The directory tree is traversed with ``os.walk`` and each file whose name
    ends with ``.pdf`` is handed to ``pdf_to_image``.

    Args:
        input_path: Root folder to search for PDF files.
        get_output_in_code: When true, collect and return the conversion
            results; otherwise only perform the conversions.

    Returns:
        When ``get_output_in_code`` is true, a dict mapping each PDF path to
        the value returned by ``pdf_to_image(pdf, get_output_in_code=True)``;
        ``None`` otherwise.
    """
    converted = {}
    for current_dir, _subdirs, filenames in os.walk(input_path):
        for filename in filenames:
            if not filename.endswith('.pdf'):
                continue
            pdf_path = os.path.join(current_dir, filename)
            print("FILE IS ", pdf_path)
            if not get_output_in_code:
                pdf_to_image(pdf_path)
                continue
            converted[pdf_path] = pdf_to_image(pdf_path, get_output_in_code=True)

    return converted if get_output_in_code else None
    
    
# Leading base64 characters of common image file signatures. They are used to
# label the data URL with the payload's actual MIME type instead of always
# claiming JPEG (the pages produced by pdf_to_image are PNG files).
_BASE64_IMAGE_SIGNATURES = (
    ("iVBORw0KGg", "image/png"),  # \x89PNG\r\n\x1a\n
    ("/9j/", "image/jpeg"),       # \xff\xd8\xff
    ("R0lGOD", "image/gif"),      # GIF8
    ("UklGR", "image/webp"),      # RIFF container
)


def _guess_image_mime(img):
    """Return the MIME type matching the base64 payload's signature, defaulting to JPEG."""
    if isinstance(img, str):
        for prefix, mime in _BASE64_IMAGE_SIGNATURES:
            if img.startswith(prefix):
                return mime
    # Unknown signature: keep the label this function has always used.
    return "image/jpeg"


def img_to_txt(img):
    """Ask the ``gpt-4o`` chat model to transcribe and explain an image.

    Args:
        img: Base64-encoded image content (as produced by ``encode_image``).

    Returns:
        The ``message.content`` of the first choice of the chat completion.
    """
    mime_type = _guess_image_mime(img)
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "Your task is to transcribe and explain in English every single thing from screenshots sent by users",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            # Data URL whose MIME type matches the payload.
                            "url": f"data:{mime_type};base64,{img}",
                        },
                    }
                ],
            },
        ],
        temperature=1,
        max_tokens=1999,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return completion.choices[0].message.content


def img_to_txt_gemini(img):
    """Placeholder: ignores *img* and always returns an empty string."""
    transcription = ""
    return transcription

def process_pdf_hq(path, get_output_in_code=True):
    """Transcribe every page of every PDF found under *path*.

    Each PDF is converted to page images with ``pdfs_folder_to_images``; every
    page image is base64-encoded, sent to ``img_to_txt`` and the resulting text
    is written to ``<path>/extracted_pdf/PDF_FILE_<sanitized image path>.txt``.

    Args:
        path: Root folder containing the PDF files.
        get_output_in_code: When true, return the collected transcriptions;
            otherwise nothing is returned.

    Returns:
        When ``get_output_in_code`` is true, a dict mapping each PDF path to
        the list of page transcriptions in page order; ``None`` otherwise.
    """
    converted_pdf_router = pdfs_folder_to_images(path, get_output_in_code=True)
    path_extracted_pdf = os.path.join(path, "extracted_pdf")
    os.makedirs(path_extracted_pdf, exist_ok=True)

    content_extracted = {}
    for link, converted in converted_pdf_router.items():
        print("Working on ", link)
        page_texts = []
        content_extracted[link] = page_texts

        # converted is (images, paths_to_img); only the saved paths are needed.
        for img_path in converted[1]:
            print("Processing subimage")
            base64_image = encode_image(img_path)
            # The model may return no text content; store an empty string
            # rather than failing on write(None).
            content = img_to_txt(base64_image) or ""
            print(img_path)
            page_texts.append(content)

            flat_name = img_path.replace('/', '_').replace('.', '_')
            out_file = os.path.join(path_extracted_pdf, f"PDF_FILE_{flat_name}.txt")
            # Explicit UTF-8 so non-ASCII transcriptions are written correctly
            # regardless of the platform's default encoding.
            with open(out_file, "w", encoding="utf-8") as fil:
                fil.write(content)

    if get_output_in_code:
        return content_extracted
    return None