File size: 4,847 Bytes
cd3a196
4a27668
cd3a196
4a27668
 
 
 
 
 
 
 
 
 
 
cd3a196
4a27668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd3a196
4a27668
cd3a196
fefc92e
 
4a27668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fefc92e
4a27668
 
 
 
 
 
 
 
 
 
 
 
fefc92e
cd3a196
 
 
 
 
 
 
 
 
 
4a27668
cd3a196
 
819daad
747b69f
cd3a196
 
 
 
 
747b69f
cd3a196
747b69f
cd3a196
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import gradio as gr
from pptx import Presentation
from pptx.util import Pt, Inches
from pptx.shapes.group import GroupShape
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.shapes.picture import Picture
import json
import os
from PIL import Image
import io

def print_json(item):
    """Serialize *item* to a human-readable JSON string and return it.

    Despite the name, nothing is written to stdout. The text is indented by
    four spaces and non-ASCII characters are kept as-is rather than being
    escaped to ``\\uXXXX`` sequences.
    """
    return json.dumps(item, indent=4, ensure_ascii=False)

def _font_to_dict(run):
    """Describe the font of *run* as a plain dict; all values None if *run* is None."""
    keys = ('name', 'bold', 'italic', 'underline', 'color', 'language_id')
    if run is None:
        return dict.fromkeys(keys)
    font = run.font
    try:
        # python-pptx only exposes .rgb for explicit RGB colors; theme or
        # inherited colors raise AttributeError instead of returning None.
        color = font.color.rgb
    except AttributeError:
        color = None
    return {
        'name': font.name,
        'bold': font.bold,
        'italic': font.italic,
        'underline': font.underline,
        'color': color,
        'language_id': font.language_id,
    }


def _paragraph_to_dict(paragraph):
    """Describe a paragraph: its text, alignment and the font of its first run."""
    runs = paragraph.runs
    # An empty paragraph has no runs, so there is no font to sample.
    first_run = runs[0] if runs else None
    return {
        'text': paragraph.text,
        'align': paragraph.alignment,
        'font': _font_to_dict(first_run),
    }


def _text_shape_to_dict(shape):
    """Describe a shape that has a text frame, one ``paragraph_<n>`` key per paragraph."""
    shape_item = {'type': "text", 'location': (shape.left, shape.top)}
    for r, paragraph in enumerate(shape.text_frame.paragraphs):
        shape_item[f'paragraph_{r}'] = _paragraph_to_dict(paragraph)
    return shape_item


def transfer_textbox_content_in_group(group_shape):
    """Extract the text content of the shapes directly inside a group shape.

    Nothing is modified; the group is only read.

    Args:
        group_shape: object whose ``shapes`` attribute iterates the member
            shapes of the group.

    Returns:
        A dict with one ``shape_<index>`` entry per member shape. Members
        with a text frame map to a dict holding ``type``, ``location`` and
        one ``paragraph_<n>`` entry per paragraph; every other member maps
        to an empty dict.
    """
    group_shape_item = {}
    for index, shape in enumerate(group_shape.shapes):
        if shape.has_text_frame:
            shape_item = _text_shape_to_dict(shape)
        else:
            shape_item = {}
        group_shape_item[f"shape_{index}"] = shape_item
    return group_shape_item


def transfer_to_structure(pptx_file, images_dir_path):
    """Convert a presentation into a JSON description and export its pictures.

    Args:
        pptx_file: whatever ``Presentation`` accepts to open the deck.
        images_dir_path: directory the extracted pictures are written to as
            PNG files. It is not created here.

    Returns:
        A ``(json_text, image_paths)`` tuple. ``json_text`` holds one
        ``slide_<i>`` entry per slide, each with one ``shape_<j>`` entry per
        shape; ``image_paths`` lists the PNG files that were written.
    """
    item = {}
    prs = Presentation(pptx_file)

    image_path_list = []

    for i, slide in enumerate(prs.slides):
        slide_item = {}
        for j, shape in enumerate(slide.shapes):
            if shape.has_text_frame:
                # Read-only extraction, same schema as text shapes in a group.
                shape_item = _text_shape_to_dict(shape)
            elif isinstance(shape, GroupShape):
                shape_item = {
                    'type': "group",
                    'group_content': transfer_textbox_content_in_group(shape),
                }
            elif isinstance(shape, Picture):
                # The slide index is part of the name so pictures that share
                # a shape index on different slides do not overwrite each other.
                image_path = os.path.join(
                    images_dir_path, f"slide_{i}_picture_{j}.png"
                )
                left, top = shape.left, shape.top
                shape_item = {
                    'type': "picture",
                    'image_path': image_path,
                    'size': shape.image.size,  # width, height
                    'dpi': shape.image.dpi,  # (horz_dpi, vert_dpi)
                    'location': (left, top),
                    # left/top are already lengths; read their inch value
                    # directly instead of re-wrapping them in Inches().
                    'location_inches': (
                        left.inches if left is not None else None,
                        top.inches if top is not None else None,
                    ),
                }
                try:
                    with Image.open(io.BytesIO(shape.image.blob)) as shape_image:
                        shape_image.save(image_path)
                except OSError as exc:
                    # Pillow could not decode the blob or write it as PNG;
                    # record the failure and keep converting the other shapes.
                    shape_item['image_path'] = None
                    shape_item['image_error'] = str(exc)
                else:
                    image_path_list.append(image_path)
            else:
                shape_item = {}
            slide_item[f"shape_{j}"] = shape_item
        item[f"slide_{i}"] = slide_item

    return print_json(item), image_path_list

def copy_font_style(original_run, new_run):
    """Copy basic font attributes from *original_run* onto *new_run*.

    Copies name, bold, italic, underline, color and language id. The color
    is copied only when the source exposes an explicit RGB value; otherwise
    the color of *new_run* is left untouched.

    Args:
        original_run: run whose ``font`` is read.
        new_run: run whose ``font`` is updated in place.
    """
    source_font = original_run.font
    target_font = new_run.font

    target_font.name = source_font.name
    target_font.bold = source_font.bold
    target_font.italic = source_font.italic
    target_font.underline = source_font.underline

    try:
        # python-pptx raises AttributeError from .rgb for theme or inherited
        # colors, i.e. whenever no explicit RGB value is set.
        rgb = source_font.color.rgb
    except AttributeError:
        rgb = None
    if rgb is not None:
        target_font.color.rgb = rgb

    target_font.language_id = source_font.language_id

def process_pptx(pptx_file):
    """Gradio callback: convert an uploaded PowerPoint file.

    Args:
        pptx_file: the value supplied by the ``gr.File`` input. Either a
            filesystem path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute holds the path of the uploaded file.

    Returns:
        The ``(json_output, image_paths)`` tuple produced by
        ``transfer_to_structure``.

    Raises:
        ValueError: if no file was supplied.
    """
    if pptx_file is None:
        raise ValueError("No PowerPoint file was uploaded.")

    # Path-like values are used as-is; checked first because
    # pathlib.Path.name is only the basename, not the full path.
    if isinstance(pptx_file, (str, os.PathLike)):
        pptx_path = os.fspath(pptx_file)
    else:
        pptx_path = pptx_file.name

    images_dir_path = "images"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(images_dir_path, exist_ok=True)

    json_output, image_paths = transfer_to_structure(pptx_path, images_dir_path)
    return json_output, image_paths

# Gradio interface: one file upload in, the JSON text plus an image gallery
# out. Both outputs are filled from the tuple returned by process_pptx.
iface = gr.Interface(
    title="PowerPoint to JSON Converter",
    description="Upload a PowerPoint file to convert its structure to JSON and display extracted images.",
    fn=process_pptx,
    inputs=gr.File(label="Upload PowerPoint File"),
    outputs=[
        gr.Textbox(label="JSON Output"),
        gr.Gallery(label="Extracted Images"),
    ],
)

# Start the web UI as soon as the module is executed.
iface.launch()