paper-slides-summary / pptx_utils.py
zolicsaki's picture
Upload 7 files
57d4532 verified
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.util import Inches
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR
from pptx.util import Pt
import string
from datetime import datetime
import os
import re
# Matches one leading list marker: optional whitespace, an optional "-" bullet,
# then optionally "(N)", "N." / "N)" or a dotted outline number such as "1.2.".
# The negative look-ahead keeps decimals ("3.5 GHz") intact, and a bare number
# with no "." / ")" after it ("2024 was ...", "3D models") is never touched.
_LEADING_NUMBERING_RE = re.compile(
    r'^\s*(?:-\s*)?(?:\(\d+\)|\d+(?:\.\d+)*[.)](?!\d))?\s*'
)


def clean_leading_numbering(text):
    """Return *text* without a leading list marker.

    Handles markers such as ``"1. "``, ``"1) "``, ``"(1) "``, ``"- 1. "`` and
    a plain ``"- "`` bullet. Only the marker (and the whitespace around it) is
    removed; text that merely starts with a number is returned unchanged.

    Args:
        text: A single line of text.

    Returns:
        The line with the leading marker removed.
    """
    return _LEADING_NUMBERING_RE.sub('', text, count=1)
def is_logo_exist(file_path: str):
    """Report whether *file_path* exists on disk.

    The path is always echoed to stdout; a second message is printed when
    nothing exists at that path.

    Args:
        file_path: Path to check.

    Returns:
        ``True`` if ``os.path.exists`` finds the path, ``False`` otherwise.
    """
    print(file_path)
    found = os.path.exists(file_path)
    if not found:
        print("File does not exist.")
    return found
class Dict2PPT:
    """Build a fixed-structure slide deck from a dictionary of section texts.

    ``build_slides`` is the main entry point: it reads the keys ``Title``,
    ``Outline``, ``Background``, ``Research problem``, ``Objectives``,
    ``Methodology``, ``Results`` and ``Conclusions`` from the given dict,
    normalises each text with the matching ``_*_preprocess`` method and
    renders it with the matching ``*_slide`` method. ``save`` writes the
    resulting presentation to disk.
    """

    # Typeface applied to every title and body paragraph.
    _FONT_NAME = 'Times New Roman'
    # Point size used for every slide title.
    _SLIDE_TITLE_PT = 36

    def __init__(self, logo_path: str = 'logo.png', title_size: int = 32, content_size: int = 24) -> None:
        """Create an empty presentation.

        Args:
            logo_path: Image placed in the top-right corner of every slide
                when the file exists.
            title_size: Stored as ``title_font_size``; the slide methods in
                this class use their own fixed sizes and do not read it.
            content_size: Stored as ``content_font_size``; likewise not read
                by the slide methods in this class.
        """
        self.title_font_size = Pt(title_size)
        self.content_font_size = Pt(content_size)
        self.logo_path = logo_path
        self.prs = Presentation()

    # ------------------------------------------------------------------
    # Text helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _clean_line(text: str) -> str:
        """Trim whitespace, then surrounding ASCII punctuation, then whitespace again."""
        return text.strip().strip(string.punctuation).strip()

    @staticmethod
    def _scaled_font_size(item_count: int, base_size: int) -> int:
        """Pick a body font size that shrinks as the number of bullets grows.

        Fewer than 4 items use ``base_size``, 4-6 items use ``base_size - 2``
        and 7 or more use ``base_size - 4``.
        """
        if item_count >= 7:
            return base_size - 4
        if item_count >= 4:
            return base_size - 2
        return base_size

    def _numbered_lines(self, text: str):
        """Split *text* into cleaned, non-empty lines with list markers removed."""
        cleaned = (
            self._clean_line(clean_leading_numbering(text=line))
            for line in text.strip().splitlines()
        )
        # Blank lines would otherwise be rendered as empty numbered bullets.
        return [line for line in cleaned if line]

    def _title_preprocess(self, title: str):
        """Capitalise every whitespace-separated word and collapse the spacing."""
        return ' '.join(word.capitalize() for word in title.split())

    # ------------------------------------------------------------------
    # Slide decoration helpers
    # ------------------------------------------------------------------
    def _add_time_footnote(self, slide):
        """Add today's date (e.g. "March 26, 2025") centred near the bottom of *slide*."""
        date_str = datetime.today().strftime("%B %d, %Y")

        textbox_width = Inches(3)
        textbox_height = Inches(0.3)
        # Floor division keeps the EMU offset an integer.
        left = (self.prs.slide_width - textbox_width) // 2
        top = self.prs.slide_height - Inches(0.5)

        textbox = slide.shapes.add_textbox(left, top, textbox_width, textbox_height)
        paragraph = textbox.text_frame.paragraphs[0]
        run = paragraph.add_run()
        run.text = date_str
        run.font.size = Pt(12)
        paragraph.alignment = PP_ALIGN.CENTER

    def _add_logo(self, slide):
        """Place the logo in the top-right corner of *slide*; do nothing if the file is missing."""
        if not is_logo_exist(file_path=self.logo_path):
            return

        logo_width = Inches(1.0)
        logo_height = Inches(1.0)
        right_margin = Inches(0.2)
        top = Inches(0.2)
        # Measured from the right edge: slide width minus logo width minus margin.
        left = self.prs.slide_width - logo_width - right_margin
        slide.shapes.add_picture(self.logo_path, left, top, width=logo_width, height=logo_height)

    def _set_background_color(self, slide):
        """Fill the background of *slide* with a solid light blue (RGB 240, 248, 255)."""
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = RGBColor(240, 248, 255)

    def _style_title(self, title_shape, text: str, align_left: bool = True):
        """Set *text* on a title shape and make its runs bold at the shared title size."""
        title_shape.text = text
        paragraph = title_shape.text_frame.paragraphs[0]
        if align_left:
            paragraph.alignment = PP_ALIGN.LEFT
        for run in paragraph.runs:
            run.font.bold = True
            run.font.name = self._FONT_NAME
            run.font.size = Pt(self._SLIDE_TITLE_PT)

    def _new_content_slide(self, title: str, center_vertically: bool = True):
        """Append a decorated slide from layout 1 and return ``(slide, text_frame)``.

        The slide gets the background colour, the logo and a styled title.
        The text frame of placeholder 1 is cleared and, when
        *center_vertically* is true, anchored to the vertical middle.
        """
        slide = self.prs.slides.add_slide(self.prs.slide_layouts[1])
        self._set_background_color(slide=slide)
        self._add_logo(slide=slide)
        self._style_title(slide.shapes.title, title)

        text_frame = slide.placeholders[1].text_frame
        text_frame.clear()
        if center_vertically:
            text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
        return slide, text_frame

    def _add_bullet(self, text_frame, text: str, size: int, level: int = 0, bold=None, italic=None):
        """Append a left-aligned paragraph; *bold* / *italic* are only set when not ``None``."""
        paragraph = text_frame.add_paragraph()
        paragraph.text = text
        paragraph.level = level
        paragraph.font.size = Pt(size)
        if italic is not None:
            paragraph.font.italic = italic
        paragraph.font.name = self._FONT_NAME
        if bold is not None:
            paragraph.font.bold = bold
        paragraph.alignment = PP_ALIGN.LEFT
        return paragraph

    def _add_labelled_paragraph(self, text_frame, label: str, body: str, size: int):
        """Append a paragraph made of a bold *label* run followed by a regular *body* run."""
        paragraph = text_frame.add_paragraph()

        label_run = paragraph.add_run()
        label_run.text = label
        label_run.font.bold = True
        label_run.font.size = Pt(size)

        body_run = paragraph.add_run()
        body_run.text = body
        body_run.font.bold = False
        body_run.font.size = Pt(size)

        paragraph.font.name = self._FONT_NAME
        paragraph.alignment = PP_ALIGN.LEFT
        return paragraph

    def _add_numbered_bullets(self, text_frame, items, size: int, bold=None):
        """Append one "N. item" paragraph per entry of *items*, numbering from 1."""
        for idx, item in enumerate(items, start=1):
            self._add_bullet(text_frame, f"{idx}. {item}", size, bold=bold)

    # ------------------------------------------------------------------
    # Title and outline
    # ------------------------------------------------------------------
    def title_slide(self, title: str, authors: str):
        """Add the opening slide with *title*, an author line and today's date.

        Note: *authors* is accepted for interface compatibility but is not
        rendered; the subtitle always shows the placeholder "Author Here".
        """
        slide = self.prs.slides.add_slide(self.prs.slide_layouts[0])
        self._set_background_color(slide=slide)
        self._add_logo(slide=slide)
        # The title-slide heading keeps the layout's own alignment.
        self._style_title(slide.shapes.title, title, align_left=False)

        author_shape = slide.placeholders[1]
        today = datetime.today().strftime("%B %d, %Y")
        author_shape.text = '\nAuthor Here\n' + today

        # Paragraph 0 is the blank leading line; paragraph 1 is the author line.
        author_paragraph = author_shape.text_frame.paragraphs[1]
        for run in author_paragraph.runs:
            run.font.name = self._FONT_NAME
            run.font.size = Pt(24)

    def _outline_preprocess_(self, outline):
        """Normalise an outline into a ``{topic: description}`` dict.

        Args:
            outline: Either a dict of topic -> description, or a string with
                one ``topic: description`` entry per line (the description
                is optional).

        Returns:
            The cleaned mapping. Blank lines are skipped. Any other input
            type prints "Wrong format" and yields an empty dict.
        """
        if isinstance(outline, dict):
            return {
                self._clean_line(topic): self._clean_line(desc)
                for topic, desc in outline.items()
            }

        if isinstance(outline, str):
            parsed = {}
            for raw_line in outline.split('\n'):
                line = self._clean_line(raw_line)
                if not line:
                    continue
                topic, sep, desc = line.partition(':')
                if not sep:
                    parsed[line] = ''
                else:
                    # Everything after the first colon is the description, so
                    # any further colons inside it are preserved.
                    parsed[self._clean_line(topic)] = self._clean_line(desc)
            return parsed

        print('Wrong format')
        return {}

    def outline_slide(self, outline: dict):
        """Add the "Outline" slide: one bold topic per entry with an optional indented description."""
        slide, text_frame = self._new_content_slide('Outline', center_vertically=False)
        for topic, desc in outline.items():
            self._add_bullet(text_frame, topic, 20, bold=True)
            if len(desc) > 0:
                self._add_bullet(text_frame, desc, 12, level=1)
        self._add_time_footnote(slide=slide)

    # ------------------------------------------------------------------
    # Background
    # ------------------------------------------------------------------
    def _background_preprocess(self, background: str):
        """Return the background text as a list of cleaned, non-empty lines."""
        return self._numbered_lines(background)

    def background_slide(self, background):
        """Add the "Background" slide with one numbered bullet per item."""
        slide, text_frame = self._new_content_slide('Background')
        fontsize = self._scaled_font_size(len(background), 22)
        self._add_numbered_bullets(text_frame, background, fontsize)
        self._add_time_footnote(slide=slide)

    # ------------------------------------------------------------------
    # Problem definition
    # ------------------------------------------------------------------
    def _split_by_headers(self, text: str, headers):
        """Group the lines of *text* under the header line that precedes them.

        A line counts as a header when, after cleaning, it equals one of
        *headers* ignoring case. Lines before the first header are dropped.
        """
        sections = {header: [] for header in headers}
        by_lowercase = {}
        for header in headers:
            by_lowercase.setdefault(header.lower(), header)

        current = None
        for line in text.strip().strip(string.punctuation).splitlines():
            line_clean = self._clean_line(line)
            if not line_clean:
                continue
            header = by_lowercase.get(line_clean.lower())
            if header is not None:
                current = header
                continue
            if current:
                item = self._clean_line(clean_leading_numbering(text=line_clean))
                if item:
                    sections[current].append(item)
        return sections

    def _problem_define_preprocess(self, problem_desc: str):
        """Split the problem text into Scope / Challenges / Assumptions / Relevance lists.

        When none of those headers yields any content, every non-empty line
        is returned under ``'Scope'`` instead.
        """
        headers = ["Scope", "Challenges", "Assumptions", "Relevance"]
        problem_dict = {}
        if any(header in problem_desc for header in headers):
            problem_dict = self._split_by_headers(text=problem_desc, headers=headers)

        if not any(problem_dict.values()):
            lines = problem_desc.strip().strip(string.punctuation).splitlines()
            cleaned = (
                self._clean_line(clean_leading_numbering(text=line)) for line in lines
            )
            problem_dict = {'Scope': [line for line in cleaned if line]}
        return problem_dict

    def problem_def_slide(self, problems):
        """Add one slide per non-empty problem section, in a fixed section order."""
        fontsize = 20
        for sect_name in ("Scope", "Challenges", "Assumptions", "Relevance"):
            section_contents = problems.get(sect_name, [])
            if len(section_contents) == 0:
                continue

            if sect_name == 'Scope':
                title = 'Problem Definition'
            elif sect_name in {'Challenges', 'Assumptions'}:
                title = 'Problem Definition - {}'.format(sect_name)
            else:
                title = 'Interested Practitioners'

            slide, text_frame = self._new_content_slide(title)
            self._add_numbered_bullets(text_frame, section_contents, fontsize)
            self._add_time_footnote(slide=slide)

    # ------------------------------------------------------------------
    # Objectives
    # ------------------------------------------------------------------
    def _objective_preprocess(self, objective: str):
        """Return the objectives text as a list of cleaned, non-empty lines."""
        return self._numbered_lines(objective)

    def objective_slide(self, objectives):
        """Add the "Objectives & How" slide with one numbered bullet per item."""
        slide, text_frame = self._new_content_slide('Objectives & How')
        fontsize = self._scaled_font_size(len(objectives), 24)
        self._add_numbered_bullets(text_frame, objectives, fontsize)
        self._add_time_footnote(slide=slide)

    # ------------------------------------------------------------------
    # Methodology
    # ------------------------------------------------------------------
    def _method_preprocess(self, methodology: str):
        """Return the methodology text as a list of cleaned, non-empty lines."""
        return self._numbered_lines(methodology)

    def method_slide(self, methods):
        """Add the "Proposed Method" slide; each item is prefixed with a bold "Step N. "."""
        slide, text_frame = self._new_content_slide('Proposed Method')
        fontsize = 20
        for idx, step in enumerate(methods, start=1):
            self._add_labelled_paragraph(text_frame, "Step {}. ".format(idx), step, fontsize)
        self._add_time_footnote(slide=slide)

    # ------------------------------------------------------------------
    # Experiments
    # ------------------------------------------------------------------
    def _split_experiment_sections(self, text: str, keyword1: str, keyword2: str) -> dict:
        """Split *text* into the lines following *keyword1* and those following *keyword2*.

        Lines under *keyword1* become ``"key: value"`` strings (or just
        ``"key"`` without a colon). Lines under *keyword2* become
        ``(key, value)`` tuples whose value is the raw text after the first
        colon. Lines before either keyword are ignored.
        """
        part1_lines = []
        part2_lines = []
        current_section = None
        for line in text.strip().splitlines():
            stripped = self._clean_line(clean_leading_numbering(text=line))
            if not stripped:
                continue
            if keyword1 in stripped:
                current_section = keyword1
                continue
            if keyword2 in stripped:
                current_section = keyword2
                continue

            head, sep, tail = stripped.partition(':')
            key = self._clean_line(head)
            if current_section == keyword1:
                part1_lines.append((key + ": " + self._clean_line(tail)) if sep else key)
            elif current_section == keyword2:
                part2_lines.append((key, tail))
        return {keyword1: part1_lines, keyword2: part2_lines}

    def _experiment_preprocess(self, experiment: str):
        """Parse the results text into a dict keyed by section heading.

        When both "Evidence Summary" and "Experimental Summary" occur in the
        text, it is split at those headings. Otherwise all non-empty lines
        are returned under "Experimental Summary".
        """
        evidence_keyword = 'Evidence Summary'
        exp_summary_keyword = 'Experimental Summary'
        if (evidence_keyword in experiment) and (exp_summary_keyword in experiment):
            return self._split_experiment_sections(
                text=experiment, keyword1=evidence_keyword, keyword2=exp_summary_keyword
            )
        return {exp_summary_keyword: self._numbered_lines(experiment)}

    def experiment_slide(self, experiments):
        """Add the "Experimental Study" slide and, if present, an evidence slide.

        Entries of the experimental summary may be plain strings (rendered
        as bold numbered bullets) or ``(key, value)`` tuples (rendered as a
        bold key followed by the value; tuples with an empty value are
        skipped). A non-empty "Evidence Summary" list is rendered on a
        second slide.
        """
        summary_items = experiments.get('Experimental Summary', [])
        evidence_items = experiments.get('Evidence Summary', [])
        fontsize = 20

        slide, text_frame = self._new_content_slide('Experimental Study')
        for idx, entry in enumerate(summary_items, start=1):
            # Dispatch on the entry itself so tuples are never printed as
            # their repr, whatever the state of the evidence list.
            if isinstance(entry, tuple):
                key, value = entry
                if len(value) == 0:
                    continue
                self._add_labelled_paragraph(text_frame, key, value, fontsize)
            else:
                self._add_bullet(text_frame, f"{idx}. {entry}", fontsize, bold=True)
        self._add_time_footnote(slide=slide)

        if len(evidence_items) > 0:
            slide_2, text_frame = self._new_content_slide('Experimental Study (Summary)')
            self._add_time_footnote(slide=slide_2)
            self._add_numbered_bullets(text_frame, evidence_items, fontsize)

    # ------------------------------------------------------------------
    # Conclusions
    # ------------------------------------------------------------------
    def _conclusion_preprocess(self, conclusion: str):
        """Parse ``topic: description`` lines into a dict.

        Empty lines and lines that start with "conclusion" (any case) are
        skipped. A line without a colon maps to an empty description.
        """
        conclusion_dict = {}
        for sent in conclusion.strip().splitlines():
            trim_sent = clean_leading_numbering(text=self._clean_line(sent))
            if len(trim_sent) == 0 or trim_sent.lower().startswith('conclusion'):
                continue
            head, _, tail = trim_sent.partition(':')
            conclusion_dict[self._clean_line(head)] = self._clean_line(tail)
        return conclusion_dict

    def conclusion_slide(self, conclusion):
        """Add the "Conclusions & Future Work" slide.

        Each topic with a non-empty description is shown as a bold line
        followed by the description in italics one level deeper. Topics
        without a description are skipped.
        """
        slide, text_frame = self._new_content_slide('Conclusions & Future Work')
        for topic, desc in conclusion.items():
            if len(desc) == 0:
                continue
            self._add_bullet(text_frame, topic, 20, bold=True)
            self._add_bullet(text_frame, desc, 16, level=1, italic=True)
        self._add_time_footnote(slide=slide)

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------
    def build_slides(self, slide_dict: dict, authors: str = 'Author here'):
        """Render every section found in *slide_dict* in presentation order.

        Args:
            slide_dict: Section texts keyed by ``Title``, ``Outline``,
                ``Background``, ``Research problem``, ``Objectives``,
                ``Methodology``, ``Results`` and ``Conclusions``. Missing or
                empty optional sections are skipped.
            authors: Forwarded to ``title_slide``.

        Raises:
            AssertionError: If no outline entries can be parsed.
        """
        title = self._title_preprocess(title=slide_dict.get('Title', ''))
        self.title_slide(title=title, authors=authors)

        outline = self._outline_preprocess_(outline=slide_dict.get('Outline', {}))
        if len(outline) == 0:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError('No outline detected!!!')
        self.outline_slide(outline=outline)

        background = slide_dict.get('Background', '')
        if background:
            background = self._background_preprocess(background=background)
            self.background_slide(background=background)

        problem_definition = slide_dict.get('Research problem', '')
        if problem_definition:
            problems = self._problem_define_preprocess(problem_desc=problem_definition)
            self.problem_def_slide(problems=problems)

        objectives = slide_dict.get('Objectives', '')
        if objectives:
            objectives = self._objective_preprocess(objective=objectives)
            self.objective_slide(objectives=objectives)

        methodology = slide_dict.get('Methodology', '')
        if methodology:
            methodology = self._method_preprocess(methodology=methodology)
            self.method_slide(methods=methodology)

        experimental_study = slide_dict.get('Results', '')
        if experimental_study:
            experiments = self._experiment_preprocess(experiment=experimental_study)
            self.experiment_slide(experiments=experiments)

        conclusion = slide_dict.get('Conclusions', '')
        if conclusion:
            conclusion = self._conclusion_preprocess(conclusion=conclusion)
            self.conclusion_slide(conclusion=conclusion)

        self.qa_slides()
        print('Done!!')

    def qa_slides(self):
        """Add the closing "Thank you! / Q & A" slide built on layout 6."""
        slide = self.prs.slides.add_slide(self.prs.slide_layouts[6])
        self._set_background_color(slide=slide)
        self._add_logo(slide=slide)

        textbox = slide.shapes.add_textbox(Inches(2), Inches(2.5), Inches(6), Inches(2))
        text_frame = textbox.text_frame
        text_frame.clear()

        thanks = text_frame.add_paragraph()
        thanks.text = "Thank you!"
        thanks.font.size = Pt(44)
        thanks.font.bold = True
        thanks.alignment = PP_ALIGN.CENTER

        qa = text_frame.add_paragraph()
        qa.text = "\nQ & A"
        qa.font.size = Pt(36)
        qa.alignment = PP_ALIGN.CENTER

        self._add_time_footnote(slide=slide)

    def save(self, file_name='slides.pptx'):
        """Write the presentation to *file_name*."""
        self.prs.save(file_name)