Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -37,7 +37,7 @@ def extract_text_from_images(image_paths):
|
|
37 |
|
38 |
# Step 3: Rebuild the PDF with Extracted Text
|
39 |
def rebuild_pdf(original_pdf_path, text_data, output_pdf_path):
|
40 |
-
"""Overlay extracted text onto the original PDF with enhanced alignment."""
|
41 |
with fitz.open(original_pdf_path) as pdf:
|
42 |
output_pdf = fitz.open()
|
43 |
|
@@ -52,25 +52,26 @@ def rebuild_pdf(original_pdf_path, text_data, output_pdf_path):
|
|
52 |
for bbox, text, conf in text_data[page_num]:
|
53 |
if conf > 0.6: # Confidence threshold
|
54 |
(x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
adjusted_rect = fitz.Rect(adjusted_x, adjusted_y, x_max, y_max)
|
61 |
|
|
|
62 |
new_page.insert_textbox(
|
63 |
adjusted_rect, # Use adjusted bounding box
|
64 |
text,
|
65 |
-
fontsize=10, # Adjust font size
|
66 |
-
fontname="helv", # Use Helvetica for compatibility
|
67 |
-
|
|
|
68 |
)
|
69 |
|
70 |
# Add original diagrams and graphics
|
71 |
new_page.show_pdf_page(page.rect, pdf, page_num)
|
72 |
|
73 |
-
# Save rebuilt PDF with garbage collection (garbage=4)
|
74 |
output_pdf.save(output_pdf_path, garbage=4)
|
75 |
|
76 |
|
|
|
37 |
|
38 |
# Step 3: Rebuild the PDF with Extracted Text
|
39 |
def rebuild_pdf(original_pdf_path, text_data, output_pdf_path):
|
40 |
+
"""Overlay extracted text onto the original PDF with enhanced alignment and font embedding."""
|
41 |
with fitz.open(original_pdf_path) as pdf:
|
42 |
output_pdf = fitz.open()
|
43 |
|
|
|
52 |
for bbox, text, conf in text_data[page_num]:
|
53 |
if conf > 0.6: # Confidence threshold
|
54 |
(x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
|
55 |
+
|
56 |
+
# Apply fixed alignment offsets (+2 horizontally, +3 vertically)
|
57 |
+
adjusted_x_min = x_min + 2 # Shift horizontally
|
58 |
+
adjusted_y_min = y_min + 3 # Shift vertically
|
59 |
+
adjusted_rect = fitz.Rect(adjusted_x_min, adjusted_y_min, x_max, y_max)
|
|
|
60 |
|
61 |
+
# Insert text with font embedding
|
62 |
new_page.insert_textbox(
|
63 |
adjusted_rect, # Use adjusted bounding box
|
64 |
text,
|
65 |
+
fontsize=10, # Adjust font size for better fit
|
66 |
+
fontname="helv", # Use Helvetica for better compatibility
|
67 |
+
fontfile="path/to/font.ttf", # Embed font (replace with actual font path)
|
68 |
+
color=(0, 0, 0), # Black text
|
69 |
)
|
70 |
|
71 |
# Add original diagrams and graphics
|
72 |
new_page.show_pdf_page(page.rect, pdf, page_num)
|
73 |
|
74 |
+
# Save rebuilt PDF with optimized structure
|
75 |
output_pdf.save(output_pdf_path, garbage=4)
|
76 |
|
77 |
|