SuriRaja committed on
Commit
43a0c5f
·
verified ·
1 Parent(s): aff2fb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -37,7 +37,7 @@ def extract_text_from_images(image_paths):
37
 
38
  # Step 3: Rebuild the PDF with Extracted Text
39
  def rebuild_pdf(original_pdf_path, text_data, output_pdf_path):
40
- """Overlay extracted text onto the original PDF with enhanced alignment."""
41
  with fitz.open(original_pdf_path) as pdf:
42
  output_pdf = fitz.open()
43
 
@@ -52,25 +52,26 @@ def rebuild_pdf(original_pdf_path, text_data, output_pdf_path):
52
  for bbox, text, conf in text_data[page_num]:
53
  if conf > 0.6: # Confidence threshold
54
  (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
55
- rect = fitz.Rect(x_min, y_min, x_max, y_max)
56
-
57
- # Adjust text placement for alignment
58
- adjusted_x = x_min + 2 # Fine-tune as needed
59
- adjusted_y = y_min + 2 # Fine-tune as needed
60
- adjusted_rect = fitz.Rect(adjusted_x, adjusted_y, x_max, y_max)
61
 
 
62
  new_page.insert_textbox(
63
  adjusted_rect, # Use adjusted bounding box
64
  text,
65
- fontsize=10, # Adjust font size as needed
66
- fontname="helv", # Use Helvetica for compatibility
67
- color=(0, 0, 0), # Black color for text
 
68
  )
69
 
70
  # Add original diagrams and graphics
71
  new_page.show_pdf_page(page.rect, pdf, page_num)
72
 
73
- # Save rebuilt PDF with embedded fonts
74
  output_pdf.save(output_pdf_path, garbage=4)
75
 
76
 
 
37
 
38
  # Step 3: Rebuild the PDF with Extracted Text
39
  def rebuild_pdf(original_pdf_path, text_data, output_pdf_path):
40
+ """Overlay extracted text onto the original PDF with enhanced alignment and font embedding."""
41
  with fitz.open(original_pdf_path) as pdf:
42
  output_pdf = fitz.open()
43
 
 
52
  for bbox, text, conf in text_data[page_num]:
53
  if conf > 0.6: # Confidence threshold
54
  (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
55
+
56
+ # Apply dynamic alignment adjustments
57
+ adjusted_x_min = x_min + 2 # Shift horizontally
58
+ adjusted_y_min = y_min + 3 # Shift vertically
59
+ adjusted_rect = fitz.Rect(adjusted_x_min, adjusted_y_min, x_max, y_max)
 
60
 
61
+ # Insert text with font embedding
62
  new_page.insert_textbox(
63
  adjusted_rect, # Use adjusted bounding box
64
  text,
65
+ fontsize=10, # Adjust font size for better fit
66
+ fontname="helv", # Use Helvetica for better compatibility
67
+ fontfile="path/to/font.ttf", # Embed font (replace with actual font path)
68
+ color=(0, 0, 0), # Black text
69
  )
70
 
71
  # Add original diagrams and graphics
72
  new_page.show_pdf_page(page.rect, pdf, page_num)
73
 
74
+ # Save rebuilt PDF with optimized structure
75
  output_pdf.save(output_pdf_path, garbage=4)
76
 
77