Nirav-Khanpara committed on
Commit
4ad1f12
1 Parent(s): 9ed0ab0

Upload 2 files

Browse files
Files changed (2) hide show
  1. requirements.txt +0 -0
  2. scan_pdf_parser.py +10 -0
requirements.txt ADDED
Binary file (4.65 kB). View file
 
scan_pdf_parser.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ from pdf2image import convert_from_path
3
+
4
+
5
def get_text_from_scanned_pdf(pdf_path) -> str:
    """Extract text from a scanned PDF by running OCR on its rendered images.

    The PDF at *pdf_path* is converted to images with
    ``pdf2image.convert_from_path``; each image is then passed to
    ``pytesseract.image_to_string`` and the results are concatenated in the
    order the images were produced, with no separator inserted between them.

    Args:
        pdf_path: Location of the PDF file, passed unchanged to
            ``convert_from_path``.

    Returns:
        The concatenated OCR text; an empty string when the conversion
        yields no images.
    """
    images = convert_from_path(pdf_path)
    # Join once instead of growing a string with ``+=`` per image, which can
    # degrade to quadratic copying on long documents.
    return ''.join(pytesseract.image_to_string(img) for img in images)