Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,8 @@ from PyPDF2 import PdfFileReader
|
|
34 |
from pdf2image import convert_from_bytes
|
35 |
import pdfplumber
|
36 |
from line_cor import mark_region
|
|
|
|
|
37 |
|
38 |
# NLP Pkgs
|
39 |
from textblob import TextBlob
|
@@ -46,8 +48,9 @@ import pytesseract
|
|
46 |
|
47 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
48 |
from PIL import Image
|
|
|
49 |
def read_pdf(file):
|
50 |
-
images=pdf2image.convert_from_bytes(file.read())
|
51 |
#pdfReader = PdfFileReader(file)
|
52 |
#count = pdfReader.numPages
|
53 |
all_page_text = ""
|
|
|
34 |
from pdf2image import convert_from_bytes
|
35 |
import pdfplumber
|
36 |
from line_cor import mark_region
|
37 |
+
import pdf2image
|
38 |
+
|
39 |
|
40 |
# NLP Pkgs
|
41 |
from textblob import TextBlob
|
|
|
48 |
|
49 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
50 |
from PIL import Image
|
51 |
+
@st.experimental_singleton
|
52 |
def read_pdf(file):
|
53 |
+
images=pdf2image.convert_from_bytes(file.read(),"rb")
|
54 |
#pdfReader = PdfFileReader(file)
|
55 |
#count = pdfReader.numPages
|
56 |
all_page_text = ""
|