Update app.py
Browse files
@@ -49,24 +49,31 @@ import pytesseract
49 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
50 |
from PIL import Image
51 |
52 |
def read_pdf(file):
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
# get co-ordinates to cr
63 |
text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
64 |
all_page_text += text + " " #page.extractText()
65 |
return all_page_text
66 |
#def read_pdf_with_pdfplumber(file):
67 |
# with pdfplumber.open(file) as pdf:
68 |
# page = pdf.pages[0]
69 |
# return page.extract_text()
70 |
st.title("Streamlit NLP APP")
71 |
72 |
def text_analyzer(my_text):
49 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
50 |
from PIL import Image
51 |
52 |
# def read_pdf(file):
53 |
# images=pdf2image.convert_from_bytes(file.read(),"rb")
54 |
# #pdfReader = PdfFileReader(file)
55 |
# #count = pdfReader.numPages
56 |
# all_page_text = ""
57 |
# for im in images:
58 |
# #page = pdfReader.getPage(i)
59 |
# img = Image.open(im)
60 |
# img = img.save("img.png")
61 |
# image_name = cv2.imread("img.png")
62 |
# # get co-ordinates to cr
63 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
64 |
# all_page_text += text + " " #page.extractText()
65 |
# return all_page_text
66 |
def read_pdf_with_pdfplumber(file):
    """OCR the first page of a PDF and return the recognised text.

    The first page is rasterised to ``img.png`` in the current working
    directory, re-read with OpenCV, and passed to Tesseract. A Streamlit
    checkbox lets the user switch recognition to Bengali (``lang="ben"``);
    when it is unticked, pytesseract is called without a ``lang`` argument.

    Args:
        file: Passed unchanged to ``pdfplumber.open``.

    Returns:
        The OCR text followed by a single trailing space, or an empty
        string when the PDF contains no pages.
    """
    with pdfplumber.open(file) as pdf:
        if not pdf.pages:
            return ""
        # A pdfplumber Page has no save(); render it to a PageImage first
        # and save that instead.
        pdf.pages[0].to_image().save("img.png")

    image = cv2.imread("img.png")
    if st.checkbox("Mark to see Bangla Image's Text"):
        text = pytesseract.image_to_string(image, lang="ben")
    else:
        text = pytesseract.image_to_string(image)

    # The original accumulated into an uninitialised `all_page_text` and never
    # returned it; return the value that accumulation was meant to build.
    return text + " "
77 |
# Set the app's title text (`st` is presumably the Streamlit module; its import is outside this view).
st.title("Streamlit NLP APP")
78 |
79 |
def text_analyzer(my_text):