Update app.py
Browse files
app.py
CHANGED
@@ -28,7 +28,10 @@ import os
|
|
28 |
import streamlit as st
|
29 |
import torch
|
30 |
from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
|
31 |
-
|
|
|
|
|
|
|
32 |
|
33 |
# NLP Pkgs
|
34 |
from textblob import TextBlob
|
@@ -40,6 +43,22 @@ import numpy as np
|
|
40 |
import pytesseract
|
41 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
42 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# Title
|
44 |
if st.button("REFRESH"):
|
45 |
st.experimental_rerun()
|
@@ -81,16 +100,20 @@ def main():
|
|
81 |
st.subheader("Please, feed your image/text, features/services will appear automatically!")
|
82 |
message = st.text_input("Type your text here!")
|
83 |
camera_photo = st.camera_input("Take a photo, Containing English or Bangla texts", on_change=change_photo_state)
|
84 |
-
uploaded_photo = st.file_uploader("Upload Image, Containing English or Bangla texts",type=['jpg','png','jpeg'], on_change=change_photo_state)
|
85 |
if "photo" not in st.session_state:
|
86 |
st.session_state["photo"]="not done"
|
87 |
|
88 |
if st.session_state["photo"]=="done" or message:
|
89 |
if uploaded_photo:
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
94 |
st.success(text)
|
95 |
elif camera_photo:
|
96 |
img = Image.open(camera_photo)
|
|
|
28 |
import streamlit as st
|
29 |
import torch
|
30 |
from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
|
31 |
+
import docx2txt
|
32 |
+
from PIL import Image
|
33 |
+
from PyPDF2 import PdfFileReader
|
34 |
+
import pdfplumber
|
35 |
|
36 |
# NLP Pkgs
|
37 |
from textblob import TextBlob
|
|
|
43 |
import pytesseract
|
44 |
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
|
45 |
from PIL import Image
|
46 |
+
def read_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source. It is passed unchanged to the PyPDF2 reader
            constructor (presumably a path or a binary file-like object --
            confirm against the caller).

    Returns:
        The extracted text of all pages concatenated in page order, with no
        separator inserted between pages. An empty string if the document
        has no pages.
    """
    # PyPDF2 1.28 introduced PdfReader / .pages / .extract_text as the
    # replacements for PdfFileReader / .getPage / .extractText, and
    # PyPDF2 3.0 makes the legacy names raise. Prefer the current names and
    # fall back to the legacy ones so older installs keep working.
    try:
        from PyPDF2 import PdfReader as reader_cls
    except ImportError:
        reader_cls = PdfFileReader

    reader = reader_cls(file)

    page_texts = []
    for page in reader.pages:
        # Legacy page objects only expose extractText().
        extract = getattr(page, "extract_text", None) or page.extractText
        # Guard against an extractor yielding None for a page without text.
        page_texts.append(extract() or "")

    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
|
55 |
+
|
56 |
+
#def read_pdf_with_pdfplumber(file):
|
57 |
+
# with pdfplumber.open(file) as pdf:
|
58 |
+
# page = pdf.pages[0]
|
59 |
+
# return page.extract_text()
|
60 |
+
|
61 |
+
|
62 |
# Title
|
63 |
if st.button("REFRESH"):
|
64 |
st.experimental_rerun()
|
|
|
100 |
st.subheader("Please, feed your image/text, features/services will appear automatically!")
|
101 |
message = st.text_input("Type your text here!")
|
102 |
camera_photo = st.camera_input("Take a photo, Containing English or Bangla texts", on_change=change_photo_state)
|
103 |
+
uploaded_photo = st.file_uploader("Upload Image, Containing English or Bangla texts",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
|
104 |
if "photo" not in st.session_state:
|
105 |
st.session_state["photo"]="not done"
|
106 |
|
107 |
if st.session_state["photo"]=="done" or message:
|
108 |
if uploaded_photo:
|
109 |
+
if uploaded_photo.type == "application/pdf":
|
110 |
+
text = read_pdf(docx_file)
|
111 |
+
text = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
112 |
+
else:
|
113 |
+
img = Image.open(uploaded_photo)
|
114 |
+
img = img.save("img.png")
|
115 |
+
img = cv2.imread("img.png")
|
116 |
+
text = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
117 |
st.success(text)
|
118 |
elif camera_photo:
|
119 |
img = Image.open(camera_photo)
|