Soumen committed on
Commit
09d4214
1 Parent(s): 8994505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -6
app.py CHANGED
@@ -28,7 +28,10 @@ import os
28
  import streamlit as st
29
  import torch
30
  from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
31
-
 
 
 
32
 
33
  # NLP Pkgs
34
  from textblob import TextBlob
@@ -40,6 +43,22 @@ import numpy as np
40
  import pytesseract
41
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
42
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # Title
44
  if st.button("REFRESH"):
45
  st.experimental_rerun()
@@ -81,16 +100,20 @@ def main():
81
  st.subheader("Please, feed your image/text, features/services will appear automatically!")
82
  message = st.text_input("Type your text here!")
83
  camera_photo = st.camera_input("Take a photo, Containing English or Bangla texts", on_change=change_photo_state)
84
- uploaded_photo = st.file_uploader("Upload Image, Containing English or Bangla texts",type=['jpg','png','jpeg'], on_change=change_photo_state)
85
  if "photo" not in st.session_state:
86
  st.session_state["photo"]="not done"
87
 
88
  if st.session_state["photo"]=="done" or message:
89
  if uploaded_photo:
90
- img = Image.open(uploaded_photo)
91
- img = img.save("img.png")
92
- img = cv2.imread("img.png")
93
- text = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
 
 
 
 
94
  st.success(text)
95
  elif camera_photo:
96
  img = Image.open(camera_photo)
 
28
  import streamlit as st
29
  import torch
30
  from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
31
+ import docx2txt
32
+ from PIL import Image
33
+ from PyPDF2 import PdfFileReader
34
+ import pdfplumber
35
 
36
  # NLP Pkgs
37
  from textblob import TextBlob
 
43
  import pytesseract
44
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
45
  from PIL import Image
46
+ def read_pdf(file):
47
+ pdfReader = PdfFileReader(file)
48
+ count = pdfReader.numPages
49
+ all_page_text = ""
50
+ for i in range(count):
51
+ page = pdfReader.getPage(i)
52
+ all_page_text += page.extractText()
53
+
54
+ return all_page_text
55
+
56
+ #def read_pdf_with_pdfplumber(file):
57
+ # with pdfplumber.open(file) as pdf:
58
+ # page = pdf.pages[0]
59
+ # return page.extract_text()
60
+
61
+
62
  # Title
63
  if st.button("REFRESH"):
64
  st.experimental_rerun()
 
100
  st.subheader("Please, feed your image/text, features/services will appear automatically!")
101
  message = st.text_input("Type your text here!")
102
  camera_photo = st.camera_input("Take a photo, Containing English or Bangla texts", on_change=change_photo_state)
103
+ uploaded_photo = st.file_uploader("Upload Image, Containing English or Bangla texts",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
104
  if "photo" not in st.session_state:
105
  st.session_state["photo"]="not done"
106
 
107
  if st.session_state["photo"]=="done" or message:
108
  if uploaded_photo:
109
+ if uploaded_photo.type == "application/pdf":
110
+ text = read_pdf(docx_file)
111
+ text = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
112
+ else:
113
+ img = Image.open(uploaded_photo)
114
+ img = img.save("img.png")
115
+ img = cv2.imread("img.png")
116
+ text = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
117
  st.success(text)
118
  elif camera_photo:
119
  img = Image.open(camera_photo)