pytesseract_and_easyocr

Sleeping

App Files Community

May222 committed on Dec 12, 2023

Commit

baf49b9

1 Parent(s): f73d025

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -12

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
-import gradio as gr
 # from transformers import pipeline
 from PIL import Image
 import pytesseract
 import cv2
 import os
@@ -11,17 +12,89 @@ import os
 #     predictions = pipeline(input_img)
 #     return input_img, {p["label"]: p["score"] for p in predictions}
-def recognize(input_img):
-    text = pytesseract.image_to_string(Image.open("./data/" + filename))
-    return input_img, text
-gradio_app = gr.Interface(
-    recognize,
-    inputs=[gr.Image(label="Upload an Image", type="pil")],
-    outputs=[gr.Textbox(label="Text in the Image")],
-    title="Extrate Text From Image",
-)
-if __name__ == "__main__":
-    gradio_app.launch(server_port=8756)

+# import gradio as gr
 # from transformers import pipeline
 from PIL import Image
 import pytesseract
+import easyocr
 import cv2
 import os
 #     predictions = pipeline(input_img)
 #     return input_img, {p["label"]: p["score"] for p in predictions}
+# def recognize(input_img):
+#     text = pytesseract.image_to_string(Image.open("./data/" + filename))
+#     return input_img, text
+# gradio_app = gr.Interface(
+#     recognize,
+#     inputs=[gr.Image(label="Upload an Image", type="pil")],
+#     outputs=[gr.Textbox(label="Text in the Image")],
+#     title="Extrate Text From Image",
+# )
+# if __name__ == "__main__":
+#     gradio_app.launch(server_port=8756)
+import os
+import matplotlib.pyplot as plt
+import streamlit as st
+import cv2
+import tensorflow as tf
+from PIL import Image
+import pytesseract
+DET_ARCHS = ["pytesseract", "easyocr"]
+def main():
+    # Wide mode
+    st.set_page_config(layout="wide")
+    # Designing the interface
+    st.title("Image Text Recognition")
+    # For newline
+    st.write('\n')
+    # Instructions
+    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
+    # Set the columns
+    cols = st.columns((1, 1, 1, 1))
+    cols[0].subheader("Input image")
+    cols[1].subheader("OCR output")
+    # Sidebar
+    # File selection
+    st.sidebar.title("Document selection")
+    # Disabling warning
+    # st.set_option('deprecation.showfileUploaderEncoding', False)
+    # Choose your own image
+    uploaded_file = st.sidebar.file_uploader("Upload files", type=['png', 'jpeg', 'jpg'])
+    if uploaded_file is not None:
+        doc = uploaded_file.read()
+        cols[0].image(doc)
+    # Model selection
+    st.sidebar.title("Model selection")
+    det_arch = st.sidebar.selectbox("OCR model", DET_ARCHS)
+    # For newline
+    st.sidebar.write('\n')
+    if st.sidebar.button("Analyze image"):
+        if uploaded_file is None:
+            st.sidebar.write("Please upload a document")
+        else:
+            with st.spinner('Loading model...'):
+                if det_arch == 'pytesseract':
+                    predictor = pytesseract.image_to_string(Image.open(doc))
+                else:
+                    reader = easyocr.Reader(['en'])
+                    predictor = reader.readtext("./data/" + filename, detail = 0)
+            with st.spinner('Analyzing...'):
+                # Plot OCR output
+                if det_arch == 'pytesseract':
+                    cols[1].text(predictor)
+                else:
+                    cols[1].text(''.join(text))
+if __name__ == '__main__':
+    main()