pytesseract_and_easyocr

Sleeping

App Files Community

May222 committed on Dec 12, 2023

Commit

74f998a

1 Parent(s): 3dcbe7b

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -101

app.py CHANGED Viewed

@@ -1,108 +1,31 @@
-import os
-import matplotlib.pyplot as plt
-import streamlit as st
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 import cv2
-import tensorflow as tf
-gpu_devices = tf.config.experimental.list_physical_devices('GPU')
-if any(gpu_devices):
-    tf.config.experimental.set_memory_growth(gpu_devices[0], True)
-from doctr.io import DocumentFile
-from doctr.models import ocr_predictor
-from doctr.utils.visualization import visualize_page
-DET_ARCHS = ["db_resnet50", "db_mobilenet_v3_large"]
-RECO_ARCHS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "master", "sar_resnet31"]
-def main():
-    # Wide mode
-    st.set_page_config(layout="wide")
-    # Designing the interface
-    st.title("docTR: Document Text Recognition")
-    # For newline
-    st.write('\n')
-    #
-    st.write('Find more info at: https://github.com/mindee/doctr')
-    # For newline
-    st.write('\n')
-    # Instructions
-    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
-    # Set the columns
-    cols = st.columns((1, 1, 1, 1))
-    cols[0].subheader("Input page")
-    cols[1].subheader("Segmentation heatmap")
-    cols[2].subheader("OCR output")
-    cols[3].subheader("Page reconstitution")
-    # Sidebar
-    # File selection
-    st.sidebar.title("Document selection")
-    # Disabling warning
-    # st.set_option('deprecation.showfileUploaderEncoding', False)
-    # Choose your own image
-    uploaded_file = st.sidebar.file_uploader("Upload files", type=['pdf', 'png', 'jpeg', 'jpg'])
-    if uploaded_file is not None:
-        if uploaded_file.name.endswith('.pdf'):
-            doc = DocumentFile.from_pdf(uploaded_file.read())
-        else:
-            doc = DocumentFile.from_images(uploaded_file.read())
-        page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
-        cols[0].image(doc[page_idx])
-    # Model selection
-    st.sidebar.title("Model selection")
-    det_arch = st.sidebar.selectbox("Text detection model", DET_ARCHS)
-    reco_arch = st.sidebar.selectbox("Text recognition model", RECO_ARCHS)
-    # For newline
-    st.sidebar.write('\n')
-    if st.sidebar.button("Analyze page"):
-        if uploaded_file is None:
-            st.sidebar.write("Please upload a document")
-        else:
-            with st.spinner('Loading model...'):
-                predictor = ocr_predictor(det_arch, reco_arch, pretrained=True)
-            with st.spinner('Analyzing...'):
-                # Forward the image to the model
-                processed_batches = predictor.det_predictor.pre_processor([doc[page_idx]])
-                out = predictor.det_predictor.model(processed_batches[0], return_model_output=True)
-                seg_map = out["out_map"]
-                seg_map = tf.squeeze(seg_map[0, ...], axis=[2])
-                seg_map = cv2.resize(seg_map.numpy(), (doc[page_idx].shape[1], doc[page_idx].shape[0]),
-                                     interpolation=cv2.INTER_LINEAR)
-                # Plot the raw heatmap
-                fig, ax = plt.subplots()
-                ax.imshow(seg_map)
-                ax.axis('off')
-                cols[1].pyplot(fig)
-                # Plot OCR output
-                out = predictor([doc[page_idx]])
-                fig = visualize_page(out.pages[0].export(), doc[page_idx], interactive=False)
-                cols[2].pyplot(fig)
-                # Page reconsitution under input page
-                page_export = out.pages[0].export()
-                img = out.pages[0].synthesize()
-                cols[3].image(img, clamp=True)
-                # Display JSON
-                st.markdown("\nHere are your analysis results in JSON format:")
-                st.json(page_export)
-if __name__ == '__main__':
-    main()

+import gradio as gr
+# from transformers import pipeline
+from PIL import Image
+import pytesseract
 import cv2
+import os
+# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
+# def predict(input_img):
+#     predictions = pipeline(input_img)
+#     return input_img, {p["label"]: p["score"] for p in predictions}
+def recognize(input_img):
+    text = pytesseract.image_to_string(Image.open("./data/" + filename))
+    return input_img, text
+gradio_app = gr.Interface(
+    recognize,
+    inputs=gr.inputs.Textbox(lines=2, placeholder="Name Here..."),
+    inputs=gr.Image(label="Select hot dog candidate", sources=['upload', 'webcam'], type="pil"),
+    outputs=[gr.Image(label="Processed Image"), gr.Label(label="Result", num_top_classes=2)],
+    inputs=[gr.Image(label="Upload an Image", type="pil")],
+    outputs=[gr.Textbox(label="Text in the Image")],
+    title="Extrate Text From Image",
+)
+if __name__ == "__main__":
+    gradio_app.launch()