May222 committed on
Commit
74f998a
1 Parent(s): 3dcbe7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -101
app.py CHANGED
@@ -1,108 +1,31 @@
1
- import os
2
-
3
- import matplotlib.pyplot as plt
4
- import streamlit as st
5
-
6
- os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
7
-
8
  import cv2
9
- import tensorflow as tf
10
-
11
- gpu_devices = tf.config.experimental.list_physical_devices('GPU')
12
- if any(gpu_devices):
13
- tf.config.experimental.set_memory_growth(gpu_devices[0], True)
14
-
15
- from doctr.io import DocumentFile
16
- from doctr.models import ocr_predictor
17
- from doctr.utils.visualization import visualize_page
18
-
19
- DET_ARCHS = ["db_resnet50", "db_mobilenet_v3_large"]
20
- RECO_ARCHS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "master", "sar_resnet31"]
21
-
22
-
23
- def main():
24
-
25
- # Wide mode
26
- st.set_page_config(layout="wide")
27
-
28
- # Designing the interface
29
- st.title("docTR: Document Text Recognition")
30
- # For newline
31
- st.write('\n')
32
- #
33
- st.write('Find more info at: https://github.com/mindee/doctr')
34
- # For newline
35
- st.write('\n')
36
- # Instructions
37
- st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
38
- # Set the columns
39
- cols = st.columns((1, 1, 1, 1))
40
- cols[0].subheader("Input page")
41
- cols[1].subheader("Segmentation heatmap")
42
- cols[2].subheader("OCR output")
43
- cols[3].subheader("Page reconstitution")
44
-
45
- # Sidebar
46
- # File selection
47
- st.sidebar.title("Document selection")
48
- # Disabling warning
49
- # st.set_option('deprecation.showfileUploaderEncoding', False)
50
- # Choose your own image
51
- uploaded_file = st.sidebar.file_uploader("Upload files", type=['pdf', 'png', 'jpeg', 'jpg'])
52
- if uploaded_file is not None:
53
- if uploaded_file.name.endswith('.pdf'):
54
- doc = DocumentFile.from_pdf(uploaded_file.read())
55
- else:
56
- doc = DocumentFile.from_images(uploaded_file.read())
57
- page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
58
- cols[0].image(doc[page_idx])
59
-
60
- # Model selection
61
- st.sidebar.title("Model selection")
62
- det_arch = st.sidebar.selectbox("Text detection model", DET_ARCHS)
63
- reco_arch = st.sidebar.selectbox("Text recognition model", RECO_ARCHS)
64
-
65
- # For newline
66
- st.sidebar.write('\n')
67
-
68
- if st.sidebar.button("Analyze page"):
69
-
70
- if uploaded_file is None:
71
- st.sidebar.write("Please upload a document")
72
-
73
- else:
74
- with st.spinner('Loading model...'):
75
- predictor = ocr_predictor(det_arch, reco_arch, pretrained=True)
76
-
77
- with st.spinner('Analyzing...'):
78
 
79
- # Forward the image to the model
80
- processed_batches = predictor.det_predictor.pre_processor([doc[page_idx]])
81
- out = predictor.det_predictor.model(processed_batches[0], return_model_output=True)
82
- seg_map = out["out_map"]
83
- seg_map = tf.squeeze(seg_map[0, ...], axis=[2])
84
- seg_map = cv2.resize(seg_map.numpy(), (doc[page_idx].shape[1], doc[page_idx].shape[0]),
85
- interpolation=cv2.INTER_LINEAR)
86
- # Plot the raw heatmap
87
- fig, ax = plt.subplots()
88
- ax.imshow(seg_map)
89
- ax.axis('off')
90
- cols[1].pyplot(fig)
91
 
92
- # Plot OCR output
93
- out = predictor([doc[page_idx]])
94
- fig = visualize_page(out.pages[0].export(), doc[page_idx], interactive=False)
95
- cols[2].pyplot(fig)
96
 
97
- # Page reconsitution under input page
98
- page_export = out.pages[0].export()
99
- img = out.pages[0].synthesize()
100
- cols[3].image(img, clamp=True)
101
 
102
- # Display JSON
103
- st.markdown("\nHere are your analysis results in JSON format:")
104
- st.json(page_export)
 
 
105
 
 
 
 
 
 
106
 
107
- if __name__ == '__main__':
108
- main()
 
1
+ import gradio as gr
2
+ # from transformers import pipeline
3
+ from PIL import Image
4
+ import pytesseract
 
 
 
5
  import cv2
6
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ # def predict(input_img):
11
+ # predictions = pipeline(input_img)
12
+ # return input_img, {p["label"]: p["score"] for p in predictions}
 
13
 
14
def recognize(input_img):
    """Run Tesseract OCR on an uploaded image.

    Args:
        input_img: The image handed over by the Gradio image input (a PIL
            image when the component is declared with ``type="pil"``), or
            ``None`` when the form is submitted without an image.

    Returns:
        A ``(image, text)`` tuple: the input image unchanged and the text
        Tesseract extracted from it. ``(None, "")`` when no image was given.
    """
    # Gradio passes None when nothing was uploaded; there is nothing to OCR.
    if input_img is None:
        return None, ""
    # OCR the in-memory image directly. The previous version tried to open
    # "./data/" + filename, but `filename` was never defined (NameError) and
    # the uploaded image itself was ignored.
    text = pytesseract.image_to_string(input_img)
    return input_img, text
 
17
 
18
# Gradio UI: one uploaded image goes in; the echoed image and the recognized
# text come out, matching the two values `recognize` returns.
# The earlier draft repeated the `inputs=`/`outputs=` keywords (a SyntaxError)
# and still carried components from the hotdog-classifier template.
gradio_app = gr.Interface(
    recognize,
    inputs=[gr.Image(label="Upload an Image", type="pil")],
    outputs=[
        gr.Image(label="Processed Image"),
        gr.Textbox(label="Text in the Image"),
    ],
    title="Extract Text From Image",
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    gradio_app.launch()