02alexander
committed on
Commit
•
ca9b39d
1
Parent(s):
dd4f81f
progress thingy
Browse files- app.py +75 -22
- examples/RP2040.png +0 -0
- examples/lorem-ipsum.png +0 -0
- examples/paper_page.png +0 -0
- image_examples/RP2040.png +3 -0
- image_examples/lorem-ipsum.png +3 -0
- image_examples/paper_page.png +3 -0
- ocr.py +21 -4
- {examples → pdf_examples}/midsommar_exercises.pdf +0 -0
- {examples → pdf_examples}/paper.pdf +0 -0
app.py
CHANGED
@@ -4,15 +4,17 @@ import os
|
|
4 |
from pathlib import Path
|
5 |
from queue import SimpleQueue
|
6 |
from threading import Thread
|
|
|
7 |
from typing import Any
|
8 |
|
9 |
import gradio as gr # type: ignore
|
10 |
import rerun as rr
|
|
|
11 |
from fastapi import FastAPI
|
12 |
from fastapi.middleware.cors import CORSMiddleware
|
13 |
from gradio_rerun import Rerun # type: ignore
|
14 |
|
15 |
-
from ocr import detect_and_log_layouts
|
16 |
|
17 |
CUSTOM_PATH = "/"
|
18 |
|
@@ -27,18 +29,42 @@ app.add_middleware(
|
|
27 |
allow_origins=origins,
|
28 |
)
|
29 |
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
log_queue.put("done")
|
33 |
|
|
|
34 |
@rr.thread_local_stream("PaddleOCR")
|
35 |
-
def log_to_rr(file_path: Path):
|
36 |
stream = rr.binary_stream()
|
37 |
|
38 |
log_queue: SimpleQueue[Any] = SimpleQueue()
|
39 |
-
|
|
|
|
|
40 |
handle.start()
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
while True:
|
43 |
msg = log_queue.get()
|
44 |
if msg == "done":
|
@@ -53,13 +79,13 @@ def log_to_rr(file_path: Path):
|
|
53 |
entity_path = msg[1]
|
54 |
args = msg[2]
|
55 |
kwargs = msg[3] if len(msg) >= 4 else {}
|
56 |
-
# print(entity_path)
|
57 |
-
# print(args)
|
58 |
-
# print(kwargs)
|
59 |
rr.log(entity_path, *args, **kwargs)
|
60 |
|
61 |
yield stream.read()
|
62 |
|
|
|
|
|
|
|
63 |
handle.join()
|
64 |
print("done")
|
65 |
|
@@ -73,21 +99,48 @@ with gr.Blocks() as demo:
|
|
73 |
gr.Markdown(DESCRIPTION)
|
74 |
with gr.Row():
|
75 |
with gr.Column(scale=1):
|
76 |
-
with gr.
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
gr.
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
with gr.Column(scale=4):
|
90 |
viewer = Rerun(streaming=True, height=900)
|
91 |
-
|
|
|
|
|
|
|
92 |
|
93 |
app = gr.mount_gradio_app(app, demo, path=CUSTOM_PATH)
|
|
|
4 |
from pathlib import Path
|
5 |
from queue import SimpleQueue
|
6 |
from threading import Thread
|
7 |
+
from time import sleep
|
8 |
from typing import Any
|
9 |
|
10 |
import gradio as gr # type: ignore
|
11 |
import rerun as rr
|
12 |
+
import rerun.blueprint as rrb
|
13 |
from fastapi import FastAPI
|
14 |
from fastapi.middleware.cors import CORSMiddleware
|
15 |
from gradio_rerun import Rerun # type: ignore
|
16 |
|
17 |
+
from ocr import detect_and_log_layouts, PAGE_LIMIT
|
18 |
|
19 |
CUSTOM_PATH = "/"
|
20 |
|
|
|
29 |
allow_origins=origins,
|
30 |
)
|
31 |
|
32 |
+
|
33 |
+
def progress_log(log_queue: SimpleQueue[Any], done: SimpleQueue[Any], interval: float = 0.7):
    """Feed an animated "working..." indicator into ``log_queue`` until stopped.

    Every ``interval`` seconds a ``["log", "progress", [rr.TextDocument(...)]]``
    message is queued, with the text cycling from one to five trailing dots.
    The loop ends as soon as any item is placed on ``done``.

    Args:
        log_queue: Queue that receives the progress messages.
        done: Stop signal. Any item put on it ends the loop; the item is
            consumed by this function.
        interval: Seconds to wait between progress messages.
    """
    # Function-scope import: the module itself only imports SimpleQueue.
    from queue import Empty

    dots = 0
    while True:
        try:
            # Waiting on the stop signal doubles as the delay between ticks,
            # so the thread exits immediately on completion instead of
            # sleeping out the interval and queueing one more stale
            # "working" message that nobody will read.
            done.get(timeout=interval)
        except Empty:
            pass
        else:
            return
        log_queue.put([
            "log",
            "progress",
            [rr.TextDocument(f"working{'.'*(dots+1)}")],
        ])
        dots = (dots + 1) % 5
|
45 |
+
|
46 |
+
|
47 |
+
def file_ocr(log_queue: SimpleQueue[Any], file_path: str, start_page: int, end_page: int):
    """Run layout detection on ``file_path`` and then post the ``"done"`` sentinel.

    Args:
        log_queue: Queue handed to ``detect_and_log_layouts``; it also
            receives the final ``"done"`` marker.
        file_path: Path of the image or PDF to process.
        start_page: First page to process, forwarded unchanged.
        end_page: Last page to process, forwarded unchanged.

    Raises:
        Whatever ``detect_and_log_layouts`` raises; the sentinel is queued
        before the exception propagates.
    """
    try:
        detect_and_log_layouts(log_queue, file_path, start_page, end_page)
    finally:
        # Always signal completion: a consumer blocked on log_queue.get()
        # would otherwise wait forever if detection fails.
        log_queue.put("done")
|
50 |
|
51 |
+
|
52 |
@rr.thread_local_stream("PaddleOCR")
|
53 |
+
def log_to_rr(file_path: Path, start_page: int = 1, end_page: int = -1):
|
54 |
stream = rr.binary_stream()
|
55 |
|
56 |
log_queue: SimpleQueue[Any] = SimpleQueue()
|
57 |
+
done: SimpleQueue[Any] = SimpleQueue()
|
58 |
+
Thread(target=progress_log, args=[log_queue, done]).start()
|
59 |
+
handle = Thread(target=file_ocr, args=[log_queue, str(file_path), start_page, end_page])
|
60 |
handle.start()
|
61 |
|
62 |
+
rr.send_blueprint(rrb.Blueprint(
|
63 |
+
rrb.TextDocumentView(contents=["progress/**"]),
|
64 |
+
collapse_panels=True,
|
65 |
+
))
|
66 |
+
yield stream.read()
|
67 |
+
|
68 |
while True:
|
69 |
msg = log_queue.get()
|
70 |
if msg == "done":
|
|
|
79 |
entity_path = msg[1]
|
80 |
args = msg[2]
|
81 |
kwargs = msg[3] if len(msg) >= 4 else {}
|
|
|
|
|
|
|
82 |
rr.log(entity_path, *args, **kwargs)
|
83 |
|
84 |
yield stream.read()
|
85 |
|
86 |
+
rr.log("progress",rr.TextDocument("Done!"))
|
87 |
+
yield stream.read()
|
88 |
+
done.put(())
|
89 |
handle.join()
|
90 |
print("done")
|
91 |
|
|
|
99 |
gr.Markdown(DESCRIPTION)
|
100 |
with gr.Row():
|
101 |
with gr.Column(scale=1):
|
102 |
+
with gr.Tab(label="Upload Image"):
|
103 |
+
with gr.Row():
|
104 |
+
input_image_file = gr.Image(label="Input Image", image_mode="RGBA", sources="upload", type="filepath")
|
105 |
+
# input_image_file = gr.Image(label="Input image")
|
106 |
+
with gr.Row():
|
107 |
+
image_button = gr.Button()
|
108 |
+
with gr.Row():
|
109 |
+
gr.Examples(
|
110 |
+
examples=[
|
111 |
+
os.path.join("image_examples", img_name)
|
112 |
+
for img_name in sorted(os.listdir("image_examples"))
|
113 |
+
],
|
114 |
+
inputs=[input_image_file],
|
115 |
+
label="Examples",
|
116 |
+
cache_examples=False,
|
117 |
+
examples_per_page=12,
|
118 |
+
)
|
119 |
+
with gr.Tab(label="Upload pdf"):
|
120 |
+
with gr.Row():
|
121 |
+
input_pdf_file = gr.File(label="Input pdf")
|
122 |
+
gr.Markdown(f"Max {PAGE_LIMIT} pages, -1 on end page means max number of pages")
|
123 |
+
with gr.Row():
|
124 |
+
start_page_number = gr.Number(1, label="Start page", minimum=1)
|
125 |
+
with gr.Row():
|
126 |
+
end_page_number = gr.Number(-1, label="End page")
|
127 |
+
with gr.Row():
|
128 |
+
pdf_button = gr.Button()
|
129 |
+
with gr.Row():
|
130 |
+
gr.Examples(
|
131 |
+
examples=[
|
132 |
+
os.path.join("pdf_examples", img_name) for img_name in sorted(os.listdir("pdf_examples"))
|
133 |
+
],
|
134 |
+
inputs=[input_pdf_file],
|
135 |
+
label="Examples",
|
136 |
+
cache_examples=False,
|
137 |
+
examples_per_page=12,
|
138 |
+
)
|
139 |
with gr.Column(scale=4):
|
140 |
viewer = Rerun(streaming=True, height=900)
|
141 |
+
|
142 |
+
image_button.click(log_to_rr, inputs=[input_image_file], outputs=[viewer])
|
143 |
+
pdf_button.click(log_to_rr, inputs=[input_pdf_file, start_page_number, end_page_number], outputs=[viewer])
|
144 |
+
|
145 |
|
146 |
app = gr.mount_gradio_app(app, demo, path=CUSTOM_PATH)
|
examples/RP2040.png
DELETED
Binary file (176 kB)
|
|
examples/lorem-ipsum.png
DELETED
Binary file (281 kB)
|
|
examples/paper_page.png
DELETED
Binary file (484 kB)
|
|
image_examples/RP2040.png
ADDED
Git LFS Details
|
image_examples/lorem-ipsum.png
ADDED
Git LFS Details
|
image_examples/paper_page.png
ADDED
Git LFS Details
|
ocr.py
CHANGED
@@ -25,6 +25,8 @@ DATASET_DIR: Final = EXAMPLE_DIR / "dataset"
|
|
25 |
|
26 |
SAMPLE_IMAGE_URLs = ["https://storage.googleapis.com/rerun-example-datasets/ocr/paper.png"]
|
27 |
|
|
|
|
|
28 |
LayoutStructure: TypeAlias = tuple[
|
29 |
list[str], list[str], list[rrb.Spatial2DView], list[rrb.Spatial2DView], list[rrb.Spatial2DView]
|
30 |
]
|
@@ -352,7 +354,11 @@ def generate_blueprint(
|
|
352 |
contents=[f"{page_path}/Image/**"] + detections_paths,
|
353 |
),
|
354 |
rrb.Spatial2DView(name="Detections", contents=[f"{page_path}/Image/**"]),
|
355 |
-
rrb.
|
|
|
|
|
|
|
|
|
356 |
),
|
357 |
rrb.Horizontal(*section_tabs),
|
358 |
name=page_path,
|
@@ -366,11 +372,22 @@ def generate_blueprint(
|
|
366 |
)
|
367 |
|
368 |
|
369 |
-
def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str) -> None:
|
|
|
|
|
|
|
|
|
|
|
370 |
images: list[npt.NDArray[np.uint8]] = []
|
371 |
if file_path.endswith(".pdf"):
|
372 |
# convert pdf to images
|
373 |
-
images.extend(np.array(img, dtype=np.uint8) for img in pdf2image.convert_from_path(file_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
else:
|
375 |
# read image
|
376 |
img = cv2.imread(file_path)
|
@@ -379,7 +396,7 @@ def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str) -> None:
|
|
379 |
|
380 |
# Extract the layout from each image
|
381 |
layouts: list[Layout] = []
|
382 |
-
page_paths = [f"page_{i +
|
383 |
processed_layouts: list[LayoutStructure] = []
|
384 |
for i, (image, page_path) in enumerate(zip(images, page_paths)):
|
385 |
layouts.append(detect_and_log_layout(log_queue, image, page_path))
|
|
|
25 |
|
26 |
SAMPLE_IMAGE_URLs = ["https://storage.googleapis.com/rerun-example-datasets/ocr/paper.png"]
|
27 |
|
28 |
+
PAGE_LIMIT = 10
|
29 |
+
|
30 |
LayoutStructure: TypeAlias = tuple[
|
31 |
list[str], list[str], list[rrb.Spatial2DView], list[rrb.Spatial2DView], list[rrb.Spatial2DView]
|
32 |
]
|
|
|
354 |
contents=[f"{page_path}/Image/**"] + detections_paths,
|
355 |
),
|
356 |
rrb.Spatial2DView(name="Detections", contents=[f"{page_path}/Image/**"]),
|
357 |
+
rrb.Vertical(
|
358 |
+
rrb.TextDocumentView(name="Progress", contents=["progress/**"]),
|
359 |
+
rrb.TextDocumentView(name="Recovery", contents=f"{page_path}/Recovery"),
|
360 |
+
row_shares=[1, 4]
|
361 |
+
)
|
362 |
),
|
363 |
rrb.Horizontal(*section_tabs),
|
364 |
name=page_path,
|
|
|
372 |
)
|
373 |
|
374 |
|
375 |
+
def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str, start_page: int = 1, end_page: int | None = -1) -> None:
|
376 |
+
if end_page == -1:
|
377 |
+
end_page = start_page + PAGE_LIMIT
|
378 |
+
if end_page < start_page:
|
379 |
+
end_page = start_page
|
380 |
+
|
381 |
images: list[npt.NDArray[np.uint8]] = []
|
382 |
if file_path.endswith(".pdf"):
|
383 |
# convert pdf to images
|
384 |
+
images.extend(np.array(img, dtype=np.uint8) for img in pdf2image.convert_from_path(file_path, first_page=start_page, last_page=end_page))
|
385 |
+
if len(images) > PAGE_LIMIT:
|
386 |
+
log_queue.put([
|
387 |
+
"log",
|
388 |
+
"error",
|
389 |
+
[rr.TextLog(f"Too many pages requsted: {len(images)} requested but the limit is {PAGE_LIMIT}")],
|
390 |
+
])
|
391 |
else:
|
392 |
# read image
|
393 |
img = cv2.imread(file_path)
|
|
|
396 |
|
397 |
# Extract the layout from each image
|
398 |
layouts: list[Layout] = []
|
399 |
+
page_paths = [f"page_{i + start_page}" for i in range(len(images))]
|
400 |
processed_layouts: list[LayoutStructure] = []
|
401 |
for i, (image, page_path) in enumerate(zip(images, page_paths)):
|
402 |
layouts.append(detect_and_log_layout(log_queue, image, page_path))
|
{examples → pdf_examples}/midsommar_exercises.pdf
RENAMED
File without changes
|
{examples → pdf_examples}/paper.pdf
RENAMED
File without changes
|