Raphaël Bournhonesque committed
Commit: 4364eec
Parent(s): ae6ec77

improve demo

Files changed:
- app.py (+18, -58)
- requirements.txt (+3, -2)
app.py
CHANGED
@@ -1,56 +1,7 @@
-import re
-
-from annotated_text import annotated_text
 import requests
 import streamlit as st
+from annotated_text import annotated_text
+from openfoodfacts.images import generate_image_url, generate_json_ocr_url
-
-
-BARCODE_PATH_REGEX = re.compile(r"^(...)(...)(...)(.*)$")
-
-
-def split_barcode(barcode: str) -> list[str]:
-    """Split barcode in the same way as done by Product Opener to generate a
-    product image folder.
-
-    :param barcode: The barcode of the product. For the pro platform only,
-        it must be prefixed with the org ID using the format
-        `{ORG_ID}/{BARCODE}`
-    :raises ValueError: raise a ValueError if `barcode` is invalid
-    :return: a list containing the splitted barcode
-    """
-    org_id = None
-    if "/" in barcode:
-        # For the pro platform, `barcode` is expected to be in the format
-        # `{ORG_ID}/{BARCODE}` (ex: `org-lea-nature/3307130803004`)
-        org_id, barcode = barcode.split("/", maxsplit=1)
-
-    if not barcode.isdigit():
-        raise ValueError(f"unknown barcode format: {barcode}")
-
-    match = BARCODE_PATH_REGEX.fullmatch(barcode)
-
-    splits = [x for x in match.groups() if x] if match else [barcode]
-
-    if org_id is not None:
-        # For the pro platform only, images and OCRs belonging to an org
-        # are stored in a folder named after the org for all its products, ex:
-        # https://images.pro.openfoodfacts.org/images/products/org-lea-nature/330/713/080/3004/1.jpg
-        splits.append(org_id)
-
-    return splits
-
-
-def _generate_file_path(barcode: str, image_id: str, suffix: str):
-    splitted_barcode = split_barcode(barcode)
-    return f"/{'/'.join(splitted_barcode)}/{image_id}{suffix}"
-
-
-def generate_ocr_path(barcode: str, image_id: str) -> str:
-    return _generate_file_path(barcode, image_id, ".json")
-
-
-def generate_image_path(barcode: str, image_id: str) -> str:
-    return _generate_file_path(barcode, image_id, ".400.jpg")
 
 
 @st.cache_data
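
Note: the helpers removed above implement the 3-3-3-remainder folder layout used by Product Opener. A short illustrative walkthrough, assuming the removed functions above are in scope and using the sample barcode from their comments:

    # Illustrative only; relies on the split_barcode/_generate_file_path
    # helpers removed in the hunk above.
    split_barcode("3307130803004")
    # -> ["330", "713", "080", "3004"]
    generate_image_path("3307130803004", "1")
    # -> "/330/713/080/3004/1.400.jpg"
    generate_ocr_path("3307130803004", "1")
    # -> "/330/713/080/3004/1.json"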
@@ -85,7 +36,11 @@ def display_ner_tags(text: str, entities: list[dict]):
     annotated_text(spans)
 
 
-def run(barcode: str, model_version: str, min_threshold: float = 0.5,):
+def run(
+    barcode: str,
+    model_version: str,
+    min_threshold: float = 0.5,
+):
     product = get_product(barcode)
     st.markdown(f"[Product page](https://world.openfoodfacts.org/product/{barcode})")
 
@@ -93,18 +48,21 @@ def run(barcode: str, model_version: str, min_threshold: float = 0.5,):
         st.error(f"Product {barcode} not found")
         return
 
-    images = product
+    images = product.get("images", [])
+
+    if not images:
+        st.error(f"No images found for product {barcode}")
+        return
+
     for image_id, _ in images.items():
         if not image_id.isdigit():
            continue
 
-        ocr_path = generate_ocr_path(barcode, image_id)
-        ocr_url = f"https://static.openfoodfacts.org/images/products{ocr_path}"
+        ocr_url = generate_json_ocr_url(barcode, image_id)
        prediction = send_prediction_request(ocr_url, model_version)
 
        st.divider()
-        image_path = generate_image_path(barcode, image_id)
-        image_url = f"https://static.openfoodfacts.org/images/products{image_path}"
+        image_url = generate_image_url(barcode, image_id)
        st.markdown(f"[Image {image_id}]({image_url}), [OCR]({ocr_url})")
        st.image(image_url)
 
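
Note: the hand-built static.openfoodfacts.org URLs removed above are replaced by helpers from the openfoodfacts SDK (pinned in requirements.txt below). A minimal sketch of the new calls, assuming the pinned openfoodfacts==0.1.11 package; the sample values are illustrative, and the exact URL format (including whether the resized .400.jpg rendition the old code requested is used) is determined by the library:

    from openfoodfacts.images import generate_image_url, generate_json_ocr_url

    barcode = "3307130803004"  # illustrative value
    image_id = "1"

    # Both helpers return ready-to-use URLs, so app.py no longer joins
    # image paths by hand.
    image_url = generate_image_url(barcode, image_id)
    ocr_url = generate_json_ocr_url(barcode, image_id)
    print(image_url)
    print(ocr_url)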
@@ -126,7 +84,9 @@ st.markdown(
     "This demo leverages the ingredient entity detection model, "
     "that takes the OCR text as input and predict ingredient lists."
 )
-barcode = st.text_input(
+barcode = st.text_input(
+    "barcode", help="Barcode of the product", value=default_barcode
+).strip()
 model_version = "1"
 st.experimental_set_query_params(barcode=barcode)
 
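
Note: st.experimental_set_query_params(barcode=barcode) writes the entered barcode back into the page URL. default_barcode itself is defined outside this diff; a hypothetical sketch of how it could be recovered from the query string with the pinned Streamlit 1.28 API:

    import streamlit as st

    # Hypothetical: default_barcode is not shown in this diff; this is one way
    # to read it back from the URL so a shared link reopens the same product.
    query_params = st.experimental_get_query_params()
    default_barcode = query_params.get("barcode", [""])[0]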
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
 requests==2.28.1
-streamlit==1.
-st-annotated-text==4.0.0
+streamlit==1.28.1
+st-annotated-text==4.0.0
+openfoodfacts==0.1.11