app updated
Browse files- app.py +4 -0
- utils/cnn_transformer.py +5 -6
- utils/helpers.py +32 -29
app.py
CHANGED
@@ -156,6 +156,10 @@ def main():
|
|
156 |
{"Key": keys, "Values": values}
|
157 |
)
|
158 |
|
|
|
|
|
|
|
|
|
159 |
# Convert DataFrame to CSV
|
160 |
csv = sample_payload_output.to_csv(index=False)
|
161 |
|
|
|
156 |
{"Key": keys, "Values": values}
|
157 |
)
|
158 |
|
159 |
+
# Display table
|
160 |
+
with st.expander("Inspect table (before download)"):
|
161 |
+
st.table(sample_payload_output)
|
162 |
+
|
163 |
# Convert DataFrame to CSV
|
164 |
csv = sample_payload_output.to_csv(index=False)
|
165 |
|
utils/cnn_transformer.py
CHANGED
@@ -3,11 +3,11 @@ import os
|
|
3 |
os.environ["KERAS_BACKEND"] = "tensorflow"
|
4 |
|
5 |
import re
|
6 |
-
import numpy as np
|
7 |
-
import matplotlib.pyplot as plt
|
8 |
|
9 |
-
import tensorflow as tf
|
10 |
import keras
|
|
|
|
|
|
|
11 |
from keras import layers
|
12 |
from keras.applications import efficientnet
|
13 |
from keras.layers import TextVectorization
|
@@ -319,7 +319,6 @@ class ImageCaptioningModel(keras.Model):
|
|
319 |
return [self.loss_tracker, self.acc_tracker]
|
320 |
|
321 |
|
322 |
-
|
323 |
strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
|
324 |
strip_chars = strip_chars.replace("<", "")
|
325 |
strip_chars = strip_chars.replace(">", "")
|
@@ -350,7 +349,7 @@ def generate_caption(caption_model: None):
|
|
350 |
|
351 |
# Pass the image to the CNN
|
352 |
# img = tf.expand_dims(sample_img, 0)
|
353 |
-
#TOOD
|
354 |
img = None
|
355 |
img = caption_model.cnn_model(img)
|
356 |
|
@@ -376,4 +375,4 @@ def generate_caption(caption_model: None):
|
|
376 |
|
377 |
decoded_caption = decoded_caption.replace("<start> ", "")
|
378 |
decoded_caption = decoded_caption.replace(" <end>", "").strip()
|
379 |
-
print("Predicted Caption: ", decoded_caption)
|
|
|
3 |
os.environ["KERAS_BACKEND"] = "tensorflow"
|
4 |
|
5 |
import re
|
|
|
|
|
6 |
|
|
|
7 |
import keras
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import numpy as np
|
10 |
+
import tensorflow as tf
|
11 |
from keras import layers
|
12 |
from keras.applications import efficientnet
|
13 |
from keras.layers import TextVectorization
|
|
|
319 |
return [self.loss_tracker, self.acc_tracker]
|
320 |
|
321 |
|
|
|
322 |
strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
|
323 |
strip_chars = strip_chars.replace("<", "")
|
324 |
strip_chars = strip_chars.replace(">", "")
|
|
|
349 |
|
350 |
# Pass the image to the CNN
|
351 |
# img = tf.expand_dims(sample_img, 0)
|
352 |
+
# TODO
|
353 |
img = None
|
354 |
img = caption_model.cnn_model(img)
|
355 |
|
|
|
375 |
|
376 |
decoded_caption = decoded_caption.replace("<start> ", "")
|
377 |
decoded_caption = decoded_caption.replace(" <end>", "").strip()
|
378 |
+
print("Predicted Caption: ", decoded_caption)
|
utils/helpers.py
CHANGED
@@ -4,22 +4,20 @@ import json
|
|
4 |
import os
|
5 |
from typing import Any, Dict, List
|
6 |
|
|
|
|
|
|
|
|
|
7 |
import pandas as pd
|
8 |
import requests
|
9 |
import streamlit as st
|
10 |
-
from
|
11 |
-
import google.generativeai as palm
|
12 |
-
from pypdf import PdfReader
|
13 |
from langchain.text_splitter import (
|
14 |
RecursiveCharacterTextSplitter,
|
15 |
SentenceTransformersTokenTextSplitter,
|
16 |
)
|
17 |
-
import
|
18 |
-
|
19 |
-
import streamlit as st
|
20 |
-
import chromadb
|
21 |
-
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
22 |
-
|
23 |
|
24 |
# API Key (You should set this in your environment variables)
|
25 |
# api_key = st.secrets["PALM_API_KEY"]
|
@@ -187,10 +185,10 @@ def displayPDF(file: str) -> None:
|
|
187 |
# Opening the PDF file in binary read mode
|
188 |
with open(file, "rb") as f:
|
189 |
# Encoding the PDF file content to base64
|
190 |
-
base64_pdf: str = base64.b64encode(f.read()).decode(
|
191 |
|
192 |
# Creating an HTML embed string for displaying the PDF
|
193 |
-
pdf_display: str =
|
194 |
|
195 |
# Using Streamlit to display the HTML embed string as unsafe HTML
|
196 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
@@ -199,16 +197,16 @@ def displayPDF(file: str) -> None:
|
|
199 |
def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
|
200 |
"""
|
201 |
Draws bounding boxes and labels onto an image based on provided predictions.
|
202 |
-
|
203 |
Parameters:
|
204 |
- image (Any): The image to annotate, which should support the PIL drawing interface.
|
205 |
- predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
|
206 |
-
containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
|
207 |
'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.
|
208 |
-
|
209 |
Returns:
|
210 |
- Any: The annotated image with bounding boxes and labels drawn on it.
|
211 |
-
|
212 |
Note:
|
213 |
- This function assumes that the incoming image supports the PIL ImageDraw interface.
|
214 |
- The function directly modifies the input image and returns it.
|
@@ -235,7 +233,9 @@ def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
|
|
235 |
return image
|
236 |
|
237 |
|
238 |
-
def draw_bounding_boxes_for_textract(
|
|
|
|
|
239 |
"""
|
240 |
Draws bounding boxes on an image based on the provided JSON data from Textract.
|
241 |
|
@@ -248,31 +248,34 @@ def draw_bounding_boxes_for_textract(image: Image.Image, json_data: Dict[str, An
|
|
248 |
"""
|
249 |
# Load the image from the provided path
|
250 |
draw = ImageDraw.Draw(image)
|
251 |
-
|
252 |
# Parse the JSON data
|
253 |
try:
|
254 |
data = json_data
|
255 |
-
blocks = json.loads(data[
|
256 |
except json.JSONDecodeError:
|
257 |
-
st.error(
|
258 |
return image
|
259 |
-
|
260 |
if blocks is None:
|
261 |
-
st.error(
|
262 |
return image
|
263 |
-
|
264 |
# Iterate through the elements to find bounding boxes and draw them
|
265 |
for item in blocks:
|
266 |
-
if
|
267 |
-
bbox = item[
|
268 |
# Extract coordinates and dimensions
|
269 |
-
left, top, width, height =
|
|
|
|
|
|
|
|
|
|
|
270 |
# Calculate bounding box coordinates in image space
|
271 |
left_top = (left * image.width, top * image.height)
|
272 |
right_bottom = ((left + width) * image.width, (top + height) * image.height)
|
273 |
# Draw rectangle
|
274 |
-
draw.rectangle([left_top, right_bottom], outline=
|
275 |
-
|
276 |
-
return image
|
277 |
-
|
278 |
|
|
|
|
4 |
import os
|
5 |
from typing import Any, Dict, List
|
6 |
|
7 |
+
import chromadb
|
8 |
+
import google.generativeai as palm
|
9 |
+
import matplotlib.patches as patches
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
import pandas as pd
|
12 |
import requests
|
13 |
import streamlit as st
|
14 |
+
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
|
|
|
|
15 |
from langchain.text_splitter import (
|
16 |
RecursiveCharacterTextSplitter,
|
17 |
SentenceTransformersTokenTextSplitter,
|
18 |
)
|
19 |
+
from PIL import Image, ImageDraw, ImageFont
|
20 |
+
from pypdf import PdfReader
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# API Key (You should set this in your environment variables)
|
23 |
# api_key = st.secrets["PALM_API_KEY"]
|
|
|
185 |
# Opening the PDF file in binary read mode
|
186 |
with open(file, "rb") as f:
|
187 |
# Encoding the PDF file content to base64
|
188 |
+
base64_pdf: str = base64.b64encode(f.read()).decode("utf-8")
|
189 |
|
190 |
# Creating an HTML embed string for displaying the PDF
|
191 |
+
pdf_display: str = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf">'
|
192 |
|
193 |
# Using Streamlit to display the HTML embed string as unsafe HTML
|
194 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
|
|
197 |
def draw_boxes(image: Any, predictions: List[Dict[str, Any]]) -> Any:
|
198 |
"""
|
199 |
Draws bounding boxes and labels onto an image based on provided predictions.
|
200 |
+
|
201 |
Parameters:
|
202 |
- image (Any): The image to annotate, which should support the PIL drawing interface.
|
203 |
- predictions (List[Dict[str, Any]]): A list of predictions where each prediction is a dictionary
|
204 |
+
containing 'label', 'score', and 'box' keys. The 'box' is another dictionary with 'xmin',
|
205 |
'ymin', 'xmax', and 'ymax' as keys representing coordinates for the bounding box.
|
206 |
+
|
207 |
Returns:
|
208 |
- Any: The annotated image with bounding boxes and labels drawn on it.
|
209 |
+
|
210 |
Note:
|
211 |
- This function assumes that the incoming image supports the PIL ImageDraw interface.
|
212 |
- The function directly modifies the input image and returns it.
|
|
|
233 |
return image
|
234 |
|
235 |
|
236 |
+
def draw_bounding_boxes_for_textract(
|
237 |
+
image: Image.Image, json_data: Dict[str, Any]
|
238 |
+
) -> Image.Image:
|
239 |
"""
|
240 |
Draws bounding boxes on an image based on the provided JSON data from Textract.
|
241 |
|
|
|
248 |
"""
|
249 |
# Load the image from the provided path
|
250 |
draw = ImageDraw.Draw(image)
|
251 |
+
|
252 |
# Parse the JSON data
|
253 |
try:
|
254 |
data = json_data
|
255 |
+
blocks = json.loads(data["body"]) if "body" in data else None
|
256 |
except json.JSONDecodeError:
|
257 |
+
st.error("Invalid JSON data.")
|
258 |
return image
|
259 |
+
|
260 |
if blocks is None:
|
261 |
+
st.error("No bounding box data found.")
|
262 |
return image
|
263 |
+
|
264 |
# Iterate through the elements to find bounding boxes and draw them
|
265 |
for item in blocks:
|
266 |
+
if "BlockType" in item and item["BlockType"] in ["LINE", "WORD"]:
|
267 |
+
bbox = item["Geometry"]["BoundingBox"]
|
268 |
# Extract coordinates and dimensions
|
269 |
+
left, top, width, height = (
|
270 |
+
bbox["Left"],
|
271 |
+
bbox["Top"],
|
272 |
+
bbox["Width"],
|
273 |
+
bbox["Height"],
|
274 |
+
)
|
275 |
# Calculate bounding box coordinates in image space
|
276 |
left_top = (left * image.width, top * image.height)
|
277 |
right_bottom = ((left + width) * image.width, (top + height) * image.height)
|
278 |
# Draw rectangle
|
279 |
+
draw.rectangle([left_top, right_bottom], outline="red", width=2)
|
|
|
|
|
|
|
280 |
|
281 |
+
return image
|