Golu2811 committed on
Commit
f35c9aa
·
verified ·
1 Parent(s): 7876d85

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # The code segment allows users to upload an image, perform object detection using DETR-ResNet-50
3
+ # model, draw bounding boxes around detected objects, generate descriptions and summaries for the
4
+ # objects, and display the results in a summary table.
5
+
6
+ # :param filename: The `filename` parameter in the code refers to the name of the file that is being
7
+ # processed or queried. It is used as an input to functions like `query(filename)` and
8
+ # `text(filename)` where the file is read and processed accordingly. In the Streamlit application
9
+ # context, the `filename` refers to the uploaded image that has been saved to disk.
10
+ # :return: The code provided is a Streamlit application for image segmentation and object detection
11
+ # using the DETR model from Hugging Face and optical character recognition (OCR) using the T-ROCR
12
+ # model from Microsoft.
13
+ import streamlit as st
14
+ from PIL import Image
15
+ import cv2
16
+ import requests
17
+ from dotenv import load_dotenv
18
+ import google.generativeai as genai
19
+ from langchain_google_genai import ChatGoogleGenerativeAI
20
+ import os
21
+ import pandas as pd
22
+
23
st.title("Image segmentation and object analysis")

# Load API credentials from the local .env file into the environment.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Gemini chat model used to generate object descriptions and summaries.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)

# Hugging Face Inference API token and DETR object-detection endpoint.
# BUG FIX: the env-var name must be the string "TOKEN"; the bare name
# TOKEN raised NameError at import time.
token = os.getenv("TOKEN")
detr = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
headers = {"Authorization": token}
33
+
34
def query(filename):
    """POST the raw bytes of *filename* to the DETR object-detection
    endpoint and return the decoded JSON response."""
    with open(filename, "rb") as f:
        payload = f.read()
    return requests.post(detr, headers=headers, data=payload).json()
39
+
40
# Hugging Face Inference API endpoint for handwritten-text OCR (TrOCR).
trocr = "https://api-inference.huggingface.co/models/microsoft/trocr-base-handwritten"

def text(filename):
    """POST the raw bytes of *filename* to the TrOCR handwriting-OCR
    endpoint and return the decoded JSON response."""
    with open(filename, "rb") as f:
        payload = f.read()
    return requests.post(trocr, headers=headers, data=payload).json()
47
+
48
+
49
+ # The function `get_object_descriptions` takes a list of objects, generates a short description for
50
+ # each object using a model, and returns a list of these descriptions.
51
+
52
+
53
def get_object_descriptions(objects_list):
    """Ask the Gemini model for a very short description of each label
    in *objects_list* and return the descriptions in the same order."""
    return [
        model.invoke(f"Provide a very short description of {obj}.").content
        for obj in objects_list
    ]
62
+
63
def get_object_summary(objects_list):
    """Ask the Gemini model for a two-line nature/design summary of each
    label in *objects_list* and return the summaries in the same order."""
    summaries = []
    for label in objects_list:
        prompt = f"Provide a very short nature and design of {label} in just two lines."
        reply = model.invoke(prompt)
        summaries.append(reply.content)
    return summaries
75
+
76
+
77
+
78
# File-uploader widget: lets the user pick an image from their local system.
uploaded_file = st.file_uploader("Choose an image")

if uploaded_file is not None:
    image_data = uploaded_file.read()
    st.image(image_data)
    st.write("file uploaded")
    image = Image.open(uploaded_file)
    # Persist the upload to disk so the HF endpoints (which read a file
    # path) can consume it.
    filepath = "./uploaded_image.jpg"
    image.save(filepath)
    st.success(f"Image saved successfully at {filepath}")

    output = query("uploaded_image.jpg")
    frame = cv2.imread("uploaded_image.jpg")

    # BUG FIX: the Inference API never returns None — on success it returns
    # a list of detections, on failure (or model error) a dict such as
    # {"error": ...}. The old `if output is None` test made the OCR branch
    # unreachable and let an error dict crash the drawing loop. Only a
    # non-empty list of detections can be drawn; everything else falls back
    # to the OCR path.
    if not isinstance(output, list) or not output:
        res = text("uploaded_image.jpg")
        st.write("Image only contain text")
        st.write(res[0]['generated_text'])
    else:
        def draw_bounding_boxes(image, detections):
            """Draw each detection's box, label and confidence score onto
            *image* (a BGR OpenCV array) and return the annotated image."""
            for detection in detections:
                box = detection['box']
                xmin, ymin = box['xmin'], box['ymin']
                xmax, ymax = box['xmax'], box['ymax']
                label = detection['label']
                score = detection['score']
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                caption = f"{label}: {score:.2f}"
                cv2.putText(image, caption, (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            return image

        image_with_boxes = draw_bounding_boxes(frame, output)
        cv2.imwrite("saved_image.jpg", image_with_boxes)
        st.image("saved_image.jpg")

        # Unique detected labels, sorted for a deterministic table order
        # (list(set(...)) gave an arbitrary order on each run).
        labels = sorted({detection["label"] for detection in output})

        # One short description and one two-line summary per unique label.
        # NOTE: the old code also called text("uploaded_image.jpg") here and
        # discarded the result — a wasted API request, now removed.
        desc = get_object_descriptions(labels)
        summ = get_object_summary(labels)
        df = pd.DataFrame({'OBJECT': labels, 'DESCRIPTION': desc, 'SUMMARY': summ})

        st.title("Summary Table")
        st.dataframe(df)