louiecerv committed on
Commit
43dbe54
·
1 Parent(s): d9ff2ba

first save

Browse files
Files changed (2) hide show
  1. app.py +81 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import google.generativeai as genai
3
+ from PIL import Image
4
+
5
# Read the Gemini API key from the environment rather than embedding it in
# the source: a key committed to a repository is exposed to every reader and
# must be treated as compromised (revoke it and issue a new one).
# If GOOGLE_API_KEY is not set, API_KEY is None.
import os

API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=API_KEY)
8
+
9
def multimodal_prompt(image_file, analysis_task, model_name="gemini-1.5-flash"):
    """Send an image plus an analysis instruction to a Gemini model.

    Args:
        image_file: The uploaded image file object. It must provide
            ``getvalue()`` returning the raw image bytes and a ``type``
            attribute holding the MIME type string.
        analysis_task: The selected task for image analysis; it is embedded
            in the text instruction sent alongside the image.
        model_name: Name of the Gemini model to query. Defaults to
            "gemini-1.5-flash", so existing two-argument calls behave as
            before.

    Returns:
        The model's response text, or a string of the form
        "An error occurred: <details>" if preparing or sending the request
        raised an exception.
    """
    model = genai.GenerativeModel(model_name)

    try:
        # Load image data as bytes
        image_bytes = image_file.getvalue()

        # Anything not explicitly reported as PNG is labelled as JPEG.
        mime_type = "image/png" if image_file.type == "image/png" else "image/jpeg"
        image_part = {"mime_type": mime_type, "data": image_bytes}

        # Multimodal prompt: the text instruction first, then the image.
        prompt = [
            f"Perform the following task on the image: {analysis_task}",
            image_part,
        ]

        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberate catch-all at the UI boundary: the result is shown to
        # the user as-is, so failures are reported as text instead of raised.
        return f"An error occurred: {e}"
43
+
44
# Analysis tasks offered in the dropdown; the selected entry is passed
# verbatim to multimodal_prompt as the instruction text.
_ANALYSIS_TASKS = (
    "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
    "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
    "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
    "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
    "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
    "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
    "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
    "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
    "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
    "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases.",
)


def main():
    """Render the Streamlit page: upload an image, pick a task, show the result."""
    st.title("Multimodal Gemini Image Analysis App")
    st.write("Upload an image and choose a task for analysis.")

    # Both input widgets are always drawn, whether or not a file is present.
    uploaded_image = st.file_uploader(
        "Choose an image file",
        type=["jpg", "jpeg", "png"],
    )
    selected_task = st.selectbox("Select an image analysis task:", _ANALYSIS_TASKS)

    # Nothing further to draw until an image has been uploaded.
    if uploaded_image is None:
        return

    st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

    # The analyze button is only offered once a task is selected.
    if not selected_task:
        return
    if not st.button("Analyze Image"):
        return

    with st.spinner("Processing..."):
        response = multimodal_prompt(uploaded_image, selected_task)
    st.subheader("Response:")
    st.write(response)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ google-generativeai
3
+ Pillow