import streamlit as st
import google.generativeai as genai
from PIL import Image
import os

# Read the Gemini API key from the GEMINI_API_KEY environment variable
API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=API_KEY)
def multimodal_prompt(image_file, analysis_task):
    """
    Sends a multimodal prompt to the Gemini model with an image and a selected analysis task.

    Args:
        image_file: The uploaded image file object.
        analysis_task: The selected task for image analysis.

    Returns:
        The model's response as a string.
    """
    model = genai.GenerativeModel("gemini-1.5-flash")
    try:
        # Load image data as bytes
        image_bytes = image_file.getvalue()

        # Create the image input for the model
        image_part = {
            "mime_type": "image/png" if image_file.type == "image/png" else "image/jpeg",
            "data": image_bytes
        }

        # Construct the multimodal prompt
        prompt = [
            f"Perform the following task on the image: {analysis_task}",
            image_part
        ]

        # Send the request to the model
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"An error occurred: {e}"
def main():
    # Streamlit UI
    st.title("Multimodal Gemini Image Analysis App")
    st.write("Upload an image and choose a task for analysis.")

    # File uploader for images (JPEG, PNG)
    uploaded_image = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

    # List of image analysis tasks
    analysis_tasks = [
        "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
        "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
        "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
        "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
        "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
        "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
        "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
        "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
        "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
        "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases."
    ]

    # Task selection dropdown
    selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)

    if uploaded_image is not None:
        # Preview the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

    if uploaded_image is not None and selected_task:
        if st.button("Analyze Image"):
            with st.spinner("Processing..."):
                response = multimodal_prompt(uploaded_image, selected_task)
                st.markdown(response)

if __name__ == "__main__":
    main()
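A quick way to sanity-check the helper without launching the UI is to call multimodal_prompt directly from a small script. The snippet below is a minimal sketch, not part of the app itself: it assumes the code above is saved as app.py, that a sample.png exists in the working directory, and that GEMINI_API_KEY is set in the environment. The FakeUpload class is a hypothetical stand-in that mimics the only two pieces of Streamlit's UploadedFile the helper actually uses, getvalue() and the type attribute.

# test_prompt.py — a minimal sketch for testing multimodal_prompt outside Streamlit.
# Assumes the app above is saved as app.py, sample.png exists locally,
# and GEMINI_API_KEY is set in the environment.
import io

from app import multimodal_prompt  # hypothetical module name for the code above

class FakeUpload(io.BytesIO):
    """Stand-in for Streamlit's UploadedFile: provides getvalue() and .type."""
    def __init__(self, data: bytes, mime_type: str):
        super().__init__(data)
        self.type = mime_type

with open("sample.png", "rb") as f:
    upload = FakeUpload(f.read(), "image/png")

print(multimodal_prompt(upload, "Image Captioning: Generate a concise and accurate caption that describes the content of the image."))

To run the app itself, install the dependencies (streamlit, google-generativeai, Pillow), set GEMINI_API_KEY, and start it with streamlit run app.py (again assuming that filename).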