Spaces:

louiecerv
/

image-multimodal

Runtime error

App Files Files Community

louiecerv committed on Dec 17, 2024

Commit

43dbe54

1 Parent(s): d9ff2ba

first save

Browse files

Files changed (2) hide show

app.py +81 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import os
+
+import google.generativeai as genai
+import streamlit as st
+from PIL import Image
+# Replace with your actual API key
+API_KEY = "AIzaSyA3AAdrEZ4YwczJZpV8uVMIM7zTJpXtNgg"
+genai.configure(api_key=API_KEY)
+def multimodal_prompt(image_file, analysis_task):
+    """
+    Sends a multimodal prompt to the Gemini model with an image and a selected analysis task.
+    Args:
+        image_file: The uploaded image file object.
+        analysis_task: The selected task for image analysis.
+    Returns:
+        The model's response as a string.
+    """
+    model = genai.GenerativeModel("gemini-1.5-flash")
+    try:
+        # Load image data as bytes
+        image_bytes = image_file.getvalue()
+        # Create the image input for the model
+        image_part = {
+            "mime_type": "image/png" if image_file.type == "image/png" else "image/jpeg",
+            "data": image_bytes
+        }
+        # Construct the multimodal prompt
+        prompt = [
+            f"Perform the following task on the image: {analysis_task}",
+            image_part
+        ]
+        # Send the request to the model
+        response = model.generate_content(prompt)
+        return response.text
+    except Exception as e:
+        return f"An error occurred: {e}"
+def main():
+    # Streamlit UI
+    st.title("Multimodal Gemini Image Analysis App")
+    st.write("Upload an image and choose a task for analysis.")
+    # File uploader for images (JPEG, PNG)
+    uploaded_image = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
+    # List of image analysis tasks
+    analysis_tasks = [
+        "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
+        "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
+        "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
+        "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
+        "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
+        "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
+        "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
+        "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
+        "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
+        "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases."
+    ]
+    # Task selection dropdown
+    selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)
+    if uploaded_image is not None:
+        # Preview the uploaded image
+        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)
+    if uploaded_image is not None and selected_task:
+        if st.button("Analyze Image"):
+            with st.spinner("Processing..."):
+                response = multimodal_prompt(uploaded_image, selected_task)
+                st.subheader("Response:")
+                st.write(response)
+# Build the page only when this file is executed as the main script.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+google-generativeai
+Pillow