import os

import streamlit as st
import google.generativeai as genai

# Load the Gemini API key from the GEMINI_API_KEY environment variable
API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=API_KEY)


def multimodal_prompt(image_file, analysis_task):
    """
    Sends a multimodal prompt to the Gemini model with an image and a selected analysis task.

    Args:
        image_file: The uploaded image file object.
        analysis_task: The selected task for image analysis.

    Returns:
        The model's response as a string.
    """
    model = genai.GenerativeModel("gemini-1.5-flash")
    try:
        # Load image data as bytes
        image_bytes = image_file.getvalue()

        # Create the image input for the model
        image_part = {
            "mime_type": "image/png" if image_file.type == "image/png" else "image/jpeg",
            "data": image_bytes,
        }

        # Construct the multimodal prompt: task instruction first, then the image
        prompt = [
            f"Perform the following task on the image: {analysis_task}",
            image_part,
        ]

        # Send the request to the model
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"An error occurred: {e}"


def main():
    # Streamlit UI
    st.title("Multimodal Gemini Image Analysis App")
    st.write("Upload an image and choose a task for analysis.")

    # File uploader for images (JPEG, PNG)
    uploaded_image = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

    # List of image analysis tasks
    analysis_tasks = [
        "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
        "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
        "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
        "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
        "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
        "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
        "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
        "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
        "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
        "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases.",
    ]

    # Task selection dropdown
    selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)

    if uploaded_image is not None:
        # Preview the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

    if uploaded_image is not None and selected_task:
        if st.button("Analyze Image"):
            with st.spinner("Processing..."):
                response = multimodal_prompt(uploaded_image, selected_task)
                st.markdown(response)


if __name__ == "__main__":
    main()
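
# How to run this app locally (a minimal sketch; the filename app.py is an
# assumption, not something the script defines):
#
#   export GEMINI_API_KEY="your-api-key"   # make the key visible to os.getenv above
#   streamlit run app.py                   # launch the Streamlit UI in your browser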