# Source: Hugging Face Space "eagle0504/world-model"
# (Space status at capture time: Sleeping; file size: 4,760 bytes.)

import streamlit as st
import requests
import base64
from PIL import Image
import io
import os

# Function to make an API call to Google's Gemini API
def call_gemini_api(image_base64, api_key, prompt="What is this picture?", timeout=60):
    """Send a text prompt plus one inline image to Gemini and return the reply.

    Args:
        image_base64: Base64-encoded image data as text. It is always sent
            with MIME type ``image/jpeg``.
        api_key: Google Generative Language API key.
        prompt: Text part sent alongside the image.
        timeout: Seconds to wait for the HTTP request (forwarded to
            ``requests.post``). Without a timeout a stalled connection
            would block the app forever.

    Returns:
        The decoded JSON body of the HTTP response. HTTP error statuses are
        not raised here, so the caller must inspect the payload.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        "Content-Type": "application/json",
        # The key travels in a header rather than the query string so it does
        # not appear in URLs echoed by logs or request exception messages.
        "x-goog-api-key": api_key,
    }
    data = {
        "contents": [
            {
                "parts": [
                    {"text": prompt},
                    {"inline_data": {"mime_type": "image/jpeg", "data": image_base64}},
                ]
            }
        ]
    }
    response = requests.post(
        "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent",
        headers=headers,
        json=data,
        timeout=timeout,
    )
    return response.json()

# Streamlit app
# User manual rendered inside the sidebar's "Instructions" expander.
_INSTRUCTIONS_MARKDOWN = """
    ## Streamlit Image Chatbot App Manual
    
    Welcome to the Streamlit Image Chatbot App! This manual will guide you through using the app to chat with a bot about your uploaded images.
    
    ### Getting Started
    
    #### Step 1: Open the App
    - Open your web browser and go to the URL where the Streamlit app is hosted. This will typically be provided to you by the person who set up the app.
    
    #### Step 2: Upload an Image
    1. **Choose an Image:**
       - Look at the left-hand side of the screen. This area is called the sidebar.
       - In the sidebar, you will see an option to "Choose an image...". Click on this button.
    
    2. **Select an Image:**
       - A file dialog will open. Browse your computer to find an image file (JPG, JPEG, or PNG) you want to upload.
       - Select the file and click "Open" or "Choose" depending on your operating system.
    
    3. **View the Uploaded Image:**
       - Once the image is uploaded, you will see it displayed in the sidebar.
    
    #### Step 3: Chat with the Bot
    1. **Ask a Question:**
       - Look at the main section of the app, to the right of the sidebar.
       - There will be a text box labeled "Ask a question about the image:". Click inside this text box.
    
    2. **Type Your Question:**
       - Type in a question you have about the uploaded image. For example, you could ask, "What is in this picture?" or "Can you describe this image?"
    
    3. **Submit Your Question:**
       - Press the Enter key on your keyboard to submit your question.
    
    4. **View the Response:**
       - The bot will analyze your question and the image, then provide a response. The conversation will be displayed below the text box.
    
    #### Step 4: Continue the Conversation
    - You can continue asking more questions about the same image. Each time you ask a question, the bot will respond and the conversation will be updated.
    
    #### Step 5: Upload a New Image (Optional)
    - If you want to start a new conversation with a different image, simply go back to the sidebar and upload a new image. The bot will reset with the new image context, and you can start asking questions about the new image.
    
    ### Tips for Best Results
    - Use clear and specific questions to get the best responses from the bot.
    - Ensure your images are clear and in good quality to help the bot analyze them accurately.
    
    Enjoy exploring and interacting with the Streamlit Image Chatbot App!
    """

# Page-level configuration is issued before any other Streamlit command.
st.set_page_config(layout="wide")
st.title("World Model")

# Sidebar heading plus a collapsed-by-default panel holding the manual.
st.sidebar.title("Upload Image or Take a Picture")
instructions_panel = st.sidebar.expander("Instructions", expanded=False)
with instructions_panel:
    st.write(_INSTRUCTIONS_MARKDOWN)
def _extract_reply(payload):
    """Return ``(reply_text, error_message)`` for a Gemini response payload.

    Exactly one element of the pair is ``None``. Error payloads and responses
    without a text candidate yield a human-readable message instead of
    raising ``KeyError``/``IndexError``.
    """
    try:
        return payload["candidates"][0]["content"]["parts"][0]["text"], None
    except (KeyError, IndexError, TypeError):
        pass
    message = None
    if isinstance(payload, dict) and isinstance(payload.get("error"), dict):
        message = payload["error"].get("message")
    return None, message or "The model returned no answer for this question."


uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is None:
    st.sidebar.text("Please upload an image to start the conversation.")
else:
    image = Image.open(uploaded_file)
    st.sidebar.image(image, caption='Uploaded Image.', use_column_width=True)

    # JPEG cannot store alpha or palette modes, so PNGs in RGBA/P/LA would
    # make save() raise; convert those to RGB before encoding.
    jpeg_ready = image if image.mode in ("RGB", "L") else image.convert("RGB")
    buffered = io.BytesIO()
    jpeg_ready.save(buffered, format="JPEG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode()

    # .get() so a missing variable produces a readable message, not a KeyError.
    api_key = os.environ.get("GEMINI_API_KEY", "")

    if not api_key:
        st.error("The GEMINI_API_KEY environment variable is not set.")
    else:
        st.header("Chat with the Bot")

        # Start a fresh conversation whenever a different file is uploaded,
        # as the in-app manual promises.
        image_id = f"{uploaded_file.name}:{uploaded_file.size}"
        if (
            'conversation' not in st.session_state
            or st.session_state.get("conversation_image") != image_id
        ):
            st.session_state.conversation = []
            st.session_state.conversation_image = image_id
            st.session_state.pop("last_question", None)

        # Keying the widget on the image clears the box for a new image, so a
        # stale question is not silently re-sent against the new picture.
        user_input = st.text_input(
            "Ask a question about the image:", key=f"question:{image_id}"
        )

        # Streamlit reruns the whole script on every interaction; skip a
        # question that was already answered to avoid duplicate entries.
        if user_input and user_input != st.session_state.get("last_question"):
            try:
                json_response = call_gemini_api(image_base64, api_key, user_input)
            except (requests.RequestException, ValueError):
                # The exception text is deliberately not shown: request
                # errors can echo the request URL.
                st.error("Could not get a response from the Gemini API. Please try again.")
            else:
                response, problem = _extract_reply(json_response)
                if response is None:
                    st.error(f"Gemini API error: {problem}")
                else:
                    st.session_state.conversation.append({"user": user_input, "bot": response})
                    st.session_state.last_question = user_input

        for chat in st.session_state.conversation:
            st.write(f"**You:** {chat['user']}")
            st.write(f"**Bot:** {chat['bot']}")