Spaces:

LuckRafly
/

Chat-with-an-Image-GeminiAI

Running

App Files Community

LuckRafly committed on Dec 23, 2023

Commit

3aba7d8

•

1 Parent(s): b4f02d2

Upload 4 files

Browse files

Files changed (4) hide show

app.py +102 -0
function.py +88 -0
htmlTemplate.py +89 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import streamlit as st
+from PIL import Image
+from function import bounding_box, captioning_query
+from tempfile import NamedTemporaryFile
+import os
+from function import ImageCaptionTools, ObjectDetectionTool
+from langchain.agents import initialize_agent, AgentType
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain.memory import ConversationBufferWindowMemory
+from htmlTemplate import css, bot_template, user_template
+DIR_PATH = './temp'
+if not os.path.exists(DIR_PATH):
+    os.mkdir(DIR_PATH)
+# initialize Agent
+def agent_init():
+    tools = [ImageCaptionTools(), ObjectDetectionTool()]
+    llm = ChatGoogleGenerativeAI(model="gemini-pro")
+    memory = ConversationBufferWindowMemory(memory_key='chat_history',
+                                            k=5,
+                                            return_messages=True)
+    agents = initialize_agent(
+        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
+        llm=llm,
+        tools=tools,
+        max_iterations=5,
+        verbose=True,
+        memory=memory
+    )
+    return agents
+def delete_temp_files():
+    for filename in os.listdir(DIR_PATH):
+        file_path = os.path.join(DIR_PATH, filename)
+        if os.path.isfile(file_path):
+            os.unlink(file_path)
+def main():
+    st.set_page_config(
+        page_title="Chat with an Image",
+        page_icon="🖼️",
+        layout="wide"
+    )
+    st.write(css, unsafe_allow_html=True)
+    agent = agent_init()
+    if "image_processed" not in st.session_state:
+        st.session_state.image_processed = None
+    if "result_bounding" not in st.session_state:
+        st.session_state.result_bounding = None
+    # Delete temp files when session state changes
+    if st.session_state.image_processed is None:
+        delete_temp_files()
+    # image_path = 'documentation\photo_1.jpg'
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        image_upload = st.file_uploader(label="Please Upload Your Image", type=['jpg', 'png', 'jpeg'])
+        if not image_upload:
+            st.warning("Please upload your image")
+        else:
+            st.image(
+                image_upload,
+                use_column_width=True
+            )
+        click_process = st.button("Process Image", disabled=not image_upload)
+        if click_process:
+            delete_temp_files()
+            with NamedTemporaryFile(dir=DIR_PATH, delete=False) as f:
+                f.write(image_upload.getbuffer())
+                st.session_state.image_path = f.name
+                st.session_state.image_processed = True
+        if (st.session_state.image_processed and st.session_state.result_bounding is None) or click_process:
+            with st.spinner("Please Wait"):
+                result_bounding = bounding_box(st.session_state.image_path)
+                st.session_state.result_bounding = result_bounding
+        # Expander to show/hide image
+        if st.session_state.result_bounding is not None:
+            with st.expander("Show Image (Bounding Box)"):
+                st.image(st.session_state.result_bounding)
+    with col2:
+        user_question = st.text_area("Ask About your image",
+                                     disabled=not st.session_state.image_processed,
+                                     max_chars=150)
+        click_ask = st.button("Ask Question", disabled=not st.session_state.image_processed)
+        if click_ask:
+            st.write(user_template.replace("{{MSG}}", user_question), unsafe_allow_html=True)
+            with st.spinner("AI Searching for Answer🔎"):
+                chat_history = agent.invoke({"input": f"{user_question}, this is the image path: {st.session_state.image_path}"})
+                response = chat_history['output']
+                st.write(bot_template.replace("{{MSG}}", response), unsafe_allow_html=True)
+if __name__ == "__main__":
+    main()

function.py ADDED Viewed

	@@ -0,0 +1,88 @@

+from langchain.tools import BaseTool
+from PIL import Image, ImageDraw
+import requests
+from dotenv import load_dotenv
+import os
+load_dotenv()
+def object_detection_query(filepath):
+    API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
+    headers = {"Authorization": "Bearer " + os.environ['HUGGINGFACEHUB_API_TOKEN']}
+    with open(filepath, "rb") as f:
+        data = f.read()
+    response = requests.post(API_URL, headers=headers, data=data)
+    return response.json()
+def bounding_box(filepath):
+    # Generate an output
+    output = object_detection_query(filepath)
+    # load the image
+    image = Image.open(filepath).convert('RGB')
+    # create a drawing object
+    draw = ImageDraw.Draw(image)
+    # Draw boxes and labels on the image
+    for detection in output:
+        label = detection['label']
+        score = detection['score']
+        box = detection['box']
+        # Draw the box
+        draw.rectangle([box['xmin'], box['ymin'], box['xmax'], box['ymax']], outline="red", width=2)
+        # Draw the label and score
+        text = f"{label} ({score:.2f})"
+        draw.text((box['xmin'], box['ymin']-10), text, fill='red')
+    return image
+def captioning_query(filepath):
+    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
+    headers = {"Authorization": "Bearer " + os.environ['HUGGINGFACEHUB_API_TOKEN']}
+    with open(filepath, "rb") as f:
+        data = f.read()
+    response = requests.post(API_URL, headers=headers, data=data)
+    return response.json()
+class ImageCaptionTools(BaseTool):
+    name = "Image_Caption_Tools"
+    description = "Use this tool with any given image path to receive a personalized description, poem, story, or more. "\
+                  "Ideal for agents seeking tailored insights. "\
+                  "Let the tool craft content based on your image for a unique perspective."
+    def _run(self, image_path) -> str:
+        """Use the tool."""
+        result = captioning_query(image_path)
+        text = result[0]['generated_text']
+        return text
+    async def _arun(self, query: str) -> str:
+        """Use the tool asynchronously."""
+        raise NotImplementedError("custom_search does not support async")
+class ObjectDetectionTool(BaseTool):
+    name = "Object_Detection_Tool"
+    description = "Object Detection Tool: Use this tool to detect objects in an image. Provide the image path, " \
+                  "and it will return a list of detected objects. Each element in the list is in the format: " \
+                  "[x1, y1, x2, y2] class_name confidence_score. This tool focuses on object detection, providing " \
+                  "locations of objects in the image. For image descriptions or other insights, explore additional tools."
+    def _run(self, image_path) -> str:
+        """Use the tool."""
+        results = object_detection_query(image_path)
+        detections = ""
+        for result in results:
+            box = result['box']
+            detections += '[{}, {}, {}, {}]'.format(int(box['xmin']), int(box['ymin']), int(box['xmax']), int(box['ymax']))
+            detections += ' {}'.format(result['label'])
+            detections += ' {}\n'.format(result['score'])
+        return detections
+    async def _arun(self, query: str) -> str:
+        """Use the tool asynchronously."""
+        raise NotImplementedError("custom_search does not support async")

htmlTemplate.py ADDED Viewed

	@@ -0,0 +1,89 @@

+# Page-wide stylesheet, kept as a single <style> element in a string.
+# app.py injects it with st.write(css, unsafe_allow_html=True); the
+# .chat-message rules style the bot/user message templates in this module.
+css = '''
+<style>
+    /* Styling for the body of the Streamlit app */
+    body {
+        background-color: #f2f7ff; /* Soft blue background */
+        margin: 0; /* Remove default margin */
+        padding: 0; /* Remove default padding */
+    }
+    /* Styling for the chat container */
+    .chat-container {
+        max-width: 600px; /* Adjust the maximum width as needed */
+        margin: 0 auto; /* Center the chat container */
+        background-color: #ffffff; /* White background */
+        padding: 1rem; /* Add padding to the chat container */
+        border-radius: 1rem; /* Rounded corners for the chat container */
+        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); /* Add a subtle box shadow */
+    }
+    /* Styling for the chat messages */
+    .chat-message {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin-bottom: 1rem;
+        display: flex;
+        border: 1px solid #d3d3d3; /* Add a subtle border */
+    }
+    /* Styling for user messages */
+    .chat-message.user {
+        background-color: #ffffff; /* White background for user messages */
+    }
+    /* Styling for bot messages */
+    .chat-message.bot {
+        background-color: #9dc8e5; /* Soft blue background for bot messages */
+    }
+    /* Styling for the avatar */
+    .chat-message .avatar {
+        width: 15%; /* Adjust avatar size */
+    }
+    /* Styling for the avatar image */
+    .chat-message .avatar img {
+        max-width: 60px;
+        max-height: 60px;
+        border-radius: 50%;
+        object-fit: cover;
+    }
+    /* Styling for the message content */
+    .chat-message .message {
+        flex: 1; /* Allow the message to take up remaining space */
+        padding: 0.75rem;
+        color: #495057; /* Dark text color for better readability */
+    }
+    /* Styling for strong (name) in the message */
+    .chat-message .message strong {
+        margin-right: 0.25rem; /* Adjust the margin as needed */
+    }
+</style>
+'''
+# HTML template for a bot message. {{MSG}} is a plain-text placeholder that
+# app.py fills with str.replace() before rendering the result as raw HTML
+# (unsafe_allow_html=True), so the substituted text should be HTML-safe.
+bot_template = '''
+<div class="chat-message bot">
+    <div class="avatar">
+        <img src="https://i.ibb.co/dp2yyWP/bot.jpg">
+    </div>
+    <div class="message">
+        <strong>Doraemon:</strong> {{MSG}}
+    </div>
+</div>
+'''
+# HTML template for a user message. {{MSG}} is a plain-text placeholder that
+# app.py fills with str.replace() before rendering the result as raw HTML
+# (unsafe_allow_html=True), so the substituted text should be HTML-safe.
+user_template = '''
+<div class="chat-message user">
+    <div class="avatar">
+        <img src="https://i.ibb.co/JB2sps1/human.jpg">
+    </div>
+    <div class="message">
+        <strong>Nobita:</strong> {{MSG}}
+    </div>
+</div>
+'''

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+langchain
+langchain
+streamlit
+langchain-google-genai
+transformers
+python-dotenv