import os
from typing import Iterator

import google.generativeai as genai
import gradio as gr
from gradio import ChatMessage

# Read the Gemini API key from the environment variable (must be set before launch)
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Use the Gemini 2.0 Flash model with Thinking capabilities
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")

# Title shown on the collapsible "thinking" bubble in the chat UI
THINKING_TITLE = "⚙️ Thinking: *The thoughts produced by the model are experimental"


def format_chat_history(messages: list) -> list:
    """Format the Gradio chat history into the structure Gemini expects."""
    formatted_history = []
    for message in messages:
        # Skip thinking messages (messages with metadata)
        if not (message.get("role") == "assistant" and "metadata" in message):
            formatted_history.append({
                # The Gemini API only accepts the roles "user" and "model"
                "role": "user" if message.get("role") == "user" else "model",
                "parts": [message.get("content", "")],
            })
    return formatted_history


def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """Stream the model's thoughts and response, with conversation history support."""
    try:
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # Format the chat history for Gemini and start a chat session from it
        chat_history = format_chat_history(messages)
        chat = model.start_chat(history=chat_history)
        response = chat.send_message(user_message, stream=True)

        # Initialize buffers and flags
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add an initial, empty thinking message to stream the thoughts into
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": THINKING_TITLE},
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # A two-part chunk marks the transition: the first part ends
                # the thought, the second begins the visible response
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": THINKING_TITLE},
                )
                yield messages

                # Start the response with the second part
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(ChatMessage(role="assistant", content=response_buffer))
                thinking_complete = True
            elif thinking_complete:
                # Stream the response
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(role="assistant", content=response_buffer)
            else:
                # Stream the thinking
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": THINKING_TITLE},
                )

            yield messages

        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}",
            )
        )
        yield messages


def user_message(msg: str, history: list) -> tuple[str, list]:
    """Add the user's message to the chat history and clear the input box."""
    history.append(ChatMessage(role="user", content=msg))
    return "", history
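
# Illustrative example (hypothetical values) of what format_chat_history
# produces: the thinking turn is dropped because it carries metadata, and
# assistant turns are mapped to Gemini's "model" role:
#
#   format_chat_history([
#       {"role": "user", "content": "Hi"},
#       {"role": "assistant", "content": "Let me think...", "metadata": {"title": "⚙️ Thinking"}},
#       {"role": "assistant", "content": "Hello!"},
#   ])
#   # -> [{"role": "user", "parts": ["Hi"]},
#   #     {"role": "model", "parts": ["Hello!"]}]
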
# Build the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini 2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(
            None,
            "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu",
        ),
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4,
        )
        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    msg_store = gr.State("")  # Preserves the user message across the chained events

    input_box.submit(
        lambda msg: (msg, ""),  # Store the message, then clear the input box
        inputs=[input_box],
        outputs=[msg_store, input_box],
        queue=False,
    ).then(
        user_message,  # Add the user message to the chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False,
    ).then(
        stream_gemini_response,  # Generate and stream the response
        inputs=[msg_store, chatbot],
        outputs=chatbot,
    )

    clear_button.click(
        lambda: ([], "", ""),
        outputs=[chatbot, input_box, msg_store],
        queue=False,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(debug=True)
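
# Usage sketch (assumes this file is saved as app.py; the env var name matches
# the os.getenv call above, and 7860 is Gradio's default port):
#
#   export GEMINI_API_KEY="your-api-key"
#   python app.py
#
# Gradio then serves the demo locally, by default at http://127.0.0.1:7860.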