"""Gradio demos: nested tool-call messages and a simulated "thinking" LLM chat.

Two tabs: the first lets the user chat with an assistant that streams a
progressively growing "thinking process" bubble; the second auto-plays a
canned tool-using conversation with nested status messages on load.
"""

import time

import gradio as gr
from gradio import ChatMessage

# Delays (seconds) used to simulate latency in the streamed responses.
sleep_time = 0.1
long_sleep_time = 1


def generate_response(history):
    """Yield successive chat histories simulating a tool-using assistant.

    Appends a user question, an assistant reply, and a nested group of
    tool-status messages (linked via ``metadata`` ``id``/``parent_id``),
    yielding the updated history after each step so the UI streams
    progressively.

    Args:
        history: list of ChatMessage objects; mutated in place.

    Yields:
        The same ``history`` list after each appended/updated message.
    """
    history.append(
        ChatMessage(
            role="user",
            content="What is the weather in San Francisco right now?",
        )
    )
    yield history
    time.sleep(sleep_time)

    history.append(
        ChatMessage(
            role="assistant",
            content="In order to find the current weather in San Francisco, I will need to use my weather tool.",
        )
    )
    yield history
    time.sleep(sleep_time)

    # Parent "tool" message; the two messages below nest under it via parent_id.
    history.append(
        ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "Gathering Weather Websites", "id": 1},
        )
    )
    yield history
    time.sleep(long_sleep_time)

    # Fill in the initially-empty tool message in place.
    history[-1].content = "Will check: weather.com and sunny.org"
    yield history
    time.sleep(sleep_time)

    history.append(
        ChatMessage(
            role="assistant",
            content="Received weather from weather.com.",
            metadata={"title": "API Success ✅", "parent_id": 1, "id": 2},
        )
    )
    yield history
    time.sleep(sleep_time)

    history.append(
        ChatMessage(
            role="assistant",
            content="API Error when connecting to sunny.org.",
            metadata={"title": "API Error 💥 ", "parent_id": 1, "id": 3},
        )
    )
    yield history
    time.sleep(sleep_time)

    history.append(
        ChatMessage(
            role="assistant",
            content="I will try again",
        )
    )
    yield history


def simulate_thinking_chat(message: str, history: list):
    """Mimicking thinking process and response.

    Streams a "Thinking Process" message that accumulates bullet-point
    thoughts one by one, then appends the final answer, yielding the
    history after each update so the chatbot renders progressively.

    Args:
        message: the user's message (currently unused by the simulation).
        history: list of ChatMessage objects; mutated in place.

    Yields:
        The same ``history`` list after each update.
    """
    # Add initial empty thinking message to chat history.
    history.append(
        ChatMessage(
            role="assistant",
            content="",  # filled in progressively below
            metadata={"title": "Thinking Process 💭"},  # renders a collapsible "thought" header
        )
    )
    time.sleep(0.5)
    yield history  # return current state of chat history

    # Canned thoughts the fake LLM will "think" through.
    thoughts = [
        "First, I need to understand the core aspects of the query...",
        "Now, considering the broader context and implications...",
        "Analyzing potential approaches to formulate a comprehensive answer...",
        "Finally, structuring the response for clarity and completeness...",
    ]

    # All thoughts shown so far, rendered as markdown bullets.
    accumulated_thoughts = ""

    for thought in thoughts:
        time.sleep(0.5)  # small delay for realism

        # Append the new thought with a markdown bullet; \n\n creates line breaks.
        accumulated_thoughts += f"- {thought}\n\n"

        # Replace the last message so the thinking bubble grows in place.
        history[-1] = ChatMessage(
            role="assistant",
            content=accumulated_thoughts.strip(),  # drop trailing blank lines
            metadata={"title": "💭 Thinking Process"},
        )
        yield history

    # After thinking is complete, add the final response.
    history.append(
        ChatMessage(
            role="assistant",
            content="Based on my thoughts and analysis above, my response is: This dummy repro shows how thoughts of a thinking LLM can be progressively shown before providing its final answer.",
        )
    )
    yield history  # final state of chat history


# Tab 1: interactive thinking-LLM chat driven by the user's typed message.
with gr.Blocks() as demo1:
    gr.Markdown("# Thinking LLM Demo 🤔")
    chatbot = gr.Chatbot(type="messages", render_markdown=True)
    msg = gr.Textbox(placeholder="Type your message...")
    # First echo the user message into the history, then stream the
    # simulated thinking process.
    # NOTE(review): the lambda returns `m` back into the textbox, so the
    # typed text is NOT cleared on submit — confirm this is intentional
    # (the common pattern returns "" to clear the box).
    msg.submit(
        lambda m, h: (m, h + [ChatMessage(role="user", content=m)]),
        [msg, chatbot],
        [msg, chatbot],
    ).then(simulate_thinking_chat, [msg, chatbot], chatbot)

# Tab 2: auto-plays the canned tool-call conversation when the page loads.
with gr.Blocks(theme="ocean") as demo2:
    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True)
    demo2.load(generate_response, chatbot, chatbot)

demo_tabbed = gr.TabbedInterface([demo1, demo2], ["First tab", "Second tab"])

if __name__ == "__main__":
    demo_tabbed.launch()