File size: 4,566 Bytes
7563a2f
 
 
 
96152df
 
 
 
 
 
 
7563a2f
 
 
 
 
 
 
96152df
7563a2f
 
 
 
 
 
 
96152df
7563a2f
 
 
96152df
 
7563a2f
 
 
96152df
 
7563a2f
96152df
7563a2f
 
 
96152df
 
7563a2f
 
 
96152df
7563a2f
 
 
96152df
 
7563a2f
 
 
96152df
7563a2f
 
 
 
96152df
7563a2f
 
 
 
120f827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96152df
120f827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96152df
91ab4ba
7563a2f
c097234
7563a2f
120f827
c113346
120f827
7563a2f
c113346
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
from gradio import ChatMessage
import time

import gradio as gr
from gradio import ChatMessage
import time

# Short pause (seconds) between scripted chat steps, for streaming realism.
sleep_time = 0.1
# Longer pause used while the simulated "tool" is gathering results.
long_sleep_time = 1

def generate_response(history):
    """Play back a scripted weather conversation, yielding after every step.

    Mutates *history* in place by appending gradio ``ChatMessage`` entries
    and yields it repeatedly so a ``gr.Chatbot`` can render the exchange as
    it unfolds.  Tool-call sub-messages are nested via ``id``/``parent_id``
    in the message metadata.
    """
    # Scripted user question.
    history += [ChatMessage(role="user", content="What is the weather in San Francisco right now?")]
    yield history
    time.sleep(sleep_time)

    # Assistant announces it will use a tool.
    history += [ChatMessage(role="assistant", content="In order to find the current weather in San Francisco, I will need to use my weather tool.")]
    yield history
    time.sleep(sleep_time)

    # Parent "tool" message: starts empty, filled in below while still open.
    history += [ChatMessage(role="assistant", content="", metadata={"title": "Gathering Weather Websites", "id": 1})]
    yield history
    time.sleep(long_sleep_time)

    # Update the open tool message in place rather than appending a new one.
    history[-1].content = "Will check: weather.com and sunny.org"
    yield history
    time.sleep(sleep_time)

    # First child result, nested under the tool message (parent_id -> 1).
    history += [ChatMessage(role="assistant", content="Received weather from weather.com.", metadata={"title": "API Success βœ…", "parent_id": 1, "id": 2})]
    yield history
    time.sleep(sleep_time)

    # Second child: simulated failure from the other site.
    history += [ChatMessage(role="assistant", content="API Error when connecting to sunny.org.", metadata={"title": "API Error πŸ’₯ ", "parent_id": 1, "id": 3})]
    yield history
    time.sleep(sleep_time)

    # Plain assistant follow-up after the tool results.
    history += [ChatMessage(role="assistant", content="I will try again")]
    yield history


def simulate_thinking_chat(message: str, history: list):
    """Stream a fake "thinking" trace, then append a final answer.

    The incoming *message* is ignored; the thoughts are canned.  The last
    history entry is repeatedly replaced so the thinking bubble appears to
    grow in place, and *history* is yielded after every update.
    """
    # Open an empty "thinking" message; the metadata title renders it as a
    # collapsible thought bubble in gr.Chatbot.
    history.append(
        ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "Thinking Process πŸ’­"},
        )
    )
    time.sleep(0.5)
    yield history

    # Canned thoughts the fake LLM will "think" through, in order.
    canned_thoughts = (
        "First, I need to understand the core aspects of the query...",
        "Now, considering the broader context and implications...",
        "Analyzing potential approaches to formulate a comprehensive answer...",
        "Finally, structuring the response for clarity and completeness...",
    )

    bullets = []
    for step in canned_thoughts:
        time.sleep(0.5)  # small delay for realism
        bullets.append(f"- {step}")
        # Replace the open thinking message with all bullets gathered so far
        # (blank line between bullets, no trailing whitespace).
        history[-1] = ChatMessage(
            role="assistant",
            content="\n\n".join(bullets),
            metadata={"title": "πŸ’­ Thinking Process"},
        )
        yield history

    # Thinking done: append the final answer as a separate message.
    history.append(
        ChatMessage(
            role="assistant",
            content="Based on my thoughts and analysis above, my response is: This dummy repro shows how thoughts of a thinking LLM can be progressively shown before providing its final answer."
        )
    )
    yield history


# Demo 1: user-driven "thinking" chat wired up with low-level Blocks events.
with gr.Blocks() as demo1:
    gr.Markdown("# Thinking LLM Demo πŸ€”")
    chatbot = gr.Chatbot(type="messages", render_markdown=True)
    msg = gr.Textbox(placeholder="Type your message...")

    # On submit: first append the user's message to the chat (the textbox
    # value is passed back unchanged), then stream the simulated thinking
    # process into the chatbot.
    msg.submit(
        lambda text, msgs: (text, msgs + [ChatMessage(role="user", content=text)]),
        [msg, chatbot],
        [msg, chatbot],
    ).then(simulate_thinking_chat, [msg, chatbot], chatbot)


# Demo 2: fully scripted weather conversation that plays back automatically
# when the app loads.
with gr.Blocks(theme="ocean") as demo2:
    chatbot = gr.Chatbot(
        type="messages",
        height=500,
        show_copy_button=True,
    )
    demo2.load(generate_response, chatbot, chatbot)


# Combine both demos into a single tabbed app; each Blocks becomes one tab.
demo_tabbed = gr.TabbedInterface([demo1, demo2], ["First tab", "Second tab"])

if __name__ == "__main__":
    demo_tabbed.launch()