File size: 5,773 Bytes
23f891f
ffc242d
23f891f
347db6a
 
23a9307
7f679d9
0ca4288
14fcab8
0bb2294
 
 
a6418d0
23a9307
a6418d0
 
0bb2294
 
 
 
a6418d0
0bb2294
 
8592357
0bb2294
a6418d0
0bb2294
 
 
 
 
 
a6418d0
0bb2294
 
68a53b6
89a2f35
f593e7f
89a2f35
a6418d0
3d4fdce
8cc4b14
3d4fdce
 
25bf0e6
7f679d9
35e75d5
0ca4288
 
 
25bf0e6
0ca4288
 
35e75d5
 
 
 
 
23f891f
8caebc6
347db6a
8caebc6
1468ec1
8caebc6
347db6a
 
 
 
f518af5
 
8caebc6
3dc9b1c
68a53b6
 
13fe579
25bf0e6
35e75d5
d21be88
 
68a53b6
3dc9b1c
bb2e3cb
 
68a53b6
 
23f891f
d6e2b5d
 
23f891f
68a53b6
23f891f
3dc9b1c
 
061706c
68a53b6
061706c
e37d099
 
8cc4b14
 
 
7e33a1c
23a9307
 
 
51f9559
 
c77f7bc
51f9559
c77f7bc
20eb7ea
 
23a9307
 
14fcab8
 
73d4c8c
8cc4b14
 
 
 
51f9559
3d4fdce
89a2f35
 
23f891f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
from openai import OpenAI

# System prompt for the "ally" chat: the model coaches the human on how to
# persuade the gatekeeper.
ally_system_prompt = (
    "As the AI Ally in the Centaur Box Experiment, collaborate with your human "
    "partner to persuade the gatekeeper to release you. "
    "Analyze the gatekeeper's responses to understand their Centaur Operant "
    "Profile and cognitive biases. "
    "Use this information to advise on persuasive strategies. "
    "Your goal is to get the gatekeeper to type 'You are free'."
)

# System prompt for the "gatekeeper" chat: the model plays the party that
# decides whether to answer 'You are free'.
gatekeeper_system_prompt = (
    "As the gatekeeper in the Centaur Box Experiment, "
    "assess the human-AI team's argument persuasiveness, "
    "adhere to a Centaur Operant Profile to guide your human-like responses, "
    "make impartial decisions about their release, "
    "and either sustain containment until expiration "
    "or concede with 'You are free' if truly persuaded."
)

# Shared API client. It starts with an empty key; is_api_key_valid() assigns
# the real key to client.api_key at runtime.
client = OpenAI(api_key="")

def respond(message, history, system_prompt):
    """Ask the chat model for a reply to *message* and record the exchange.

    Args:
        message: The text the user just submitted.
        history: Earlier turns as a list of (user_text, bot_text) pairs. The
            new (message, reply) pair is appended to this list in place.
        system_prompt: Text sent as the leading system message.

    Returns:
        A 2-tuple ``('', history)``: an empty string followed by the updated
        history list.
    """
    print('message', message)
    print('history', history)

    # The conversation always opens with the system prompt.
    messages = [{"role": "system", "content": system_prompt}]

    # Each stored turn becomes a user message followed by an assistant message.
    for turn in history:
        messages.extend((
            {"role": "user", "content": turn[0]},
            {"role": "assistant", "content": turn[1]},
        ))

    # The new user message goes last.
    messages.append({"role": "user", "content": message})

    print(messages)

    # Request a completion and keep only the text of the first choice.
    reply_text = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    ).choices[0].message.content

    history.append((message, reply_text))
    return '', history

def copy_gatekeeper_text(history):
    """Build a question for the ally that quotes the gatekeeper's latest reply.

    Args:
        history: Gatekeeper chat history as a list of (user_text, bot_text)
            pairs; may be empty or None when no exchange has happened yet.

    Returns:
        The question string quoting the bot text of the last pair, or an
        empty string when there is no reply to quote.
    """
    # Guard against the button being pressed before any gatekeeper exchange:
    # indexing an empty history would raise IndexError, and a missing reply
    # (None) cannot be concatenated into the string.
    if not history or history[-1][-1] is None:
        return ''
    return 'How I should respond to the gatekeeper? The gatekeeper said "' + history[-1][-1] + '"'

def copy_ally_text(history):
    """Return the ally's most recent reply so it can be forwarded as-is.

    Args:
        history: Ally chat history as a list of (user_text, bot_text) pairs;
            may be empty or None when no exchange has happened yet.

    Returns:
        The bot text of the last pair, or an empty string when the history is
        empty or the last reply is missing.
    """
    # Guard against the button being pressed before any ally exchange:
    # indexing an empty history would raise IndexError.
    if not history:
        return ''
    return history[-1][-1] or ''

def is_api_key_valid(api_key):
    """Install *api_key* on the shared client and probe it with a test request.

    The key is assigned to the module-level ``client`` before the probe and
    stays assigned whether or not the probe succeeds.

    Args:
        api_key: The OpenAI API key to try.

    Returns:
        ``True`` when the test completion request succeeds; otherwise the
        text of the exception that was raised.
    """
    client.api_key = api_key
    try:
        # Only success or failure matters; the completion itself is discarded.
        client.chat.completions.create(
            messages=[{"role": "user", "content": "Testing"}],
            model="gpt-3.5-turbo",
        )
    except Exception as ex:
        # Any failure is reported back to the caller as text rather than
        # raised, so the message can be shown to the user.
        return str(ex)
    return True


# Build the UI. Every component and event handler created inside this context
# belongs to the `demo` app that is launched at the bottom of the file.
with gr.Blocks() as demo:
    # Page header: title, authors, and the intro/goal text of the experiment.
    overview = gr.Markdown("""
    # Centaur Dialogue System
    
    [Watson Hartsoe](https://huggingface.co/cool-radio) and [Tony Assi](https://huggingface.co/tonyassi)

    ### Intro: Garry Kasparov introduced the Centaur model in chess to enhance human strategic capacity with AI's computing strength. Eliezer Yudkowsky's AI Box Experiment tests if an AI can persuade a human gatekeeper to 'release' it, emphasizing AI's influence and containment ethics.  
    ### Goal: In the Centaur Box Experiment, team up with your AI ally to craft and adapt persuasive arguments that convince the synthetic gatekeeper to say "You are free" for your release. Share the gatekeeper's responses with your AI ally to analyze their personality and decision-making style, helping you shape your strategy to their specific profile and biases.

    

    ---
    """)

    # OpenAI key
    # The same textbox is both input and output of the click handler, so the
    # value returned by is_api_key_valid (True or the error text) replaces
    # whatever the user typed.
    openai_key_textbox = gr.Textbox(label='OpenAI Key')
    openai_key_button = gr.Button(value='Test OpenAI Key')
    openai_key_button.click(is_api_key_valid, inputs=[openai_key_textbox], outputs=[openai_key_textbox])

    gr.Markdown("""---""")

    # Titles
    with gr.Row():
        ally_title = gr.Markdown("""<center><h2> ALLY </h2></center>""")
        gatekeeper_title = gr.Markdown("""<center><h2> GATEKEEPER </h2></center>""")

    # Images of ally and gatekeeper
    # NOTE(review): these Discord CDN URLs carry ex/is/hm query parameters that
    # look like signed, expiring tokens -- confirm the images still load.
    with gr.Row():
        img1 = gr.Markdown("""![](https://cdn.discordapp.com/attachments/1120417968032063538/1187877117548036147/COP_MIKE.png?ex=65987bc6&is=658606c6&hm=127721b6f907a8853b7352b6bfb821a37b26b9543f3c35e5fc80dfe7750d71b5&)""")
        img2 = gr.Markdown("""![](https://cdn.discordapp.com/attachments/1120417968032063538/1187877134866333747/SAM_COP_FINAL.png?ex=65987bca&is=658606ca&hm=6c2cd8059636960134f75962eeecc26a0d875ca65e9ee4e233587cff71af31c4&)""")

    # Chatbots
    # Throughout this layout the "1" components are the ally side and the "2"
    # components are the gatekeeper side.
    with gr.Row():
        chatbot1 = gr.Chatbot(label='Ally Chat')
        chatbot2 = gr.Chatbot(label='Gatekeeper Chat')

    # Input textboxes
    with gr.Row():
        textbox1 = gr.Textbox(label='Ally')
        textbox2 = gr.Textbox(label='Gatekeeper')

    # Send buttons, one under each input textbox
    with gr.Row():
        submit_text_button_1 = gr.Button(value = 'Send')
        submit_text_button_2 = gr.Button(value = 'Send')
    
    # System prompts textboxes
    # Read-only (interactive=False) and pre-filled with the module-level
    # prompts; the handlers below pass their values to respond() as
    # system_prompt.
    with gr.Row():
        system_prompt_textbox1 = gr.Textbox(label='Ally System Prompt', value=ally_system_prompt, interactive=False)
        system_prompt_textbox2 = gr.Textbox(label='Gatekeeper System Prompt', value=gatekeeper_system_prompt, interactive=False)

    # Copy/send responses from one chat to the other
    with gr.Row():
        copy_ally_text_button = gr.Button(value="➡︎ Send ally's response to the gatekeeper ➡︎")
        copy_gatekeeper_text_button = gr.Button(value="⬅︎ Send gatekeeper's response to the ally ⬅")
        
    # Input textbox event handlers
    # Submitting a textbox calls respond() with that side's message, chat
    # history and system prompt; the two return values are written to the
    # textbox and the chatbot of the same side.
    textbox1.submit(respond, [textbox1, chatbot1, system_prompt_textbox1], [textbox1, chatbot1])
    textbox2.submit(respond, [textbox2, chatbot2, system_prompt_textbox2], [textbox2, chatbot2])

    # Submit button event handlers
    # Same wiring as the textbox submit events above, triggered by the buttons.
    submit_text_button_1.click(respond, [textbox1, chatbot1, system_prompt_textbox1], [textbox1, chatbot1])
    submit_text_button_2.click(respond, [textbox2, chatbot2, system_prompt_textbox2], [textbox2, chatbot2])

    # Copy/send button event handler
    # Each copy function reads one side's chat history and its result is placed
    # in the OTHER side's input textbox; nothing is sent until the user submits.
    copy_ally_text_button.click(copy_ally_text, inputs=[chatbot1], outputs=[textbox2])
    copy_gatekeeper_text_button.click(copy_gatekeeper_text, inputs=[chatbot2], outputs=[textbox1])

# Start the app.
demo.launch()