File size: 5,183 Bytes
e83daf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af9a617
 
e83daf4
 
af9a617
e83daf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2dcc1fd
e83daf4
 
 
 
 
 
 
6833e32
e83daf4
 
 
 
6833e32
f0605c4
e83daf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
"""
Usage:
python3 qa_browser.py --share
"""

import argparse
from collections import defaultdict
from glob import glob
import re
import gradio as gr
import pandas as pd

questions = []

question_selector_map = {}
category_selector_map = defaultdict(list)


def display_question(category_selector, request: gr.Request):
    # print(category_selector)
    choices = category_selector_map[category_selector]
    return gr.Dropdown.update(
        value=choices[0],
        choices=choices,
    )


def display_pairwise_answer(
    question_selector, model_selector1, model_selector2, request: gr.Request
):
    q = question_selector_map[question_selector]
    qid = q["question_id"]

    ans1 = q["conversation_a"]
    ans2 = q["conversation_b"]

    chat_mds = pairwise_to_gradio_chat_mds(q, ans1, ans2)
    
    return chat_mds


newline_pattern1 = re.compile("\n\n(\d+\. )")
newline_pattern2 = re.compile("\n\n(- )")


def post_process_answer(x):
    """Fix Markdown rendering problems."""
    x = x.replace("\u2022", "- ")
    x = re.sub(newline_pattern1, "\n\g<1>", x)
    x = re.sub(newline_pattern2, "\n\g<1>", x)
    return x


def pairwise_to_gradio_chat_mds(question, ans_a, ans_b, turn=None):
    end = question["turn"] * 3

    mds = [""] * end
    base = 0
    for i in range(0, end, 3):
        mds[i] = "##### `User`\n" + question["conversation_a"][base]["content"].strip()
        mds[i + 1] = f"##### `{question['model_a']}`\n" + post_process_answer(
            ans_a[base + 1]["content"].strip()
        )
        mds[i + 2] = f"##### `{question['model_b']}`\n" + post_process_answer(
            ans_b[base + 1]["content"].strip()
        )
        base += 2
        
    winner = question["winner"] if "tie" in question["winner"] else question[question["winner"]]
    mds += [f"##### Vote: {winner}"]
    mds += [""] * (16 - len(mds))
    
    return mds


def build_question_selector_map():
    global question_selector_map, category_selector_map

    # Build question selector map
    for q in questions:
        preview = q["conversation_a"][0]["content"][:128] + "..."
        question_selector_map[preview] = q
        category_selector_map[q["category"]].append(preview)


def build_pairwise_browser_tab():
    global question_selector_map, category_selector_map

    num_sides = 2
    num_turns = 5
    side_names = ["A", "B"]

    question_selector_choices = list(question_selector_map.keys())
    category_selector_choices = list(category_selector_map.keys())
    print(category_selector_choices)
    # Selectors
    with gr.Row():
        with gr.Column(scale=1, min_width=200):
            category_selector = gr.Dropdown(
                choices=category_selector_choices,
                # value="Instruction Following",
                label="Category", container=False
            )
        with gr.Column(scale=100):
            question_selector = gr.Dropdown(
                choices=question_selector_choices, label="Question", container=False
            )

    # Conversation
    chat_mds = []
    for i in range(num_turns):
        chat_mds.append(gr.Markdown(elem_id=f"user_question_{i+1}"))
        with gr.Row():
            for j in range(num_sides):
                with gr.Column(scale=100):
                    chat_mds.append(gr.Markdown())

                if j == 0:
                    with gr.Column(scale=1, min_width=8):
                        gr.Markdown()
    chat_mds.append(gr.Markdown())

    # Callbacks
    category_selector.change(display_question, [category_selector], [question_selector])
    question_selector.change(
        display_pairwise_answer,
        [question_selector],
        chat_mds,
    )

    return (category_selector,)


def load_demo():
    dropdown_update = gr.Dropdown.update(value="Math")
    return dropdown_update


def build_demo():
    build_question_selector_map()

    with gr.Blocks(
        title="Chatbot Arena Samples",
        theme=gr.themes.Base(text_size=gr.themes.sizes.text_lg),
    ) as demo:
        gr.Markdown(
            """
# Chatbot Arena Samples
We randomly sample 20 battles from each category using seed 42.
| [Paper](https://arxiv.org/abs/2403.04132) | [Leaderboard](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) |
"""
        )
        (category_selector,) = build_pairwise_browser_tab()
        demo.load(load_demo, [], [category_selector])

    return demo


def load_questions(directory: str):
    import json
    """Load questions from a file."""
    questions = []
    for file in glob(directory):
        with open(file, "r") as ques_file:
            for line in ques_file:
                if line:
                    questions.append(json.loads(line))
    return questions


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int)
    parser.add_argument("--share", action="store_true")
    args = parser.parse_args()
    print(args)
    
    questions = load_questions("data/*.jsonl")

    demo = build_demo()
    demo.launch(
        server_name=args.host, server_port=args.port, share=args.share, max_threads=200
    )