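"""Gradio app for the Human-Likeness Voting study.

Shows a random instruction with two shuffled answers (one from a human-like
model, one from the official instruct model it is compared against) and logs
each vote both to a local CSV and to the HumanLLMs/log dataset on the
Hugging Face Hub.
"""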
import gradio as gr
import pandas as pd
import random
from datasets import load_dataset, Dataset
from huggingface_hub import HfApi, login
import os
from datetime import datetime

# Authenticate against the Hugging Face Hub using the token from the environment.
hf_api = HfApi()
HF_TOKEN = os.getenv('HF_TOKEN')
login(token=HF_TOKEN)

# The three evaluation datasets; each pairs a human-like model's response with the
# official instruct model's response for the same instruction.
dataset_1 = load_dataset("HumanLLMs/LlamaPair")["train"]
dataset_2 = load_dataset("HumanLLMs/QwenPair")["train"]
dataset_3 = load_dataset("HumanLLMs/MistralPair")["train"]

# Column schema for the annotation log (the submit handler reloads the live log from the Hub).
df_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])

def remove_emojis(text):
    # Strip all non-ASCII characters (emojis included) so the HTML cards render cleanly.
    return text.encode('ascii', 'ignore').decode('ascii')

def get_random_row():
    # Pick one of the three pair datasets at random and remember which one it was.
    selected_dataset, pair_name = random.choice([
        (dataset_1, "LlamaPair"),
        (dataset_2, "QwenPair"),
        (dataset_3, "MistralPair"),
    ])

    # Sample a random instruction together with its two candidate responses.
    row = selected_dataset[random.randint(0, len(selected_dataset) - 1)]
    instruction = row["instruction"]
    response_human = row["response_human_like_model"]
    response_official = row["response_offical_instruct_model"]  # column name as spelled in the dataset

    # Shuffle so the human-like answer does not always appear in the same position.
    responses = [("Human-like Model", response_human),
                 ("Official Model", response_official)]
    random.shuffle(responses)

    return (instruction, remove_emojis(responses[0][1]), remove_emojis(responses[1][1]),
            responses[0][0], responses[1][0], pair_name)

def format_response_html(response, answer_number):
    # Render a response as a white-on-black card labelled "Answer 1" or "Answer 2".
    return f'''
    <div style="border: 1px solid white; background-color: black;
                padding: 10px; margin: 5px;">
        <strong style="color: white;">Answer {answer_number}:</strong>
        <div style="color: white;">{response}</div>
    </div>
    '''

def submit_choice(selected_response, instruction, label_1, label_2, pair_name):
    # Reload the running annotation log from the Hub; fall back to an empty log if it does not exist yet.
    try:
        df_log = load_dataset("HumanLLMs/log")["train"].to_pandas()
    except Exception:
        df_log = pd.DataFrame(columns=["instruction", "selected_model",
                                       "pair", "submission_time"])

    # Map the selected answer back to the model label it was showing.
    selected_model = label_1 if selected_response == "Answer 1" else label_2
    submission_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Record the vote for the question that was just answered.
    new_entry = pd.DataFrame({
        "instruction": [instruction],
        "selected_model": [selected_model],
        "pair": [pair_name],
        "submission_time": [submission_time]
    })
    df_log = pd.concat([df_log, new_entry], ignore_index=True)
    df_log.to_csv("annotations_log.csv", index=False)
    log = Dataset.from_pandas(df_log)
    log.push_to_hub("HumanLLMs/log")

    # Load the next random question and refresh the interface.
    new_instruction, new_response_1, new_response_2, new_label_1, new_label_2, new_pair_name = get_random_row()

    return (
        f"### Question:\n{new_instruction}",
        format_response_html(new_response_1, 1),
        format_response_html(new_response_2, 2),
        new_instruction,
        new_label_1,
        new_label_2,
        new_pair_name,
        "Your choice has been recorded. A new question is loaded!"
    )

def create_interface():
    # Seed the interface with an initial random question and its two shuffled answers.
    instruction, response_1, response_2, label_1, label_2, pair_name = get_random_row()

    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.Markdown("# Human-Likeness Voting System")
        gr.Markdown("![logo](logo.png)")
        gr.Markdown(
            "This interface was created to compare the performance of the human-like LLMs "
            "developed by our team with the models they were trained from. The results of this "
            "study will be presented in a paper, so please vote fairly and carefully when "
            "selecting an answer. On behalf of the research team, thank you for your contribution."
        )
        gr.Markdown("## Instructions")
        gr.Markdown(
            """
            1. Read the provided question carefully.
            2. Read both responses carefully.
            3. Select the answer that most resembles a human response."""
        )

        # Hidden state: the current question, which model produced each answer, and the source pair.
        current_instruction = gr.State(instruction)
        label_1_state = gr.State(label_1)
        label_2_state = gr.State(label_2)
        pair_name_state = gr.State(pair_name)

        question_display = gr.Markdown(value=f"### Question:\n{instruction}")

        with gr.Row():
            with gr.Column():
                response_1_display = gr.HTML(format_response_html(response_1, 1))
            with gr.Column():
                response_2_display = gr.HTML(format_response_html(response_2, 2))

        with gr.Row():
            selected_response = gr.Radio(
                ["Answer 1", "Answer 2"],
                label="Which answer is better?",
                interactive=True
            )

        submit_btn = gr.Button("Submit Choice")

        status_output = gr.Textbox(
            interactive=False,
            label="Status",
            value="Select an answer and click Submit"
        )

        # Wire the submit button: log the vote, then refresh every display and state component.
        submit_btn.click(
            fn=submit_choice,
            inputs=[
                selected_response,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state
            ],
            outputs=[
                question_display,
                response_1_display,
                response_2_display,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state,
                status_output
            ]
        )

    return demo

if __name__ == "__main__":
    interface = create_interface()
    interface.launch(share=True)