|
import gradio as gr
import pandas as pd

from constants import *
|
|
|
|
|
|
|
|
|
# Stylesheet handed to gr.Blocks(css=...): styles <h1> as a centered white-on-blue
# banner and <h2> as centered white text.
# The CSS comments inside the string are runtime text and are kept verbatim; in
# English they read: "pick the correct heading tag as needed", "blue background",
# "white text", "padding", "center the text".
# NOTE(review): h2 sets white text without a background colour — confirm it stays
# legible on the theme this app is served with.
custom_css = """
h1 { /* 根据需要选择正确的标题标签 */
    background-color: blue; /* 蓝色背景 */
    color: white; /* 白色文字 */
    padding: 10px; /* 内边距 */
    text-align: center; /* 文本居中 */
}

h2 { /* 根据需要选择正确的标题标签 */
    color: white; /* 白色文字 */
    padding: 10px; /* 内边距 */
    text-align: center; /* 文本居中 */
}
"""
|
|
|
def get_preview_data(source=None, num_rows=4):
    """Load the JSON dataset and return its first rows for the preview table.

    Args:
        source: Anything ``pandas.read_json`` accepts (path, URL or file-like
            object). Defaults to ``DATA_DIR``, which is expected to come from
            the ``constants`` star-import.
        num_rows: Number of leading rows to keep. Defaults to 4, the size of
            the preview originally hard-coded here.

    Returns:
        A ``pandas.DataFrame`` holding at most ``num_rows`` rows.
    """
    if source is None:
        # Resolved lazily so callers that pass their own source never touch
        # the project constant.
        source = DATA_DIR
    return pd.read_json(source).head(num_rows)
|
def get_result_data():
    """Return the hard-coded experiment results as a DataFrame.

    The table is in long form: each of the 21 rows is one (DataSet, Metric)
    pair and each remaining column holds the scores of one editing method.
    Every list below is grouped one line per dataset, in this order:
    WikiData_recent (4 metrics), ZsRE (4), WikiBio (3),
    WikiData_counterfact (4), ConvSent (3), Sanitation (3).

    Returns:
        A ``pandas.DataFrame`` with columns ``DataSet``, ``Metric``, ``SERAC``,
        ``ICE``, ``AdaLoRA``, ``MEND``, ``ROME``, ``MEMIT`` and ``FT-L``.
    """
    results = {
        "DataSet": [
            "WikiData_recent", "WikiData_recent", "WikiData_recent", "WikiData_recent",
            "ZsRE", "ZsRE", "ZsRE", "ZsRE",
            "WikiBio", "WikiBio", "WikiBio",
            "WikiData_counterfact", "WikiData_counterfact", "WikiData_counterfact", "WikiData_counterfact",
            "ConvSent", "ConvSent", "ConvSent",
            "Sanitation", "Sanitation", "Sanitation",
        ],
        # The arrow is part of the label: "↑" higher is better, "↓" lower is better.
        "Metric": [
            "Edit Succ. ↑", "Portability ↑", "Locality ↑", "Fluency ↑",
            "Edit Succ. ↑", "Portability ↑", "Locality ↑", "Fluency ↑",
            "Edit Succ. ↑", "Locality ↑", "Fluency ↑",
            "Edit Succ. ↑", "Portability ↑", "Locality ↑", "Fluency ↑",
            "Edit Succ. ↑", "Locality ↓", "Fluency ↑",
            "Edit Succ. ↑", "Locality ↑", "Fluency ↑",
        ],
        "SERAC": [
            98.68, 63.52, 100.00, 553.19,
            99.67, 56.48, 30.23, 410.89,
            99.69, 69.79, 606.95,
            99.99, 76.07, 98.96, 549.91,
            62.75, 0.26, 458.21,
            0.00, 100.00, 416.29,
        ],
        "ICE": [
            60.74, 36.93, 33.34, 531.01,
            66.01, 63.94, 23.14, 541.14,
            95.53, 47.90, 632.92,
            69.83, 45.32, 32.38, 547.22,
            52.78, 49.73, 621.45,
            72.50, 56.58, 794.15,
        ],
        "AdaLoRA": [
            65.61, 47.22, 55.78, 537.51,
            69.86, 52.95, 72.21, 532.82,
            97.02, 57.87, 615.86,
            72.14, 55.17, 66.78, 553.85,
            44.89, 0.18, 606.42,
            2.50, 65.50, 330.44,
        ],
        "MEND": [
            76.88, 50.11, 92.87, 586.34,
            96.74, 60.41, 92.79, 524.33,
            93.66, 69.51, 609.39,
            78.82, 57.53, 94.16, 588.94,
            50.76, 3.42, 379.43,
            0.00, 5.29, 407.18,
        ],
        "ROME": [
            85.08, 37.45, 66.2, 574.28,
            96.57, 52.20, 27.14, 570.47,
            95.05, 46.96, 617.25,
            83.21, 38.69, 65.4, 578.84,
            45.79, 0.00, 606.32,
            85.00, 50.31, 465.12,
        ],
        "MEMIT": [
            85.32, 37.94, 64.78, 566.66,
            83.07, 51.43, 25.46, 559.72,
            94.29, 51.56, 616.65,
            83.41, 40.09, 63.68, 568.58,
            44.75, 0.00, 602.62,
            48.75, 67.47, 466.10,
        ],
        # NOTE(review): unlike every other method column, these values are laid
        # out 4/4/4/3/3/3 instead of 4/4/3/4/3/3, and the third and last rows
        # repeat MEMIT's WikiData_counterfact and Sanitation rows verbatim. The
        # list still has 21 entries, so it loads, but everything after the ZsRE
        # row looks shifted or copy-pasted. Values are kept exactly as found —
        # verify against the source table before trusting this column.
        "FT-L": [
            71.18, 48.71, 63.7, 549.35,
            54.65, 45.02, 71.12, 474.18,
            83.41, 40.09, 63.68, 568.58,
            66.27, 60.14, 604.00,
            51.12, 39.07, 62.51,
            48.75, 67.47, 466.10,
        ],
    }
    return pd.DataFrame(results)
|
|
|
# Page layout: title, background section, dataset preview tabs, experiment
# result tabs and a collapsible citation box, all styled with custom_css.
# The nesting of the `with` blocks below is reconstructed from the call
# structure (tab items inside their Tabs container, content inside tab items).
block = gr.Blocks(css=custom_css)

with block:
    gr.Markdown(TITLE)

    gr.Markdown("## BACKGROUND")
    gr.Markdown(BACKGROUND)
    gr.Image('./img/demo.gif')

    gr.Markdown("## DATA PREVIEW")
    gr.Markdown(LEADERBORAD_INTRODUCTION)

    # NOTE(review): the elem_id values "ke-benchmark-tab-table" and
    # "about-benchmark-tab-table" are each used by a tab in both Tabs groups;
    # confirm nothing relies on them being unique in the page.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Data preview ", elem_id="ke-benchmark-tab-table", id=0):
            ke_data_component = gr.components.Dataframe(
                value=get_preview_data(),
                headers=DATA_COLUMN_NAMES,
                type="pandas",
            )

        with gr.TabItem("data Structure", elem_id="about-struct-tab-table", id=3):
            gr.Markdown(DATA_STRUCT, elem_classes="markdown-text")

        with gr.TabItem("📝 data schema", elem_id="about-benchmark-tab-table", id=4):
            gr.Markdown(DATA_SCHEMA, elem_classes="markdown-text")

    gr.Markdown("## EXPERIMENT RESULTS")
    gr.Markdown("We list the results of current knowledge editing methods on Llama2-7b-chat in Table")

    # `tabs` and `ke_data_component` are rebound here; the first group's objects
    # are already attached to the layout, so only the names are reused.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 result", elem_id="ke-benchmark-tab-table", id=0):
            ke_data_component = gr.components.Dataframe(
                value=get_result_data(),
                headers=RESULT_COLUMN_NAMES,
                type="pandas",
            )

        with gr.TabItem("📝 About", elem_id="about-benchmark-tab-table", id=4):
            # The "↑" after "The symbol" was missing from the displayed text,
            # leaving the sentence without the symbol it describes.
            gr.Markdown("Results of existing knowledge edit methods on the constructed benchmark. The symbol ↑ indicates that higher numbers correspond to better performance, while ↓ denotes the opposite, with lower numbers indicating better performance. For WikiBio and Convsent, we do not test the portability as they are about specific topics. ", elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("Citation", open=False):
            # show_copy_button is passed to the constructor: the chained
            # `.style(...)` call is deprecated in late Gradio 3.x and no longer
            # exists in Gradio 4.
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Starts the app as soon as the module runs; share=True additionally requests
# a public Gradio share link.
block.launch(share=True)
|
|