# commit-labeling / app.py
# Page metadata captured together with this file (not part of the program):
#   Petr Tsvetkov · "Do not mention AI" · 1f691bd · raw · history blame · 6.55 kB
import os
import random
import uuid
import gradio as gr
from datasets import load_dataset
# Access token and name of the dataset that receives the feedback; both are read
# from the environment and are None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_DATASET = os.getenv("HF_DATASET")

# Configuration of the benchmark dataset whose commits are labeled.
configuration = "commitchronicle-py-long"

# Test split of the selected configuration, cached in the local "data" directory.
dataset = load_dataset(
    "JetBrains-Research/lca-cmg",
    configuration,
    split="test",
    cache_dir="data",
)
n_samples = len(dataset)

# Saver the UI uses to store submitted feedback (created with private=True).
saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)
def get_github_link(repo, hash):
    """Return the GitHub URL of commit ``hash`` in repository ``repo``.

    ``repo`` is placed right after ``https://github.com/`` and ``hash`` after
    ``/commit/``; neither value is validated or escaped.
    """
    return "https://github.com/{}/commit/{}".format(repo, hash)
def get_diff2html_demo_iframe(github_link):
    """Build an ``<iframe>`` snippet that opens ``github_link`` in the diff2html demo page.

    The viewer settings form a fixed query string; ``github_link`` is appended
    verbatim as the trailing ``diff`` parameter (no escaping is applied).

    Returns:
        The iframe markup as a single string.
    """
    viewer_params = (
        "matching=none",
        "matchWordsThreshold=0.25",
        "maxLineLengthHighlight=10000",
        "diffStyle=word",
        "colorScheme=light",
        "renderNothingWhenEmpty=0",
        "matchingMaxComparisons=2500",
        "maxLineSizeInBlockForComparison=200",
        "outputFormat=line-by-line",
        "drawFileList=1",
        "synchronisedScroll=1",
        "highlight=1",
        "fileListToggle=1",
        "fileListStartVisible=0",
        "highlightLanguages=[object%20Map]",
        "smartSelection=1",
        "fileContentToggle=1",
        "stickyFileHeaders=1",
        f"diff={github_link}",
    )
    viewer_url = "https://diff2html.xyz/demo.html?" + "&".join(viewer_params)

    iframe_attrs = (
        f'src="{viewer_url}"',
        'title="diff2html Demo Diff Viewer"',
        "style='width:100%; height:720px; overflow:auto'",
    )
    return "<iframe " + " ".join(iframe_attrs) + "></iframe>"
def update_commit_view(sample_ind):
    """Collect the values displayed in the commit view for one dataset record.

    Args:
        sample_ind: Index of the record in the module-level ``dataset``.

    Returns:
        A 5-tuple ``(github_link_md, diff_view, commit_msg, repo, hash)``.
        When ``sample_ind`` is at or past ``n_samples`` the tuple holds five
        ``None`` values instead of record data.
    """
    if sample_ind >= n_samples:
        # Keep the regular width: callers concatenate this result onto another
        # tuple, which would raise TypeError for a bare None.
        return (None,) * 5

    record = dataset[sample_ind]
    repo_val = record['repo']
    hash_val = record['hash']

    github_link = get_github_link(repo_val, hash_val)
    github_link_md = f"[See the commit on GitHub]({github_link})"
    diff_view = get_diff2html_demo_iframe(github_link)

    return github_link_md, diff_view, record['message'], repo_val, hash_val
def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample in the session's shuffled order.

    Args:
        current_sample_ind: Position of the currently shown sample within
            ``shuffled_idx``.
        shuffled_idx: Sequence of dataset indices giving the display order.

    Returns:
        A 6-tuple: the new position followed by the five commit-view values
        produced by ``update_commit_view``. When there is no further sample,
        the five view values are ``None`` and the position does not move past
        the end.
    """
    # One slot per commit-view value, so the result always has the same width.
    empty_view = (None,) * 5

    if current_sample_ind >= n_samples:
        # Everything has been shown already; stay where we are.
        return (current_sample_ind,) + empty_view

    current_sample_ind += 1
    if current_sample_ind >= len(shuffled_idx):
        # The last sample was just handled. Indexing shuffled_idx at this
        # position would raise IndexError, so report the final position with
        # an empty view instead.
        return (current_sample_ind,) + empty_view

    updated_view = update_commit_view(shuffled_idx[current_sample_ind])
    if updated_view is None:
        # Defensive: tolerate a helper that signals "no record" with None.
        updated_view = empty_view
    return (current_sample_ind,) + tuple(updated_view)
# Labeling UI: shows one commit (link, diff, message) next to a feedback form.
# NOTE(review): the nesting of rows/columns below was reconstructed from the
# statement order of a whitespace-stripped source — confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Hidden fields: identifiers of the shown commit and the session's sample order.
    repo_val = gr.Textbox(label='repo', visible=False, interactive=False)
    hash_val = gr.Textbox(label='hash', visible=False, interactive=False)
    shuffled_idx_val = gr.JSON(visible=False)

    # Read-only progress slider next to the skip button.
    with gr.Row():
        current_sample_sld = gr.Slider(
            label='sample_ind',
            show_label=False,
            info=f"Samples labeled/skipped (out of {n_samples})",
            minimum=0,
            maximum=n_samples,
            step=1,
            value=0,
            interactive=False,
            container=False,
            scale=5,
        )
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        # Commit link and rendered diff.
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.HTML()

        # Commit message and the questions about it.
        with gr.Column(scale=1):
            commit_msg = gr.Textbox(label="Commit message", interactive=False)
            gr.Markdown("## Please, answer the questions below")
            verbosity_feedback = gr.Radio(
                label='verbosity',
                show_label=False,
                info='How can you describe the length of the commit message above?',
                choices=[('Too short', 0), ('Just right', 1), ('Too verbose', 2)],
            )
            correctness_feedback = gr.Radio(
                label='is_correct',
                show_label=False,
                info='Is the commit message factually correct?',
                choices=[('Yes', True), ('No', False)],
            )
            format_feedback = gr.Slider(
                label='format_score',
                show_label=False,
                info="Rate the commit message's format (1 - very bad, 5 - very good)",
                minimum=1,
                maximum=5,
                step=1,
                interactive=True,
            )
            submit_btn = gr.Button("Submit and continue")
            session_val = gr.Textbox(
                label='session',
                show_label=False,
                info='Session',
                container=True,
                interactive=False,
            )

    # Components refreshed whenever another commit is shown.
    commit_view = [github_link, diff_view, commit_msg, repo_val, hash_val]

    # Components whose values are stored with each submission.
    feedback_form = [
        session_val,
        repo_val,
        hash_val,
        verbosity_feedback,
        correctness_feedback,
        format_feedback,
    ]

    saver.setup([current_sample_sld, *feedback_form], "feedback")

    # Both buttons move to the next sample and refresh the same components.
    sample_outputs = [current_sample_sld, *commit_view]

    skip_btn.click(
        next_sample,
        inputs=[current_sample_sld, shuffled_idx_val],
        outputs=sample_outputs,
    )

    def submit(current_sample, shuffled_idx, *args):
        """Store the feedback for the current sample, then move to the next one."""
        saver.flag((current_sample, *args))
        return next_sample(current_sample, shuffled_idx)

    submit_btn.click(
        submit,
        inputs=[current_sample_sld, shuffled_idx_val, *feedback_form],
        outputs=sample_outputs,
    )

    def init_session(current_sample):
        """Create a session id and a shuffled sample order, and load the first view."""
        session_id = str(uuid.uuid4())
        order = list(range(n_samples))
        random.shuffle(order)
        first_view = update_commit_view(order[current_sample])
        return (session_id, order) + first_view

    demo.load(
        init_session,
        inputs=[current_sample_sld],
        outputs=[session_val, shuffled_idx_val, *commit_view],
    )

demo.launch()