Spaces:
Runtime error
Runtime error
Petr Tsvetkov
committed on
Commit
·
30e165f
1
Parent(s):
5f3a4af
Synthetic dataset generation for the first 5 samples; visualization fixed
Browse files
- change_visualizer.py +1 -2
- generate_synthetic_dataset.py +10 -3
- hf_data_loader.py +4 -4
change_visualizer.py
CHANGED
@@ -20,8 +20,7 @@ def update_manual_view(diff_idx):
|
|
20 |
def update_synthetic_view(diff_idx):
|
21 |
diff_idx -= 1
|
22 |
return (df_synthetic.iloc[diff_idx]['annotated_diff'], df_synthetic.iloc[diff_idx]['initial_msg_pred'],
|
23 |
-
df_synthetic.iloc[diff_idx][
|
24 |
-
'get_annotated_diff'],
|
25 |
f"https://github.com/{df_synthetic.iloc[diff_idx]['repo']}/commit/{df_synthetic.iloc[diff_idx]['hash']}")
|
26 |
|
27 |
|
|
|
20 |
def update_synthetic_view(diff_idx):
|
21 |
diff_idx -= 1
|
22 |
return (df_synthetic.iloc[diff_idx]['annotated_diff'], df_synthetic.iloc[diff_idx]['initial_msg_pred'],
|
23 |
+
df_synthetic.iloc[diff_idx]['reference'],
|
|
|
24 |
f"https://github.com/{df_synthetic.iloc[diff_idx]['repo']}/commit/{df_synthetic.iloc[diff_idx]['hash']}")
|
25 |
|
26 |
|
generate_synthetic_dataset.py
CHANGED
@@ -57,9 +57,16 @@ def generate_synthetic_dataset():
|
|
57 |
df['initial_msg_prompt'] = df.apply(generate_prompt_for_row, axis=1)
|
58 |
initial_messages_pred = []
|
59 |
|
60 |
-
for prompt in tqdm(df['initial_msg_prompt']):
|
61 |
-
output =
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
df['initial_msg_pred'] = initial_messages_pred
|
65 |
|
|
|
57 |
df['initial_msg_prompt'] = df.apply(generate_prompt_for_row, axis=1)
|
58 |
initial_messages_pred = []
|
59 |
|
60 |
+
for i, prompt in enumerate(tqdm(df['initial_msg_prompt'])):
|
61 |
+
output = None
|
62 |
+
|
63 |
+
if i < 5:
|
64 |
+
while output is None:
|
65 |
+
try:
|
66 |
+
output = generate_initial_msg(prompt)
|
67 |
+
except:
|
68 |
+
pass
|
69 |
+
initial_messages_pred.append(output if output is not None else "TBA")
|
70 |
|
71 |
df['initial_msg_pred'] = initial_messages_pred
|
72 |
|
hf_data_loader.py
CHANGED
@@ -19,7 +19,7 @@ def load_full_commit_dataset_as_pandas():
|
|
19 |
|
20 |
|
21 |
def load_synthetic_dataset_as_pandas():
|
22 |
-
load_dataset(config.HF_SYNTHETIC_DATASET_NAME,
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
19 |
|
20 |
|
21 |
def load_synthetic_dataset_as_pandas():
|
22 |
+
return load_dataset(config.HF_SYNTHETIC_DATASET_NAME,
|
23 |
+
split=config.HF_SYNTHETIC_DATASET_SPLIT,
|
24 |
+
token=config.HF_TOKEN,
|
25 |
+
cache_dir=config.CACHE_DIR).to_pandas()
|