Petr Tsvetkov committed
Commit f5faae7 • 1 Parent(s): 6676c5a

Add edit distance and edit time metrics; add GPT-based metric

api_wrappers/grazie_wrapper.py CHANGED
@@ -1,3 +1,4 @@
+import pickle
 import time
 
 from grazie.api.client.chat.prompt import ChatPrompt
@@ -14,8 +15,19 @@ client = GrazieApiGatewayClient(
     grazie_jwt_token=config.GRAZIE_API_JWT_TOKEN
 )
 
+LLM_CACHE_FILE = config.CACHE_DIR / f"{config.LLM_MODEL}.cache.pkl"
+LLM_CACHE = {}
+LLM_CACHE_USED = {}
 
-def generate_for_prompt(prompt):
+if not LLM_CACHE_FILE.exists():
+    with open(LLM_CACHE_FILE, "wb") as file:
+        pickle.dump(obj=LLM_CACHE, file=file)
+
+with open(LLM_CACHE_FILE, "rb") as file:
+    LLM_CACHE = pickle.load(file=file)
+
+
+def llm_request(prompt):
     output = None
 
     while output is None:
@@ -24,7 +36,7 @@ def generate_for_prompt(prompt):
                 chat=ChatPrompt()
                 .add_system("You are a helpful assistant.")
                 .add_user(prompt),
-                profile=LLMProfile("gpt-4-1106-preview")
+                profile=LLMProfile(config.LLM_MODEL)
             ).content
         except:
             time.sleep(config.GRAZIE_TIMEOUT_SEC)
@@ -32,3 +44,23 @@ def generate_for_prompt(prompt):
     assert output is not None
 
     return output
+
+
+def generate_for_prompt(prompt):
+    if prompt not in LLM_CACHE:
+        LLM_CACHE[prompt] = []
+
+    if prompt not in LLM_CACHE_USED:
+        LLM_CACHE_USED[prompt] = 0
+
+    while LLM_CACHE_USED[prompt] >= len(LLM_CACHE[prompt]):
+        new_response = llm_request(prompt)
+        LLM_CACHE[prompt].append(new_response)
+
+        with open(LLM_CACHE_FILE, "wb") as file:
+            pickle.dump(obj=LLM_CACHE, file=file)
+
+    result = LLM_CACHE[prompt][LLM_CACHE_USED[prompt]]
+    LLM_CACHE_USED[prompt] += 1
+
+    return result
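
A minimal sketch (illustrative only, not part of the commit) of the replay logic that the new generate_for_prompt uses: cached responses for a prompt are handed out in order, and the LLM is only called once the cache for that prompt is exhausted. The Grazie client is stubbed out and the pickle persistence is omitted here.

# Illustrative sketch only: fake_llm_request stands in for llm_request();
# saving/loading LLM_CACHE_FILE is skipped to keep the example self-contained.
cache = {}        # prompt -> list of responses generated so far
cache_used = {}   # prompt -> how many cached responses this run has consumed

def fake_llm_request(prompt):
    return f"response #{len(cache[prompt])} to {prompt!r}"

def generate_for_prompt(prompt):
    cache.setdefault(prompt, [])
    cache_used.setdefault(prompt, 0)

    # Only call the (stubbed) LLM once every cached response has been used.
    while cache_used[prompt] >= len(cache[prompt]):
        cache[prompt].append(fake_llm_request(prompt))

    result = cache[prompt][cache_used[prompt]]
    cache_used[prompt] += 1
    return result

print(generate_for_prompt("prompt A"))  # response #0 (fresh request)
print(generate_for_prompt("prompt A"))  # response #1 (fresh request)
# A later run that reloads the cache from disk would serve both calls
# from the cached list without issuing new requests.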
api_wrappers/hf_data_loader.py CHANGED
@@ -1,3 +1,6 @@
+import json
+from datetime import datetime, timedelta
+
 from datasets import load_dataset
 
 import config
@@ -18,9 +21,39 @@ def load_full_commit_as_pandas():
         columns={'message': 'reference'})
 
 
+def edit_time_from_history(history_str):
+    history = json.loads(history_str)
+
+    if len(history) == 0:
+        return 0
+
+    timestamps = list(map(lambda e: datetime.fromisoformat(e['ts']), history))
+    delta = (max(timestamps) - min(timestamps))
+
+    return delta // timedelta(milliseconds=1)
+
+
+def edit_time_from_timestamps(row):
+    loaded_ts = datetime.fromisoformat(row['loaded_ts'])
+    submitted_ts = datetime.fromisoformat(row['submitted_ts'])
+
+    delta = submitted_ts - loaded_ts
+
+    result = delta // timedelta(milliseconds=1)
+
+    return result if result >= 0 else None
+
+
 def load_processed_rewriting_as_pandas():
     manual_rewriting = load_raw_rewriting_as_pandas()[
-        ["hash", "repo", "commit_msg_start", "commit_msg_end", "session"]]
+        ["hash", "repo", "commit_msg_start", "commit_msg_end", "session", "commit_msg_history", "loaded_ts",
+         "submitted_ts"]]
+
+    manual_rewriting['edit_time_hist'] = manual_rewriting['commit_msg_history'].apply(edit_time_from_history)
+    manual_rewriting['edit_time'] = manual_rewriting.apply(edit_time_from_timestamps, axis=1)
+
+    manual_rewriting.drop(columns=['commit_msg_history', "loaded_ts", "submitted_ts"])
+
     manual_rewriting.set_index(["hash", "repo"], inplace=True)
 
     mods_dataset = load_full_commit_as_pandas()[["hash", "repo", "mods"]]
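
For orientation, a self-contained illustration (toy timestamps, not from the dataset) of how edit_time_from_history measures the span between the first and last edit event in whole milliseconds:

import json
from datetime import datetime, timedelta

def edit_time_from_history(history_str):
    history = json.loads(history_str)
    if len(history) == 0:
        return 0
    timestamps = [datetime.fromisoformat(e['ts']) for e in history]
    # Integer division by a 1 ms timedelta converts the span to milliseconds.
    return (max(timestamps) - min(timestamps)) // timedelta(milliseconds=1)

history = json.dumps([
    {"ts": "2024-01-01T10:00:00"},
    {"ts": "2024-01-01T10:00:12.500000"},
])
print(edit_time_from_history(history))  # 12500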
config.py CHANGED
@@ -22,6 +22,8 @@ HF_PREDICTIONS_DATASET_SPLIT = "test"
 HF_SYNTHETIC_DATASET_NAME = "petrtsv-jb/synthetic-commit-msg-rewriting"
 HF_SYNTHETIC_DATASET_SPLIT = 'train'
 
+LLM_MODEL = "gpt-4-1106-preview"
+
 CACHE_DIR = Path("cache")
 CACHE_DIR.mkdir(exist_ok=True)
 
custom_metrics/gpt_eval.py CHANGED
@@ -1,46 +1,53 @@
-import time
-
 from api_wrappers import grazie_wrapper
 
 
-def build_prompt(prediction, reference):
-    return f"""Your task is to rate the quality of the generated commit message using the scale from 1 to 5.
-
-A good commit message has to be concise.
-Assign lower scores for the commit messages that are too verbose for a commit message.
+def build_prompt_ref(prediction, reference):
+    return f"""Evaluate the following commit message based on clarity, specificity, context, and conciseness without
+providing any additional feedback or commentary:
 
-The generated commit message you have to evaluate:
-START OF THE GENERATED COMMIT MESSAGE
+START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
 {prediction}
-END OF THE GENERATED COMMIT MESSAGE
+END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
 
-Here is an example of an ideal reference commit message for the same commit:
-START OF THE REFERENCE COMMIT MESSAGE
+For reference, consider this as an example of a good commit message for the same commit that is both concise and
+specific:
+START OF THE REFERENCE COMMIT MESSAGE
 {reference}
 END OF THE REFERENCE COMMIT MESSAGE
 
-All the information in the reference commit message is true.
-
-Print only one integer number after the token "OUTPUT" - the rating of the generated commit message.
-Do not print anything that is not an integer.
-
-OUTPUT
+YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the
+lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.
 """
 
 
 N_RETRIES = 3
 
 
-def compute(prediction, reference):
-    prompt = build_prompt(prediction, reference)
+def get_number_for_prompt(prompt):
     outputs = []
+    result = None
 
     for i in range(N_RETRIES):
         try:
-            output = grazie_wrapper.generate_for_prompt(prompt).strip()[-1]
+            output = grazie_wrapper.generate_for_prompt(prompt).strip().split()[-1]
             outputs.append(output)
-            return int(output)
+
+            result = int(output)
+            break
         except ValueError:
             continue
 
-    raise RuntimeError(f"GPT4 cannot generate a number. Its outputs were: {str(outputs)}")
+    if result is None:
+        raise RuntimeError(f"LLM cannot generate a number. Its outputs were: {str(outputs)}")
+
+    return result
+
+
+def compute_ref(prediction, reference, n_requests):
+    prompt = build_prompt_ref(prediction, reference)
+    results = [
+        get_number_for_prompt(prompt)
+        for _ in range(n_requests)
+    ]
+
+    return sum(results) / len(results)
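
A short sketch (illustrative only, with the LLM call replaced by canned scores) of what compute_ref does: it averages n_requests independent integer ratings obtained for the same prompt.

canned_scores = iter([7, 8, 9])  # stand-ins for ratings an LLM might return

def get_number_for_prompt(prompt):
    # Stub for the Grazie-backed version defined above.
    return next(canned_scores)

def compute_ref(prediction, reference, n_requests):
    prompt = f"rate {prediction!r} against {reference!r}"  # placeholder prompt
    results = [get_number_for_prompt(prompt) for _ in range(n_requests)]
    return sum(results) / len(results)

print(compute_ref("Fix typo", "Fix typo in README", n_requests=3))  # 8.0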
generation_steps/metrics_analysis.py CHANGED
@@ -1,6 +1,7 @@
 import functools
 import operator
 
+import Levenshtein
 import evaluate
 import pandas as pd
 from tqdm import tqdm
@@ -12,59 +13,78 @@ from custom_metrics import gpt_eval
 BLEU = evaluate.load('bleu', cache_dir=config.CACHE_DIR)
 
 
-def bleu_fn(pred, ref):
+def bleu_fn(pred, ref, **kwargs):
     return BLEU.compute(predictions=[pred], references=[ref])["bleu"]
 
 
 METEOR = evaluate.load('meteor', cache_dir=config.CACHE_DIR)
 
 
-def meteor_fn(pred, ref):
+def meteor_fn(pred, ref, **kwargs):
     return METEOR.compute(predictions=[pred], references=[ref])["meteor"]
 
 
 ROUGE = evaluate.load('rouge', cache_dir=config.CACHE_DIR)
 
 
-def rouge1_fn(pred, ref):
+def rouge1_fn(pred, ref, **kwargs):
     return ROUGE.compute(predictions=[pred], references=[ref])["rouge1"]
 
 
-def rouge2_fn(pred, ref):
+def rouge2_fn(pred, ref, **kwargs):
     return ROUGE.compute(predictions=[pred], references=[ref])["rouge2"]
 
 
-def rougeL_fn(pred, ref):
+def rougeL_fn(pred, ref, **kwargs):
     return ROUGE.compute(predictions=[pred], references=[ref])["rougeL"]
 
 
 BERTSCORE = evaluate.load('bertscore', cache_dir=config.CACHE_DIR)
 
 
-def bertscore_fn(pred, ref):
+def bertscore_fn(pred, ref, **kwargs):
     return BERTSCORE.compute(predictions=[pred], references=[ref], model_type="distilbert-base-uncased")["f1"][0]
 
 
-def gptscore_fn(pred, ref):
-    return gpt_eval.compute(prediction=pred, reference=ref)
-
-
 CHRF = evaluate.load("chrf")
 
 
-def chrf_fn(pred, ref):
+def chrf_fn(pred, ref, **kwargs):
     return CHRF.compute(predictions=[pred], references=[[ref]])["score"]
 
 
 TER = evaluate.load("ter")
 
 
-def ter_fn(pred, ref):
+def ter_fn(pred, ref, **kwargs):
     return TER.compute(predictions=[pred], references=[[ref]])["score"]
 
 
-METRICS = {
-    # "gptscore": gptscore_fn,
+def edit_distance_fn(pred, ref, **kwargs):
+    return Levenshtein.distance(pred, ref)
+
+
+def edit_time_fn(pred, ref, **kwargs):
+    return kwargs["edittime"]
+
+
+def gptscore_ref_1_fn(pred, ref, **kwargs):
+    return gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=1)
+
+
+def gptscore_ref_3_fn(pred, ref, **kwargs):
+    return gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=3)
+
+
+def gptscore_ref_5_fn(pred, ref, **kwargs):
+    return gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=5)
+
+
+IND_METRICS = {
+    "gptscore-ref-1-req": gptscore_ref_1_fn,
+    "gptscore-ref-3-req": gptscore_ref_3_fn,
+    # "gptscore-ref-5-req": gptscore_ref_5_fn,
+    "editdist": edit_distance_fn,
     "bleu": bleu_fn,
     "meteor": meteor_fn,
     "rouge1": rouge1_fn,
@@ -72,7 +92,12 @@ METRICS = {
     "rougeL": rougeL_fn,
     "bertscore": bertscore_fn,
     "chrF": chrf_fn,
-    "ter": ter_fn
+    "ter": ter_fn,
+}
+
+REL_METRICS = {
+    "editdist": edit_distance_fn,
+    "edittime": edit_time_fn,
 }
 
 
@@ -86,11 +111,11 @@ def compute_metrics(df):
     tqdm.pandas()
 
     def apply_metric_fn_to_row(row, fn, col_pred, col_ref):
-        return fn(row[col_pred], row[col_ref])
+        return fn(row[col_pred], row[col_ref], edittime=row['edit_time'])
 
-    for metric in METRICS:
-        print(f"Computing {metric}")
-        metric_fn = METRICS[metric]
+    for metric in REL_METRICS:
+        print(f"Computing {metric} for the related pairs")
+        metric_fn = REL_METRICS[metric]
         df[f"{metric}_related"] = df.progress_apply(
             lambda row: apply_metric_fn_to_row(row=row,
                                                fn=metric_fn,
@@ -98,6 +123,10 @@ def compute_metrics(df):
                                                col_ref="commit_msg_end"),
             axis=1
         )
+
+    for metric in IND_METRICS:
+        print(f"Computing {metric} for the independent pairs")
+        metric_fn = IND_METRICS[metric]
         df[f"{metric}_independent"] = df.progress_apply(
             lambda row: apply_metric_fn_to_row(row=row,
                                                fn=metric_fn,
@@ -106,25 +135,30 @@ def compute_metrics(df):
             axis=1
         )
 
-        df[f"{metric}_pearson"] = df[f"{metric}_related"].corr(df[f"{metric}_independent"], method="pearson")
-        df[f"{metric}_spearman"] = df[f"{metric}_related"].corr(df[f"{metric}_independent"], method="spearman")
+    for rel_metric in REL_METRICS:
+        for ind_metric in IND_METRICS:
+            df[f"rel_{rel_metric}_ind_{ind_metric}_pearson"] = (
+                df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="pearson"))
+
+            df[f"rel_{rel_metric}_ind_{ind_metric}_spearman"] = (
+                df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="spearman"))
 
     return df
 
 
 def correlations_for_group(group):
     correlations = []
-    for metric in METRICS:
-        correlations.append({
-            f"{metric}_pearson": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="pearson"),
-            f"{metric}_spearman": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="spearman")
-        })
-        for other_metric in METRICS:
+    for rel_metric in REL_METRICS:
+        # correlations.append({
+        #     f"{metric}_pearson": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="pearson"),
+        #     f"{metric}_spearman": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="spearman")
+        # })
+        for ind_metric in IND_METRICS:
             correlations.append({
-                f"ind_{metric}_rel_{other_metric}_pearson": group[f"{other_metric}_related"].corr(
-                    group[f"{metric}_independent"], method="pearson"),
-                f"ind_{metric}_rel_{other_metric}_spearman": group[f"{other_metric}_related"].corr(
-                    group[f"{metric}_independent"], method="spearman")
+                f"rel_{rel_metric}_ind_{ind_metric}_pearson": group[f"{rel_metric}_related"].corr(
+                    group[f"{ind_metric}_independent"], method="pearson"),
+                f"rel_{rel_metric}_ind_{ind_metric}_spearman": group[f"{rel_metric}_related"].corr(
+                    group[f"{ind_metric}_independent"], method="spearman"),
             })
     return pd.Series(functools.reduce(operator.ior, correlations, {}))
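
A toy demonstration (column names assumed from the diff above, values invented) of the REL x IND correlation pattern: every "_related" column is correlated with every "_independent" column under both Pearson and Spearman.

import pandas as pd

REL_METRICS = ["editdist", "edittime"]
IND_METRICS = ["bleu", "chrF"]

df = pd.DataFrame({
    "editdist_related": [10, 4, 25, 7],
    "edittime_related": [3000, 1200, 9000, 2500],
    "bleu_independent": [0.2, 0.6, 0.1, 0.5],
    "chrF_independent": [30.0, 55.0, 20.0, 48.0],
})

for rel_metric in REL_METRICS:
    for ind_metric in IND_METRICS:
        pearson = df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="pearson")
        spearman = df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="spearman")
        print(f"rel_{rel_metric}_ind_{ind_metric}: pearson={pearson:.2f}, spearman={spearman:.2f}")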
 
generation_steps/synthetic_end_to_start.py CHANGED
@@ -1,3 +1,5 @@
+from itertools import chain
+
 import pandas as pd
 from tqdm import tqdm
 
@@ -7,9 +9,9 @@ import statistics
 from api_wrappers import grazie_wrapper, hf_data_loader
 from generation_steps import examples
 
-GENERATION_MULTIPLIER = 2
+GENERATION_MULTIPLIER = 3
 REL_INSERTIONS_THRESHOLD = 0.5
-GENERATION_ATTEMPTS = 5
+GENERATION_ATTEMPTS = 3
 
 
 def build_prompt(reference, diff):
@@ -61,6 +63,8 @@ def generate_start_msg(end_msg, diff):
 
 COLS_TO_KEEP = ["hash", "repo", "commit_msg_end", "mods", "session"]
 
+COLS_TO_DEFAULT = {"edit_time": None}
+
 
 def transform(df):
     print(f"End -> start synthesis:")
@@ -75,7 +79,7 @@ def transform(df):
         "commit_msg_start": []
     }
 
-    for col in COLS_TO_KEEP:
+    for col in chain(COLS_TO_KEEP, COLS_TO_DEFAULT):
         generated_data[col] = []
 
     for _, row in tqdm(df.iterrows(), total=len(df)):
@@ -87,6 +91,9 @@ def transform(df):
         for col in COLS_TO_KEEP:
            generated_data[col].append(row[col])
 
+        for col in COLS_TO_DEFAULT:
+            generated_data[col].append(COLS_TO_DEFAULT[col])
+
     generated_df = pd.DataFrame.from_dict(generated_data)
     generated_df['end_to_start'] = True
 
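
A small aside (illustrative values only): iterating chain(COLS_TO_KEEP, COLS_TO_DEFAULT) walks the list items and then the dict keys, so the defaulted columns get output buffers as well.

from itertools import chain

COLS_TO_KEEP = ["hash", "repo"]          # subset, for illustration
COLS_TO_DEFAULT = {"edit_time": None}

generated_data = {}
for col in chain(COLS_TO_KEEP, COLS_TO_DEFAULT):
    generated_data[col] = []

print(list(generated_data))  # ['hash', 'repo', 'edit_time']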
 
generation_steps/synthetic_start_to_end.py CHANGED
@@ -7,9 +7,9 @@ import statistics
 from api_wrappers import grazie_wrapper
 from generation_steps import examples
 
-GENERATION_MULTIPLIER = 2
+GENERATION_MULTIPLIER = 3
 REL_DELETIONS_THRESHOLD = 0.75
-GENERATION_ATTEMPTS = 5
+GENERATION_ATTEMPTS = 3
 
 
 def build_prompt(prediction, diff):
generate_synthetic_dataset.py → run_pipeline.py RENAMED
File without changes