File size: 1,425 Bytes
13e3243
 
 
2d03034
13e3243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a6b5a66
13e3243
 
 
 
 
a6b5a66
13e3243
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import config
from api_wrappers import hf_data_loader

# Number of (start, end) commit-message pairs sampled from the dataset
# and rendered as few-shot examples.
N_EXAMPLES = 15


def get_example_prompt_end_to_start(start_msg, end_msg):
    """Render one few-shot example going from an edited message to its initial one.

    Args:
        start_msg: The initial commit message, shown as the expected output.
        end_msg: The edited commit message, shown as the input.

    Returns:
        The example as one string, wrapped in START/END marker lines.
    """
    pieces = [
        "START OF THE EXAMPLE",
        "",
        "For the following edited commit message:",
        # The trailing space after this marker is part of the prompt text.
        "START OF THE EDITED COMMIT MESSAGE ",
        f"{end_msg}",
        "END OF THE EDITED COMMIT MESSAGE",
        "",
        "You would output the following initial commit message:",
        "START OF THE INITIAL COMMIT MESSAGE",
        f"{start_msg}",
        "END OF THE INITIAL COMMIT MESSAGE",
        "",
        "END OF THE EXAMPLE",
    ]
    return "\n".join(pieces)


def get_example_prompt_start_to_end(start_msg, end_msg):
    """Render one few-shot example going from a generated message to its improved one.

    Args:
        start_msg: The LLM-generated commit message, shown as the input.
        end_msg: The improved commit message, shown as the expected output.

    Returns:
        The example as one string, wrapped in START/END marker lines.
    """
    pieces = [
        "START OF THE EXAMPLE",
        "",
        "For the following LLM-generated commit message:",
        # The trailing space after this marker is part of the prompt text.
        "START OF THE GENERATED COMMIT MESSAGE ",
        f"{start_msg}",
        "END OF THE GENERATED COMMIT MESSAGE",
        "",
        "You would output the following improved commit message:",
        "START OF THE IMPROVED COMMIT MESSAGE",
        f"{end_msg}",
        "END OF THE IMPROVED COMMIT MESSAGE",
        "",
        "END OF THE EXAMPLE",
    ]
    return "\n".join(pieces)


# Keep only the start/end message columns of the raw rewriting dataset and
# draw N_EXAMPLES rows from it, seeded with config.RANDOM_STATE.
manual_df = (
    hf_data_loader.load_raw_rewriting_as_pandas()[["commit_msg_start", "commit_msg_end"]]
    .sample(n=N_EXAMPLES, random_state=config.RANDOM_STATE)
)


def generate_examples(end_to_start):
    """Render every sampled message pair as an example and join them into one string.

    Args:
        end_to_start: If truthy, use the edited-to-initial example template;
            otherwise use the generated-to-improved template.

    Returns:
        All rendered examples for the rows of ``manual_df``, separated by a
        single newline.
    """
    if end_to_start:
        render = get_example_prompt_end_to_start
    else:
        render = get_example_prompt_start_to_end

    rendered = []
    for _, pair in manual_df.iterrows():
        rendered.append(render(pair["commit_msg_start"], pair["commit_msg_end"]))

    return "\n".join(rendered)


# Example blocks for both prompt directions, built once at import time.
EXAMPLES_END_TO_START, EXAMPLES_START_TO_END = (
    generate_examples(end_to_start=True),
    generate_examples(end_to_start=False),
)