commit-message-editing-visualization / generate_synthetic_dataset.py
Petr Tsvetkov
Generate a dataset for the labeling app
6676c5a
raw
history blame
444 Bytes
import config
from api_wrappers import hf_data_loader
from generation_steps import synthetic_end_to_start, synthetic_start_to_end, metrics_analysis
def run():
    """Build the synthetic dataset and write it to the configured artifact.

    The frame returned by
    ``hf_data_loader.load_processed_rewriting_as_pandas()`` is passed through
    the ``transform`` function of each generation step, in order:

    1. ``synthetic_end_to_start``
    2. ``synthetic_start_to_end``
    3. ``metrics_analysis``

    Each step receives the output of the previous one. The final result is
    exported with ``to_csv`` to ``config.SYNTHETIC_DATASET_ARTIFACT``.

    NOTE(review): presumably every step takes and returns a pandas
    DataFrame -- confirm against the ``generation_steps`` modules.
    """
    dataset = hf_data_loader.load_processed_rewriting_as_pandas()

    # Order matters: each step is fed the result of the one before it.
    pipeline = (
        synthetic_end_to_start,
        synthetic_start_to_end,
        metrics_analysis,
    )
    for step in pipeline:
        dataset = step.transform(dataset)

    dataset.to_csv(config.SYNTHETIC_DATASET_ARTIFACT)
# Run the generation pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()