{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-title",
   "metadata": {},
   "source": [
    "# Metric distribution charts\n",
    "\n",
    "Kernel-density plots of the per-subset samples pickled as `<metric>_data.pkl` in `config.OUTPUT_CHARTS_DIR`, followed by the same kind of plot overlaid with histograms parsed from a raw FUS report. Every chart is written to `config.OUTPUT_CHARTS_DIR` as PNG and SVG."
   ]
  },
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "import pickle\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "import config"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "3dedb547dfae89d5",
   "metadata": {},
   "source": [
    "# Display label of each pickled group, by position in the pickle.\n",
    "GROUP_LABELS = ['Golden', 'Synthetic: Backward', 'Synthetic: Forward', 'Synthetic: Backward → Forward',\n",
    "                'Synthetic: All']"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "837c039c80b1093e",
   "metadata": {},
   "source": [
    "def load_data(s):\n",
    "    \"\"\"Load the pickled groups of metric `s` as a long-format DataFrame.\n",
    "\n",
    "    Reads `<s>_data.pkl` from `config.OUTPUT_CHARTS_DIR`. The pickle is iterated as\n",
    "    a sequence of groups; every sample of the i-th group is labelled `GROUP_LABELS[i]`.\n",
    "\n",
    "    Returns a DataFrame with one row per sample and the columns `Subset`\n",
    "    (group label) and `value` (the sample itself).\n",
    "    \"\"\"\n",
    "    # NOTE(security): pickle.load can run arbitrary code - only open files written by\n",
    "    # this project's own pipeline.\n",
    "    with open(config.OUTPUT_CHARTS_DIR / f\"{s}_data.pkl\", 'rb') as f:\n",
    "        groups = pickle.load(f)\n",
    "\n",
    "    records = [\n",
    "        {'Subset': GROUP_LABELS[i], 'value': sample}\n",
    "        for i, group in enumerate(groups)\n",
    "        for sample in group\n",
    "    ]\n",
    "    # Explicit columns keep the schema stable even when every group is empty.\n",
    "    return pd.DataFrame(records, columns=['Subset', 'value'])"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "plot-distribution-helper",
   "metadata": {},
   "source": [
    "def plot_distribution(name, title, xlabel, overlay=None, file_stem=None):\n",
    "    \"\"\"Plot one KDE curve per subset for metric `name`, then save and show the chart.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Metric whose samples are read with `load_data(name)`.\n",
    "    title, xlabel : str\n",
    "        Chart title and x-axis label.\n",
    "    overlay : mapping or DataFrame, optional\n",
    "        Data with `x` and `y` columns, drawn over the KDE curves as a dashed line.\n",
    "    file_stem : str, optional\n",
    "        Output file name without extension; defaults to `<name>_dist`.\n",
    "\n",
    "    The chart is written to `config.OUTPUT_CHARTS_DIR` as both PNG and SVG.\n",
    "    \"\"\"\n",
    "    sns.set_theme(palette=\"colorblind\")\n",
    "\n",
    "    # displot is figure-level: it returns a FacetGrid, not an Axes.\n",
    "    grid = sns.displot(data=load_data(name),\n",
    "                       x='value',\n",
    "                       hue='Subset',\n",
    "                       kind='kde',\n",
    "                       aspect=1.5,\n",
    "                       common_norm=False,\n",
    "                       clip=(0, 10000))\n",
    "\n",
    "    if overlay is not None:\n",
    "        # style=True combined with a single dash pattern renders the overlay dashed.\n",
    "        sns.lineplot(data=overlay, x='x', y='y', style=True, dashes=[(2, 2)], legend=False)\n",
    "\n",
    "    sns.move_legend(grid, \"center right\", bbox_to_anchor=(.70, .75))\n",
    "    for handle in grid.legend.get_lines():\n",
    "        handle.set_linewidth(5.0)  # widen the legend handles\n",
    "\n",
    "    plt.title(title)\n",
    "    plt.xlabel(xlabel)\n",
    "\n",
    "    if file_stem is None:\n",
    "        file_stem = f\"{name}_dist\"\n",
    "    for extension in (\"png\", \"svg\"):\n",
    "        plt.savefig(config.OUTPUT_CHARTS_DIR / f\"{file_stem}.{extension}\", bbox_inches='tight')\n",
    "    plt.show()"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "insert-delete-header",
   "metadata": {},
   "source": "## Insert / delete operations"
  },
  {
   "cell_type": "code",
   "id": "8c756559f9bd4ec",
   "metadata": {},
   "source": [
    "plot_distribution('deletions_norm',\n",
    "                  title='Number of \"delete\" operations: distribution',\n",
    "                  xlabel='Number of \"delete\" operations')"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "af1668213091e2c1",
   "metadata": {},
   "source": [
    "plot_distribution('insertions_norm',\n",
    "                  title='Number of \"insert\" operations: distribution',\n",
    "                  xlabel='Number of \"insert\" operations')"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "report-bins-header",
   "metadata": {},
   "source": "## Bins for the report"
  },
  {
   "cell_type": "code",
   "id": "c2d245ada4ae54a",
   "metadata": {},
   "source": [
    "def get_bins_for_report(intervals):\n",
    "    \"\"\"Print comma-separated integer bin edges for a list of intervals.\n",
    "\n",
    "    Each interval is a `(start, stop, num)` triple: `num` evenly spaced points from\n",
    "    `start` to `stop` (computed with `np.linspace`), each truncated to `int`.\n",
    "    The edges of all intervals are concatenated in the order given.\n",
    "    \"\"\"\n",
    "    bins = []\n",
    "    for start, stop, num in intervals:\n",
    "        bins.extend(int(edge) for edge in np.linspace(start=start, stop=stop, num=num))\n",
    "    print(\",\".join(map(str, bins)))\n",
    "\n",
    "\n",
    "get_bins_for_report([\n",
    "    (0, 400, 30),\n",
    "    (401, 1200, 20)\n",
    "])"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "id": "e293db4b140f920e",
   "metadata": {},
   "source": "## FUS logs"
  },
  {
   "cell_type": "code",
   "id": "74f1d808e83aa528",
   "metadata": {},
   "source": [
    "def to_hist(data):\n",
    "    \"\"\"Return a copy of `data` whose `y` values are rescaled to a density.\n",
    "\n",
    "    Every `y` value is divided by the sum of all `y` values and by the `bin_size`\n",
    "    of its bin, so that `sum(y * bin_size)` equals 1. `data` itself is left untouched:\n",
    "    the copy gets a newly built `y` list.\n",
    "    \"\"\"\n",
    "    data = data.copy()\n",
    "    num_samples = sum(data['y'])\n",
    "    data['y'] = [value / num_samples / width for value, width in zip(data['y'], data['bin_size'])]\n",
    "\n",
    "    return data"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "parse-fus-report",
   "metadata": {},
   "source": [
    "def parse_fus_report(path, metrics):\n",
    "    \"\"\"Parse a raw FUS report file into per-metric histogram columns.\n",
    "\n",
    "    The file is read line by line:\n",
    "\n",
    "    * a whitespace-only line closes the current metric section;\n",
    "    * a line containing one of `metrics` opens that metric's section and is not\n",
    "      parsed as data;\n",
    "    * any other line inside a section is split on commas: the first field is the bin\n",
    "      (`<left> - <right>`, or a single number optionally prefixed with `> ` -\n",
    "      presumably an open-ended last bin), the second-to-last field is the bin's value.\n",
    "\n",
    "    Lines outside any section are ignored.\n",
    "\n",
    "    Returns `{metric: {'x': [...], 'y': [...], 'bin_size': [...]}}` where `x` is the\n",
    "    bin midpoint and `bin_size` is `right - left + 1`.\n",
    "    \"\"\"\n",
    "    report = {metric: {\"x\": [], \"y\": [], \"bin_size\": []} for metric in metrics}\n",
    "\n",
    "    with open(path, \"r\") as f:\n",
    "        current_metric = None\n",
    "        for line in f:\n",
    "            if line.isspace():\n",
    "                current_metric = None\n",
    "                continue\n",
    "\n",
    "            header = next((metric for metric in metrics if metric in line), None)\n",
    "            if header is not None:\n",
    "                current_metric = header\n",
    "                continue\n",
    "\n",
    "            if current_metric is None:\n",
    "                continue\n",
    "\n",
    "            tokens = line.strip().split(\",\")\n",
    "            interval = tokens[0].replace(\"> \", \"\").split(\" - \")\n",
    "            x_left = float(interval[0])\n",
    "            # A bin given as a single number has no right edge: the left one is reused.\n",
    "            x_right = float(interval[-1])\n",
    "\n",
    "            section = report[current_metric]\n",
    "            section[\"x\"].append((x_left + x_right) / 2)\n",
    "            section[\"y\"].append(float(tokens[-2]))\n",
    "            section[\"bin_size\"].append(x_right - x_left + 1)\n",
    "\n",
    "    return report"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "ed09c1d2fec35d58",
   "metadata": {},
   "source": [
    "FUS_REPORT_TYPE = \"all_ide\"\n",
    "\n",
    "RAW_FUS_REPORT = f\"data/fus_raw_report_{FUS_REPORT_TYPE}.csv\"\n",
    "\n",
    "FUS_METRICS = [\"CommitMessageEditDistance\", \"CommitMessageLengthDiff\"]\n",
    "\n",
    "FUS_DATA = parse_fus_report(RAW_FUS_REPORT, FUS_METRICS)\n",
    "\n",
    "# The first SKIP_FIRST bins of every metric are left out of the charts.\n",
    "SKIP_FIRST = 2\n",
    "FUS_DATA_NO_HEAD = {\n",
    "    metric: {column: values[SKIP_FIRST:] for column, values in columns.items()}\n",
    "    for metric, columns in FUS_DATA.items()\n",
    "}"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "be2eb3c1499e196f",
   "metadata": {},
   "source": [
    "plot_distribution('editdist',\n",
    "                  title=f\"Edit distance ({FUS_REPORT_TYPE}): distribution\",\n",
    "                  xlabel=\"Edit distance\",\n",
    "                  overlay=to_hist(FUS_DATA_NO_HEAD[\"CommitMessageEditDistance\"]),\n",
    "                  file_stem=f\"editdist_fus_{FUS_REPORT_TYPE}_skip_{SKIP_FIRST}_dist\")"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "id": "31221bbd8ce257db",
   "metadata": {},
   "source": [
    "plot_distribution('lendiff',\n",
    "                  title=f\"Length difference ({FUS_REPORT_TYPE}): distribution\",\n",
    "                  xlabel=\"Length difference\",\n",
    "                  overlay=to_hist(FUS_DATA_NO_HEAD[\"CommitMessageLengthDiff\"]),\n",
    "                  file_stem=f\"lendiff_fus_{FUS_REPORT_TYPE}_skip_{SKIP_FIRST}_dist\")"
   ],
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}