{ "cells": [ { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true }, "ExecuteTime": { "end_time": "2024-05-01T15:23:17.507403Z", "start_time": "2024-05-01T15:23:17.497406Z" } }, "source": [ "import pandas as pd\n", "\n", "import config" ], "outputs": [], "execution_count": 7 }, { "cell_type": "code", "id": "2ac8757a17e62293", "metadata": { "ExecuteTime": { "end_time": "2024-05-01T15:23:19.365525Z", "start_time": "2024-05-01T15:23:19.120308Z" } }, "source": [ "# Read the dataset CSV, using its first column as the row index.\n", "dataset_path = config.SYNTHETIC_DATASET_ARTIFACT\n", "df = pd.read_csv(dataset_path, index_col=0)\n", "\n", "df.head()" ], "outputs": [ { "data": { "text/plain": [ " hash \\\n", "0 9a581830e4fa02eed501b4e1f546a2e2ea358e13 \n", "1 37067a53c4b3b99982ef8e1f431ba0c9302b66e8 \n", "2 82e350064cb8d1622c7cde275567ae594483fe62 \n", "3 cf98f5e3705603ae21bef9b0a577bcd001a8c92e \n", "4 c17a80f47b772d759aeb0878aa767a768a6fdd0c \n", "\n", " repo \\\n", "0 bitcoinunlimited/bitcoinunlimited \n", "1 mesonbuild/meson \n", "2 mycroftai/mycroft-core \n", "3 mesonbuild/meson \n", "4 mesonbuild/meson \n", "\n", " commit_msg_start \\\n", "0 Add extensive test option to parallel RPC test... \n", "1 Refactor argument parsing and command executio... \n", "2 Add helper functions for disk space management... \n", "3 Update path resolution for non-Windows systems... \n", "4 Add support for VS2017 architecture detection\\... \n", "\n", " commit_msg_end \\\n", "0 Add new block attack patterns\\n\\n- Added test ... \n", "1 Introduce unified argument parsing in meson\\n\\... \n", "2 Refactor file_utils.py\\n\\n- Add helper functio... \n", "3 Enable loading crossfiles for all platforms ex... \n", "4 Add support for VS2017 architecture detection.... 
\n", "\n", " session \\\n", "0 032e60d7-621a-46b6-972f-7590cfaf6458 \n", "1 5d7f1209-4ed9-4620-87ca-975f029c7f6f \n", "2 93b1c57c-e56c-4d75-89a6-ae1158b4fa74 \n", "3 5d7f1209-4ed9-4620-87ca-975f029c7f6f \n", "4 16e57250-21ff-4cdd-ae0d-760cabcc6160 \n", "\n", " commit_msg_history \\\n", "0 [{\"t\": \"-\", \"p\": 4, \"c\": \"e\", \"ts\": \"2024-04-0... \n", "1 [] \n", "2 [{\"t\": \"+\", \"p\": 0, \"c\": \"R\", \"ts\": \"2024-04-0... \n", "3 [] \n", "4 [{\"t\": \"-\", \"p\": 45, \"c\": \"\\n\", \"ts\": \"2024-04... \n", "\n", " loaded_ts submitted_ts edit_time_hist \\\n", "0 2024-04-04T19:48:31.180017 2024-04-04T19:50:32.925989 59468.0 \n", "1 2024-04-15T16:50:17.208813 2024-04-15T15:29:02.014310 0.0 \n", "2 2024-04-04T19:52:38.276314 2024-04-04T19:57:02.449096 133655.0 \n", "3 2024-04-15T17:42:14.482856 2024-04-15T15:29:02.014310 0.0 \n", "4 2024-04-15T15:47:31.022477 2024-04-15T15:53:08.796895 163218.0 \n", "\n", " edit_time ... rel_edittime_ind_rouge2_pearson \\\n", "0 121745.0 ... 0.281944 \n", "1 NaN ... 0.281944 \n", "2 264172.0 ... 0.281944 \n", "3 NaN ... 0.281944 \n", "4 337774.0 ... 
0.281944 \n", "\n", " rel_edittime_ind_rouge2_spearman rel_edittime_ind_rougeL_pearson \\\n", "0 0.218822 0.091196 \n", "1 0.218822 0.091196 \n", "2 0.218822 0.091196 \n", "3 0.218822 0.091196 \n", "4 0.218822 0.091196 \n", "\n", " rel_edittime_ind_rougeL_spearman rel_edittime_ind_bertscore_pearson \\\n", "0 0.071344 0.158807 \n", "1 0.071344 0.158807 \n", "2 0.071344 0.158807 \n", "3 0.071344 0.158807 \n", "4 0.071344 0.158807 \n", "\n", " rel_edittime_ind_bertscore_spearman rel_edittime_ind_chrF_pearson \\\n", "0 0.140481 0.184202 \n", "1 0.140481 0.184202 \n", "2 0.140481 0.184202 \n", "3 0.140481 0.184202 \n", "4 0.140481 0.184202 \n", "\n", " rel_edittime_ind_chrF_spearman rel_edittime_ind_ter_pearson \\\n", "0 0.079802 0.062616 \n", "1 0.079802 0.062616 \n", "2 0.079802 0.062616 \n", "3 0.079802 0.062616 \n", "4 0.079802 0.062616 \n", "\n", " rel_edittime_ind_ter_spearman \n", "0 0.305601 \n", "1 0.305601 \n", "2 0.305601 \n", "3 0.305601 \n", "4 0.305601 \n", "\n", "[5 rows x 71 columns]" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
hashrepocommit_msg_startcommit_msg_endsessioncommit_msg_historyloaded_tssubmitted_tsedit_time_histedit_time...rel_edittime_ind_rouge2_pearsonrel_edittime_ind_rouge2_spearmanrel_edittime_ind_rougeL_pearsonrel_edittime_ind_rougeL_spearmanrel_edittime_ind_bertscore_pearsonrel_edittime_ind_bertscore_spearmanrel_edittime_ind_chrF_pearsonrel_edittime_ind_chrF_spearmanrel_edittime_ind_ter_pearsonrel_edittime_ind_ter_spearman
09a581830e4fa02eed501b4e1f546a2e2ea358e13bitcoinunlimited/bitcoinunlimitedAdd extensive test option to parallel RPC test...Add new block attack patterns\\n\\n- Added test ...032e60d7-621a-46b6-972f-7590cfaf6458[{\"t\": \"-\", \"p\": 4, \"c\": \"e\", \"ts\": \"2024-04-0...2024-04-04T19:48:31.1800172024-04-04T19:50:32.92598959468.0121745.0...0.2819440.2188220.0911960.0713440.1588070.1404810.1842020.0798020.0626160.305601
137067a53c4b3b99982ef8e1f431ba0c9302b66e8mesonbuild/mesonRefactor argument parsing and command executio...Introduce unified argument parsing in meson\\n\\...5d7f1209-4ed9-4620-87ca-975f029c7f6f[]2024-04-15T16:50:17.2088132024-04-15T15:29:02.0143100.0NaN...0.2819440.2188220.0911960.0713440.1588070.1404810.1842020.0798020.0626160.305601
282e350064cb8d1622c7cde275567ae594483fe62mycroftai/mycroft-coreAdd helper functions for disk space management...Refactor file_utils.py\\n\\n- Add helper functio...93b1c57c-e56c-4d75-89a6-ae1158b4fa74[{\"t\": \"+\", \"p\": 0, \"c\": \"R\", \"ts\": \"2024-04-0...2024-04-04T19:52:38.2763142024-04-04T19:57:02.449096133655.0264172.0...0.2819440.2188220.0911960.0713440.1588070.1404810.1842020.0798020.0626160.305601
3cf98f5e3705603ae21bef9b0a577bcd001a8c92emesonbuild/mesonUpdate path resolution for non-Windows systems...Enable loading crossfiles for all platforms ex...5d7f1209-4ed9-4620-87ca-975f029c7f6f[]2024-04-15T17:42:14.4828562024-04-15T15:29:02.0143100.0NaN...0.2819440.2188220.0911960.0713440.1588070.1404810.1842020.0798020.0626160.305601
4c17a80f47b772d759aeb0878aa767a768a6fdd0cmesonbuild/mesonAdd support for VS2017 architecture detection\\...Add support for VS2017 architecture detection....16e57250-21ff-4cdd-ae0d-760cabcc6160[{\"t\": \"-\", \"p\": 45, \"c\": \"\\n\", \"ts\": \"2024-04...2024-04-15T15:47:31.0224772024-04-15T15:53:08.796895163218.0337774.0...0.2819440.2188220.0911960.0713440.1588070.1404810.1842020.0798020.0626160.305601
\n", "

5 rows × 71 columns

\n", "
" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 8 }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-01T15:11:08.418257Z", "start_time": "2024-05-01T15:11:08.408943Z" } }, "cell_type": "code", "source": [ "# Count distinct session ids; nunique() skips missing values instead of\n", "# counting NaN as a session.\n", "df['session'].nunique()" ], "id": "4bcbc0f1d3d6d248", "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 6 }, { "cell_type": "code", "execution_count": 15, "id": "d19c12dd10b25c75", "metadata": { "ExecuteTime": { "end_time": "2024-05-01T13:02:40.761645Z", "start_time": "2024-05-01T13:02:40.740647Z" } }, "outputs": [ { "data": { "text/plain": [ "['editdist', 'edittime']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "REL_SUFFIX = \"_related\"\n", "\n", "# Strip only the trailing suffix (not everything after the first underscore) so that\n", "# f\"{metric}_related\" always maps back to an existing column.\n", "rel_metrics = [col[: -len(REL_SUFFIX)] for col in df.columns if col.endswith(REL_SUFFIX)]\n", "rel_metrics" ] }, { "cell_type": "code", "execution_count": 16, "id": "79d644cd780b28a1", "metadata": { "ExecuteTime": { "end_time": "2024-05-01T13:02:44.072037Z", "start_time": "2024-05-01T13:02:44.055039Z" } }, "outputs": [ { "data": { "text/plain": [ "['gptscore-ref-1-req',\n", " 'gptscore-noref-1-req',\n", " 'editdist',\n", " 'bleu',\n", " 'meteor',\n", " 'rouge1',\n", " 'rouge2',\n", " 'rougeL',\n", " 'bertscore',\n", " 'chrF',\n", " 'ter']" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "IND_SUFFIX = \"_independent\"\n", "\n", "# Strip only the trailing suffix (not everything after the first underscore) so that\n", "# f\"{metric}_independent\" always maps back to an existing column.\n", "ind_metrics = [col[: -len(IND_SUFFIX)] for col in df.columns if col.endswith(IND_SUFFIX)]\n", "ind_metrics" ] }, { "cell_type": "code", "execution_count": 19, "id": "fdc5ae636bffbc8b", "metadata": { "ExecuteTime": { "end_time": "2024-05-01T13:03:52.623346Z", "start_time": "2024-05-01T13:03:52.577076Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
hasheditdist_relatededittime_relatedgptscore-ref-1-req_independentgptscore-noref-1-req_independenteditdist_independentbleu_independentmeteor_independentrouge1_independentrouge2_independentrougeL_independentbertscore_independentchrF_independentter_independent
countmeanmeanmeanmeanmeanmeanmeanmeanmeanmeanmeanmeanmean
end_to_startstart_to_end
FalseFalse43355.441860364099.06257.2558148.116279491.0697670.0128050.2249610.2020630.0407180.1364270.78026632.067005312.732989
True129406.627907NaN7.2170548.178295491.0697670.0128050.2249610.2020630.0407180.1364270.78026632.067005312.732989
TrueFalse129433.899225NaN7.3565898.302326534.0155040.0095420.2218930.2051510.0390330.1341140.77716231.753065317.717517
True387444.509044NaN7.3126618.276486534.0155040.0095420.2218930.2051510.0390330.1341140.77716231.753065317.717517
\n", "
" ], "text/plain": [ " hash editdist_related edittime_related \\\n", " count mean mean \n", "end_to_start start_to_end \n", "False False 43 355.441860 364099.0625 \n", " True 129 406.627907 NaN \n", "True False 129 433.899225 NaN \n", " True 387 444.509044 NaN \n", "\n", " gptscore-ref-1-req_independent \\\n", " mean \n", "end_to_start start_to_end \n", "False False 7.255814 \n", " True 7.217054 \n", "True False 7.356589 \n", " True 7.312661 \n", "\n", " gptscore-noref-1-req_independent \\\n", " mean \n", "end_to_start start_to_end \n", "False False 8.116279 \n", " True 8.178295 \n", "True False 8.302326 \n", " True 8.276486 \n", "\n", " editdist_independent bleu_independent \\\n", " mean mean \n", "end_to_start start_to_end \n", "False False 491.069767 0.012805 \n", " True 491.069767 0.012805 \n", "True False 534.015504 0.009542 \n", " True 534.015504 0.009542 \n", "\n", " meteor_independent rouge1_independent \\\n", " mean mean \n", "end_to_start start_to_end \n", "False False 0.224961 0.202063 \n", " True 0.224961 0.202063 \n", "True False 0.221893 0.205151 \n", " True 0.221893 0.205151 \n", "\n", " rouge2_independent rougeL_independent \\\n", " mean mean \n", "end_to_start start_to_end \n", "False False 0.040718 0.136427 \n", " True 0.040718 0.136427 \n", "True False 0.039033 0.134114 \n", " True 0.039033 0.134114 \n", "\n", " bertscore_independent chrF_independent \\\n", " mean mean \n", "end_to_start start_to_end \n", "False False 0.780266 32.067005 \n", " True 0.780266 32.067005 \n", "True False 0.777162 31.753065 \n", " True 0.777162 31.753065 \n", "\n", " ter_independent \n", " mean \n", "end_to_start start_to_end \n", "False False 312.732989 \n", " True 312.732989 \n", "True False 317.717517 \n", " True 317.717517 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Row count per group plus the mean of every related / independent metric column.\n", "AGGREGATION = {\n", "    \"hash\": [\"count\"],\n", "    **{f\"{name}_related\": [\"mean\"] for name in rel_metrics},\n", "    **{f\"{name}_independent\": [\"mean\"] for name in ind_metrics},\n", "}\n", "\n", "df.groupby(by=[\"end_to_start\", \"start_to_end\"]).agg(AGGREGATION)"
 ] }, { "cell_type": "code", "execution_count": 47, "id": "3429b60eab154b79", "metadata": { "ExecuteTime": { "end_time": "2024-05-01T13:42:57.052768Z", "start_time": "2024-05-01T13:42:56.812556Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
allgolden+s2e+e2s+e2s+s2e
spearmanpearsonspearmanpearsonspearmanpearsonspearmanpearsonspearmanpearson
relativeindependent
editdistbertscore-0.184962-0.129057-0.316215-0.254700-0.308494-0.113525-0.181393-0.165924-0.135421-0.091748
bleu0.2601180.1859950.2690280.2596900.5128410.5028270.1098310.0681380.2297120.145062
chrF-0.199200-0.129029-0.343201-0.300656-0.238124-0.064922-0.233123-0.201726-0.156914-0.093376
editdist0.9099340.9106410.7107720.6628080.9504940.9350640.8619300.8781180.9393180.962305
gptscore-noref-1-req0.0320480.0553640.1555100.0485880.0678570.047215-0.029048-0.0131280.0121020.066882
gptscore-ref-1-req0.0245500.035295-0.009830-0.062574-0.015178-0.0360010.0713450.0875840.0130120.033618
meteor0.3360160.3719490.0680340.1732370.2036160.4257750.3725980.3600510.3922620.401802
rouge1-0.077574-0.043738-0.187349-0.163230-0.139874-0.065543-0.082093-0.035603-0.054034-0.030799
rouge20.4142560.3407320.2761390.3320870.5235590.5375600.3239110.2828720.4338590.324538
rougeL0.006513-0.008078-0.041502-0.034867-0.022288-0.0046640.0124090.0163720.021983-0.010644
ter0.6180950.3855150.5756140.5013850.7740860.4625540.5293380.3885920.5916840.354459
edittimebertscore0.1404810.1588070.1404810.158807NaNNaNNaNNaNNaNNaN
bleu0.3023800.3261670.3023800.326167NaNNaNNaNNaNNaNNaN
chrF0.0798020.1842020.0798020.184202NaNNaNNaNNaNNaNNaN
editdist0.2526450.4111310.2526450.411131NaNNaNNaNNaNNaNNaN
gptscore-noref-1-req0.2064650.0262350.2064650.026235NaNNaNNaNNaNNaNNaN
gptscore-ref-1-req0.130419-0.0552180.130419-0.055218NaNNaNNaNNaNNaNNaN
meteor0.2533800.4035640.2533800.403564NaNNaNNaNNaNNaNNaN
rouge10.1559260.1369710.1559260.136971NaNNaNNaNNaNNaNNaN
rouge20.2188220.2819440.2188220.281944NaNNaNNaNNaNNaNNaN
rougeL0.0713440.0911960.0713440.091196NaNNaNNaNNaNNaNNaN
ter0.3056010.0626160.3056010.062616NaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " all golden \\\n", " spearman pearson spearman pearson \n", "relative independent \n", "editdist bertscore -0.184962 -0.129057 -0.316215 -0.254700 \n", " bleu 0.260118 0.185995 0.269028 0.259690 \n", " chrF -0.199200 -0.129029 -0.343201 -0.300656 \n", " editdist 0.909934 0.910641 0.710772 0.662808 \n", " gptscore-noref-1-req 0.032048 0.055364 0.155510 0.048588 \n", " gptscore-ref-1-req 0.024550 0.035295 -0.009830 -0.062574 \n", " meteor 0.336016 0.371949 0.068034 0.173237 \n", " rouge1 -0.077574 -0.043738 -0.187349 -0.163230 \n", " rouge2 0.414256 0.340732 0.276139 0.332087 \n", " rougeL 0.006513 -0.008078 -0.041502 -0.034867 \n", " ter 0.618095 0.385515 0.575614 0.501385 \n", "edittime bertscore 0.140481 0.158807 0.140481 0.158807 \n", " bleu 0.302380 0.326167 0.302380 0.326167 \n", " chrF 0.079802 0.184202 0.079802 0.184202 \n", " editdist 0.252645 0.411131 0.252645 0.411131 \n", " gptscore-noref-1-req 0.206465 0.026235 0.206465 0.026235 \n", " gptscore-ref-1-req 0.130419 -0.055218 0.130419 -0.055218 \n", " meteor 0.253380 0.403564 0.253380 0.403564 \n", " rouge1 0.155926 0.136971 0.155926 0.136971 \n", " rouge2 0.218822 0.281944 0.218822 0.281944 \n", " rougeL 0.071344 0.091196 0.071344 0.091196 \n", " ter 0.305601 0.062616 0.305601 0.062616 \n", "\n", " +s2e +e2s \\\n", " spearman pearson spearman pearson \n", "relative independent \n", "editdist bertscore -0.308494 -0.113525 -0.181393 -0.165924 \n", " bleu 0.512841 0.502827 0.109831 0.068138 \n", " chrF -0.238124 -0.064922 -0.233123 -0.201726 \n", " editdist 0.950494 0.935064 0.861930 0.878118 \n", " gptscore-noref-1-req 0.067857 0.047215 -0.029048 -0.013128 \n", " gptscore-ref-1-req -0.015178 -0.036001 0.071345 0.087584 \n", " meteor 0.203616 0.425775 0.372598 0.360051 \n", " rouge1 -0.139874 -0.065543 -0.082093 -0.035603 \n", " rouge2 0.523559 0.537560 0.323911 0.282872 \n", " rougeL -0.022288 -0.004664 0.012409 0.016372 \n", " ter 0.774086 0.462554 0.529338 0.388592 \n", "edittime 
bertscore NaN NaN NaN NaN \n", " bleu NaN NaN NaN NaN \n", " chrF NaN NaN NaN NaN \n", " editdist NaN NaN NaN NaN \n", " gptscore-noref-1-req NaN NaN NaN NaN \n", " gptscore-ref-1-req NaN NaN NaN NaN \n", " meteor NaN NaN NaN NaN \n", " rouge1 NaN NaN NaN NaN \n", " rouge2 NaN NaN NaN NaN \n", " rougeL NaN NaN NaN NaN \n", " ter NaN NaN NaN NaN \n", "\n", " +e2s+s2e \n", " spearman pearson \n", "relative independent \n", "editdist bertscore -0.135421 -0.091748 \n", " bleu 0.229712 0.145062 \n", " chrF -0.156914 -0.093376 \n", " editdist 0.939318 0.962305 \n", " gptscore-noref-1-req 0.012102 0.066882 \n", " gptscore-ref-1-req 0.013012 0.033618 \n", " meteor 0.392262 0.401802 \n", " rouge1 -0.054034 -0.030799 \n", " rouge2 0.433859 0.324538 \n", " rougeL 0.021983 -0.010644 \n", " ter 0.591684 0.354459 \n", "edittime bertscore NaN NaN \n", " bleu NaN NaN \n", " chrF NaN NaN \n", " editdist NaN NaN \n", " gptscore-noref-1-req NaN NaN \n", " gptscore-ref-1-req NaN NaN \n", " meteor NaN NaN \n", " rouge1 NaN NaN \n", " rouge2 NaN NaN \n", " rougeL NaN NaN \n", " ter NaN NaN " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 50, "id": "a3531f28722fa5bc", "metadata": { "ExecuteTime": { "end_time": "2024-05-01T13:49:09.514129Z", "start_time": "2024-05-01T13:49:09.295101Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
allgolden+s2e+e2s+e2s+s2e
spearmanpearsonspearmanpearsonspearmanpearsonspearmanpearsonspearmanpearson
relativeindependent
editdistbertscore-0.184962-0.129057-0.316215-0.254700-0.308494-0.113525-0.181393-0.165924-0.135421-0.091748
bleu0.2601180.1859950.2690280.2596900.5128410.5028270.1098310.0681380.2297120.145062
chrF-0.199200-0.129029-0.343201-0.300656-0.238124-0.064922-0.233123-0.201726-0.156914-0.093376
editdist0.9099340.9106410.7107720.6628080.9504940.9350640.8619300.8781180.9393180.962305
gptscore-noref-1-req0.0320480.0553640.1555100.0485880.0678570.047215-0.029048-0.0131280.0121020.066882
gptscore-ref-1-req0.0245500.035295-0.009830-0.062574-0.015178-0.0360010.0713450.0875840.0130120.033618
meteor0.3360160.3719490.0680340.1732370.2036160.4257750.3725980.3600510.3922620.401802
rouge1-0.077574-0.043738-0.187349-0.163230-0.139874-0.065543-0.082093-0.035603-0.054034-0.030799
rouge20.4142560.3407320.2761390.3320870.5235590.5375600.3239110.2828720.4338590.324538
rougeL0.006513-0.008078-0.041502-0.034867-0.022288-0.0046640.0124090.0163720.021983-0.010644
ter0.6180950.3855150.5756140.5013850.7740860.4625540.5293380.3885920.5916840.354459
edittimebertscore0.1404810.1588070.1404810.158807NaNNaNNaNNaNNaNNaN
bleu0.3023800.3261670.3023800.326167NaNNaNNaNNaNNaNNaN
chrF0.0798020.1842020.0798020.184202NaNNaNNaNNaNNaNNaN
editdist0.2526450.4111310.2526450.411131NaNNaNNaNNaNNaNNaN
gptscore-noref-1-req0.2064650.0262350.2064650.026235NaNNaNNaNNaNNaNNaN
gptscore-ref-1-req0.130419-0.0552180.130419-0.055218NaNNaNNaNNaNNaNNaN
meteor0.2533800.4035640.2533800.403564NaNNaNNaNNaNNaNNaN
rouge10.1559260.1369710.1559260.136971NaNNaNNaNNaNNaNNaN
rouge20.2188220.2819440.2188220.281944NaNNaNNaNNaNNaNNaN
rougeL0.0713440.0911960.0713440.091196NaNNaNNaNNaNNaNNaN
ter0.3056010.0626160.3056010.062616NaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " all golden \\\n", " spearman pearson spearman pearson \n", "relative independent \n", "editdist bertscore -0.184962 -0.129057 -0.316215 -0.254700 \n", " bleu 0.260118 0.185995 0.269028 0.259690 \n", " chrF -0.199200 -0.129029 -0.343201 -0.300656 \n", " editdist 0.909934 0.910641 0.710772 0.662808 \n", " gptscore-noref-1-req 0.032048 0.055364 0.155510 0.048588 \n", " gptscore-ref-1-req 0.024550 0.035295 -0.009830 -0.062574 \n", " meteor 0.336016 0.371949 0.068034 0.173237 \n", " rouge1 -0.077574 -0.043738 -0.187349 -0.163230 \n", " rouge2 0.414256 0.340732 0.276139 0.332087 \n", " rougeL 0.006513 -0.008078 -0.041502 -0.034867 \n", " ter 0.618095 0.385515 0.575614 0.501385 \n", "edittime bertscore 0.140481 0.158807 0.140481 0.158807 \n", " bleu 0.302380 0.326167 0.302380 0.326167 \n", " chrF 0.079802 0.184202 0.079802 0.184202 \n", " editdist 0.252645 0.411131 0.252645 0.411131 \n", " gptscore-noref-1-req 0.206465 0.026235 0.206465 0.026235 \n", " gptscore-ref-1-req 0.130419 -0.055218 0.130419 -0.055218 \n", " meteor 0.253380 0.403564 0.253380 0.403564 \n", " rouge1 0.155926 0.136971 0.155926 0.136971 \n", " rouge2 0.218822 0.281944 0.218822 0.281944 \n", " rougeL 0.071344 0.091196 0.071344 0.091196 \n", " ter 0.305601 0.062616 0.305601 0.062616 \n", "\n", " +s2e +e2s \\\n", " spearman pearson spearman pearson \n", "relative independent \n", "editdist bertscore -0.308494 -0.113525 -0.181393 -0.165924 \n", " bleu 0.512841 0.502827 0.109831 0.068138 \n", " chrF -0.238124 -0.064922 -0.233123 -0.201726 \n", " editdist 0.950494 0.935064 0.861930 0.878118 \n", " gptscore-noref-1-req 0.067857 0.047215 -0.029048 -0.013128 \n", " gptscore-ref-1-req -0.015178 -0.036001 0.071345 0.087584 \n", " meteor 0.203616 0.425775 0.372598 0.360051 \n", " rouge1 -0.139874 -0.065543 -0.082093 -0.035603 \n", " rouge2 0.523559 0.537560 0.323911 0.282872 \n", " rougeL -0.022288 -0.004664 0.012409 0.016372 \n", " ter 0.774086 0.462554 0.529338 0.388592 \n", "edittime 
bertscore NaN NaN NaN NaN \n", " bleu NaN NaN NaN NaN \n", " chrF NaN NaN NaN NaN \n", " editdist NaN NaN NaN NaN \n", " gptscore-noref-1-req NaN NaN NaN NaN \n", " gptscore-ref-1-req NaN NaN NaN NaN \n", " meteor NaN NaN NaN NaN \n", " rouge1 NaN NaN NaN NaN \n", " rouge2 NaN NaN NaN NaN \n", " rougeL NaN NaN NaN NaN \n", " ter NaN NaN NaN NaN \n", "\n", " +e2s+s2e \n", " spearman pearson \n", "relative independent \n", "editdist bertscore -0.135421 -0.091748 \n", " bleu 0.229712 0.145062 \n", " chrF -0.156914 -0.093376 \n", " editdist 0.939318 0.962305 \n", " gptscore-noref-1-req 0.012102 0.066882 \n", " gptscore-ref-1-req 0.013012 0.033618 \n", " meteor 0.392262 0.401802 \n", " rouge1 -0.054034 -0.030799 \n", " rouge2 0.433859 0.324538 \n", " rougeL 0.021983 -0.010644 \n", " ter 0.591684 0.354459 \n", "edittime bertscore NaN NaN \n", " bleu NaN NaN \n", " chrF NaN NaN \n", " editdist NaN NaN \n", " gptscore-noref-1-req NaN NaN \n", " gptscore-ref-1-req NaN NaN \n", " meteor NaN NaN \n", " rouge1 NaN NaN \n", " rouge2 NaN NaN \n", " rougeL NaN NaN \n", " ter NaN NaN " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from analysis_util import get_correlations_for_groups\n", "\n", "get_correlations_for_groups(df, right_side=\"ind\")" ] }, { "cell_type": "code", "execution_count": null, "id": "d5dc33a4251baf9a", "metadata": {}, "outputs": [], "source": [ "get_correlations_for_groups(df, right_side=\"aggr\")" ] }, { "metadata": { "ExecuteTime": { "end_time": "2024-05-01T15:25:18.226195Z", "start_time": "2024-05-01T15:25:17.464762Z" } }, "cell_type": "code", "source": [ "from matplotlib import pyplot as plt\n", "\n", "# Explicit figure/axes with labels and a title so the plot can be read on its own;\n", "# plt.show() keeps the PathCollection repr out of the cell output.\n", "fig, ax = plt.subplots()\n", "ax.scatter(x=df['edittime_related'], y=df['editdist_related'])\n", "ax.set_xlabel('edittime_related')\n", "ax.set_ylabel('editdist_related')\n", "ax.set_title('editdist_related vs. edittime_related')\n", "plt.show()" ], "id": "5df60ac60034b274", "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "
" ], "image/png": "" }, "metadata": {}, "output_type": "display_data" } ], "execution_count": 11 } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 5 }