{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","# Resolve the project root only once per kernel; a later os.chdir() changes\n","# Path.cwd(), so recomputing it on re-run would give a different directory.\n","if \"workding_dir\" not in locals():\n","    try:\n","        # On Colab the project is read from the mounted Google Drive.\n","        from google.colab import drive\n","\n","        drive.mount(\"/content/drive\")\n","        workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","    except ModuleNotFoundError:\n","        # Outside Colab: use the parent of the current working directory.\n","        workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","# Add the project root to sys.path only if it is missing, so that re-running\n","# this cell does not keep appending duplicate entries.\n","if workding_dir not in sys.path:\n","    sys.path.append(workding_dir)\n","print(\"workding dir:\", 
workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["working dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["# haotian comp\n","# Standalone variant of the setup: defines workding_dir itself when no earlier\n","# cell has set it, then switches to it and makes it importable.\n","import os\n","import sys\n","from pathlib import Path\n","\n","if \"workding_dir\" not in locals():\n","    workding_dir = str(Path.cwd().parent)\n","os.chdir(workding_dir)\n","# Skip the append when the path is already registered (e.g. by a previous run).\n","if workding_dir not in sys.path:\n","    sys.path.append(workding_dir)\n","print(\"working dir:\", workding_dir)"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","# Fall back to the example file when the lookup for .env comes back empty.\n","if not found_dotenv:\n","    found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","# override=True: values from the file replace variables already set in the environment.\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[],"source":["# Short model name -> integer rank.\n","# NOTE(review): presumably a sort key for ordering models in tables/plots - confirm against the cells that use it.\n","model_orders = {\n","    \"internlm2_5-7b-chat-1m\": 10,\n","    \"Qwen2-7B-Instruct\": 20,\n","    \"Llama3.1-8B-Chinese-Chat\": 30,\n","    \"Llama3.1-70B-Chinese-Chat\": 40,\n","    \"Qwen2-72B-Instruct\": 50,\n","}"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[],"source":["# Pool of marker symbols; one is assigned to each model below.\n","markers = [\n","    \"o\",\n","    \"x\",\n","    \"^\",\n","    \"s\",\n","    \"d\",\n","    \"P\",\n","    \"X\",\n","    \"*\",\n","    \"v\",\n","    \">\",\n","    \"<\",\n","    \"p\",\n","    \"h\",\n","    \"H\",\n","    \"+\",\n","    \"|\",\n","    \"_\",\n","]\n","# Assign markers in the insertion order of model_orders. The index wraps around so\n","# that listing more models than marker symbols reuses symbols instead of raising IndexError.\n","model_markers = {\n","    model: markers[i % len(markers)] for i, model in enumerate(model_orders)\n","}"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"data":{"text/html":["
\n"," | epoch | \n","model | \n","accuracy | \n","precision | \n","recall | \n","f1 | \n","
---|---|---|---|---|---|---|
0 | \n","0.0 | \n","internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf | \n","0.510667 | \n","0.743214 | \n","0.510667 | \n","0.535733 | \n","
1 | \n","0.2 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-35_... | \n","0.784333 | \n","0.797765 | \n","0.784333 | \n","0.786494 | \n","
2 | \n","0.4 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-70_... | \n","0.783667 | \n","0.799698 | \n","0.783667 | \n","0.788688 | \n","
3 | \n","0.6 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-105... | \n","0.724333 | \n","0.817117 | \n","0.724333 | \n","0.756580 | \n","
4 | \n","0.8 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-140... | \n","0.803000 | \n","0.803141 | \n","0.803000 | \n","0.802806 | \n","
5 | \n","1.0 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-175... | \n","0.767667 | \n","0.810844 | \n","0.767667 | \n","0.784319 | \n","
6 | \n","1.2 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-210... | \n","0.773667 | \n","0.809167 | \n","0.773667 | \n","0.787687 | \n","
7 | \n","1.4 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-245... | \n","0.762333 | \n","0.806229 | \n","0.762333 | \n","0.777669 | \n","
8 | \n","1.6 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-280... | \n","0.755333 | \n","0.808620 | \n","0.755333 | \n","0.775559 | \n","
9 | \n","1.8 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-315... | \n","0.748000 | \n","0.817200 | \n","0.748000 | \n","0.773991 | \n","
10 | \n","2.0 | \n","internlm/internlm2_5-7b-chat-1m/checkpoint-350... | \n","0.756000 | \n","0.812688 | \n","0.756000 | \n","0.777781 | \n","
0 | \n","0.0 | \n","Qwen/Qwen2-7B-Instruct_torch.float16_lf | \n","0.619333 | \n","0.755570 | \n","0.619333 | \n","0.672630 | \n","
1 | \n","0.2 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.flo... | \n","0.725000 | \n","0.784017 | \n","0.725000 | \n","0.748995 | \n","
2 | \n","0.4 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.flo... | \n","0.759000 | \n","0.800530 | \n","0.759000 | \n","0.774875 | \n","
3 | \n","0.6 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.fl... | \n","0.692667 | \n","0.803918 | \n","0.692667 | \n","0.733248 | \n","
4 | \n","0.8 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.fl... | \n","0.725000 | \n","0.795272 | \n","0.725000 | \n","0.747624 | \n","
5 | \n","1.0 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.fl... | \n","0.675667 | \n","0.781015 | \n","0.675667 | \n","0.708654 | \n","
6 | \n","1.2 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.fl... | \n","0.701333 | \n","0.796956 | \n","0.701333 | \n","0.736268 | \n","
7 | \n","1.4 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.fl... | \n","0.732667 | \n","0.792254 | \n","0.732667 | \n","0.755402 | \n","
8 | \n","1.6 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.fl... | \n","0.698333 | \n","0.785127 | \n","0.698333 | \n","0.729225 | \n","
9 | \n","1.8 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.fl... | \n","0.678333 | \n","0.785391 | \n","0.678333 | \n","0.716413 | \n","
10 | \n","2.0 | \n","Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.fl... | \n","0.689000 | \n","0.792972 | \n","0.689000 | \n","0.725999 | \n","
0 | \n","0.0 | \n","shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.fl... | \n","0.236667 | \n","0.745718 | \n","0.236667 | \n","0.339624 | \n","
1 | \n","0.2 | \n","shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoi... | \n","0.625667 | \n","0.827414 | \n","0.625667 | \n","0.693570 | \n","
2 | \n","0.4 | \n","shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoi... | \n","0.762000 | \n","0.789946 | \n","0.762000 | \n","0.766701 | \n","
3 | \n","0.6 | \n","shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoi... | \n","0.680333 | \n","0.798030 | \n","0.680333 | \n","0.721244 | \n","
4 | \n","0.8 | \n","shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoi... | \n","0.752333 | \n","0.807426 | \n","0.752333 | \n","0.773644 | \n","
5 | \n","1.0 | \n","shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoi... | \n","0.737000 | \n","0.809059 | \n","0.737000 | \n","0.763784 | \n","
0 | \n","0.0 | \n","Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf | \n","0.748667 | \n","0.803899 | \n","0.748667 | \n","0.761587 | \n","