{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"0ea8b46b-839b-445b-8043-ccdf4e920ace","showTitle":false,"title":""},"id":"YLH80COBzi_F"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"id":"63B5exAuzq4M"},"outputs":[],"source":["from pathlib import Path\n","\n","# Pick the project root: the mounted Drive folder on Colab, otherwise the\n","# parent of the current working directory.\n","try:\n","    from google.colab import drive\n","\n","    drive.mount('/content/drive')\n","    workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n","    # Resolve only once per kernel: the next cell chdir()s into workding_dir,\n","    # so recomputing from Path.cwd() on a re-run would climb one level higher.\n","    if \"workding_dir\" not in globals():\n","        workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":368,"status":"ok","timestamp":1719461634865,"user":{"displayName":"Donghao Huang","userId":"00463591218503521679"},"user_tz":-480},"id":"zFulf0bg0H-9","outputId":"debdd535-c828-40b9-efc0-8a180e5830dd"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","\n","# Run from the project root so relative paths resolve against it.\n","os.chdir(workding_dir)\n","# Put the project root on the import path; skip the append on re-runs so\n","# sys.path does not accumulate duplicate entries.\n","if workding_dir not in sys.path:\n","    sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":589,"status":"ok","timestamp":1719462011879,"user":{"displayName":"Donghao Huang","userId":"00463591218503521679"},"user_tz":-480},"id":"DIUiweYYzi_I","outputId":"e16e9247-9077-4b0c-f8ea-17059f05a1c4"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, 
load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","# Fall back to the example file when no .env is found.\n","if not found_dotenv:\n","    found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","# override=True: values from the file replace variables already set in the environment.\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"W2QyVreqhOGM","outputId":"68b9590e-1ac6-4c6f-e0c4-e273ec816419"},"outputs":[{"data":{"text/html":["
\n"," | text | \n","label | \n","title | \n","puzzle | \n","truth | \n","hfl/llama-3-chinese-8b-instruct-v3_torch.bfloat16_lf | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-35_torch.bfloat16_lf | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-70_torch.bfloat16_lf | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-105_torch.bfloat16_lf | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-140_torch.bfloat16_lf | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-175_torch.bfloat16_lf | \n","
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n","甄加索是自杀吗 | \n","不是 | \n","海岸之谜 | \n","在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... | \n","甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... | \n","不是。 | \n","不是 | \n","不是 | \n","不是 | \n","不是 | \n","不是 | \n","
1 | \n","甄加索有身体上的疾病吗 | \n","是 | \n","海岸之谜 | \n","在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... | \n","甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... | \n","是。 | \n","是 | \n","是 | \n","是 | \n","是 | \n","是 | \n","
2 | \n","画作是甄的 | \n","是 | \n","海岸之谜 | \n","在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... | \n","甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... | \n","回答:不重要。 | \n","不重要 | \n","是 | \n","不重要 | \n","是 | \n","是 | \n","
3 | \n","甄有心脏病吗 | \n","是 | \n","海岸之谜 | \n","在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... | \n","甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... | \n","是。 | \n","是 | \n","是 | \n","是 | \n","是 | \n","是 | \n","
4 | \n","车轮是凶手留下的 | \n","不是 | \n","海岸之谜 | \n","在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... | \n","甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... | \n","不重要。 | \n","不是 | \n","不是 | \n","不是 | \n","不是 | \n","不是 | \n","
... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","
2995 | \n","哭泣者必须在晚上祭奠吗 | \n","是 | \n","甄庄哭声 | \n","在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... | \n","原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... | \n","是。 | \n","不重要 | \n","不重要 | \n","不重要 | \n","不重要 | \n","不重要 | \n","
2996 | \n","尸体在湖里吗 | \n","不是 | \n","甄庄哭声 | \n","在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... | \n","原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... | \n","不是。 | \n","不重要 | \n","不重要 | \n","不重要 | \n","不重要 | \n","不是 | \n","
2997 | \n","哭泣者和死者有特殊关系吗 | \n","是 | \n","甄庄哭声 | \n","在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... | \n","原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... | \n","是。 | \n","是 | \n","是 | \n","是 | \n","是 | \n","是 | \n","
2998 | \n","是帽子的主人去世了吗 | \n","不是 | \n","甄庄哭声 | \n","在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... | \n","原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... | \n","回答正确。 | \n","是 | \n","是 | \n","是 | \n","是 | \n","不是 | \n","
2999 | \n","死者受伤了吗 | \n","不是 | \n","甄庄哭声 | \n","在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... | \n","原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... | \n","回答:不是。 | \n","不是 | \n","不是 | \n","不是 | \n","不是 | \n","不是 | \n","
3000 rows × 11 columns
\n","\n"," | epoch | \n","model | \n","accuracy | \n","precision | \n","recall | \n","f1 | \n","
---|---|---|---|---|---|---|
0 | \n","0.0 | \n","hfl/llama-3-chinese-8b-instruct-v3_torch.bfloa... | \n","0.456333 | \n","0.674450 | \n","0.456333 | \n","0.530122 | \n","
1 | \n","0.2 | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-... | \n","0.640667 | \n","0.765241 | \n","0.640667 | \n","0.686507 | \n","
2 | \n","0.4 | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-... | \n","0.722333 | \n","0.761495 | \n","0.722333 | \n","0.729669 | \n","
3 | \n","0.6 | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-... | \n","0.625667 | \n","0.769429 | \n","0.625667 | \n","0.674742 | \n","
4 | \n","0.8 | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-... | \n","0.717333 | \n","0.774693 | \n","0.717333 | \n","0.739105 | \n","
5 | \n","1.0 | \n","hfl/llama-3-chinese-8b-instruct-v3/checkpoint-... | \n","0.688000 | \n","0.767848 | \n","0.688000 | \n","0.718197 | \n","