{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "1b5b7e0e",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:07.130546Z",
"iopub.status.busy": "2024-10-19T03:27:07.130175Z",
"iopub.status.idle": "2024-10-19T03:27:23.548705Z",
"shell.execute_reply": "2024-10-19T03:27:23.547706Z"
},
"id": "B9q9ZSp5lo7X",
"outputId": "890cd4d8-5f85-4756-a09b-1dfe940ee81a",
"papermill": {
"duration": 16.444707,
"end_time": "2024-10-19T03:27:23.551114",
"exception": false,
"start_time": "2024-10-19T03:27:07.106407",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting seqeval\r\n",
" Downloading seqeval-1.2.2.tar.gz (43 kB)\r\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l-\b \b\\\b \b|\b \bdone\r\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from seqeval) (1.26.4)\r\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /opt/conda/lib/python3.10/site-packages (from seqeval) (1.2.2)\r\n",
"Requirement already satisfied: scipy>=1.3.2 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.21.3->seqeval) (1.14.1)\r\n",
"Requirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\r\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\r\n",
"Building wheels for collected packages: seqeval\r\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l-\b \b\\\b \b|\b \bdone\r\n",
"\u001b[?25h Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=69bd75e0f0986914506845525271beaecf662a42481bb6c537436e831a99a0c6\r\n",
" Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\r\n",
"Successfully built seqeval\r\n",
"Installing collected packages: seqeval\r\n",
"Successfully installed seqeval-1.2.2\r\n"
]
}
],
"source": [
"# Install seqeval, pinned to the version (1.2.2) recorded in this cell's install log.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install seqeval==1.2.2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0e6809c5",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:23.595729Z",
"iopub.status.busy": "2024-10-19T03:27:23.595398Z",
"iopub.status.idle": "2024-10-19T03:27:32.907685Z",
"shell.execute_reply": "2024-10-19T03:27:32.906858Z"
},
"id": "JqGBmHCdlo7Y",
"papermill": {
"duration": 9.337092,
"end_time": "2024-10-19T03:27:32.910127",
"exception": false,
"start_time": "2024-10-19T03:27:23.573035",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import re\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import torch.nn as nn\n",
"import tqdm\n",
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
"from transformers import BertTokenizer, BertForTokenClassification, AdamW, BertTokenizerFast"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e54c2694",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:32.955778Z",
"iopub.status.busy": "2024-10-19T03:27:32.954920Z",
"iopub.status.idle": "2024-10-19T03:27:33.127858Z",
"shell.execute_reply": "2024-10-19T03:27:33.126903Z"
},
"id": "YMZ4Ox4jlo7Z",
"outputId": "e93176cb-247d-430a-e0bd-5022444f9b87",
"papermill": {
"duration": 0.197735,
"end_time": "2024-10-19T03:27:33.130177",
"exception": false,
"start_time": "2024-10-19T03:27:32.932442",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to /usr/share/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package stopwords to /usr/share/nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n"
]
}
],
"source": [
"import re\n",
"import nltk\n",
"from nltk.corpus import stopwords\n",
"from nltk.tokenize import word_tokenize\n",
"\n",
"# Fetch the NLTK packages this notebook relies on\n",
"for resource in ('punkt', 'stopwords'):\n",
"    nltk.download(resource)\n",
"\n",
"# English stop words held in a set; extend the set here if more are needed\n",
"stop_words = set(stopwords.words('english'))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f7f68ef5",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:33.174865Z",
"iopub.status.busy": "2024-10-19T03:27:33.174544Z",
"iopub.status.idle": "2024-10-19T03:27:37.069450Z",
"shell.execute_reply": "2024-10-19T03:27:37.068367Z"
},
"id": "tWH6Vp5Flo7a",
"papermill": {
"duration": 3.919845,
"end_time": "2024-10-19T03:27:37.071804",
"exception": false,
"start_time": "2024-10-19T03:27:33.151959",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Load the three annotated spreadsheets (G1, G2, G3) into df, df2 and df3 respectively\n",
"df, df2, df3 = (\n",
"    pd.read_excel(path)\n",
"    for path in (\n",
"        \"/kaggle/input/miimansa/G1.xlsx\",\n",
"        \"/kaggle/input/miimansa/G2.xlsx\",\n",
"        \"/kaggle/input/miimansa/G3.xlsx\",\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ea5a10eb",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:37.116964Z",
"iopub.status.busy": "2024-10-19T03:27:37.116457Z",
"iopub.status.idle": "2024-10-19T03:27:37.135451Z",
"shell.execute_reply": "2024-10-19T03:27:37.134392Z"
},
"id": "-v1hN8xwdA_O",
"papermill": {
"duration": 0.04403,
"end_time": "2024-10-19T03:27:37.137759",
"exception": false,
"start_time": "2024-10-19T03:27:37.093729",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Remove every row that has a missing value in any column; each name is rebound to the cleaned frame\n",
"df, df2, df3 = (frame.dropna() for frame in (df, df2, df3))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fbe36515",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:37.184837Z",
"iopub.status.busy": "2024-10-19T03:27:37.184522Z",
"iopub.status.idle": "2024-10-19T03:27:37.202782Z",
"shell.execute_reply": "2024-10-19T03:27:37.201870Z"
},
"id": "gLVf86bYlo7a",
"outputId": "0b2c5507-2b12-4daa-aaf7-07f383d7a95a",
"papermill": {
"duration": 0.043363,
"end_time": "2024-10-19T03:27:37.204761",
"exception": false,
"start_time": "2024-10-19T03:27:37.161398",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" ID | \n",
" tags | \n",
" text | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" NCT02361944 | \n",
" 16:20:treatment,25:44:treatment | \n",
" Current use of hemo- or peritoneal dialysis | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" NCT02593526 | \n",
" 24:43:treatment, | \n",
" Intention to change to peritoneal dialysis, or... | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" NCT02703272 | \n",
" 27:52:treatment,58:66:treatment | \n",
" Participants with ongoing anticoagulation trea... | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" NCT03006302 | \n",
" 8:16:treatment | \n",
" Use of warfarin | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" NCT02931110 | \n",
" 1:9:treatment | \n",
" warfarin | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 ID tags \\\n",
"0 0 NCT02361944 16:20:treatment,25:44:treatment \n",
"1 1 NCT02593526 24:43:treatment, \n",
"2 2 NCT02703272 27:52:treatment,58:66:treatment \n",
"3 3 NCT03006302 8:16:treatment \n",
"4 4 NCT02931110 1:9:treatment \n",
"\n",
" text \n",
"0 Current use of hemo- or peritoneal dialysis \n",
"1 Intention to change to peritoneal dialysis, or... \n",
"2 Participants with ongoing anticoagulation trea... \n",
"3 Use of warfarin \n",
"4 warfarin "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of df3 (columns and tag format)\n",
"df3.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "92afd862",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:37.249796Z",
"iopub.status.busy": "2024-10-19T03:27:37.249475Z",
"iopub.status.idle": "2024-10-19T03:27:40.749461Z",
"shell.execute_reply": "2024-10-19T03:27:40.748585Z"
},
"id": "wajmmeHTlo7d",
"outputId": "d6d32f14-ca2e-4f95-c456-fd03ac06289b",
"papermill": {
"duration": 3.525134,
"end_time": "2024-10-19T03:27:40.751577",
"exception": false,
"start_time": "2024-10-19T03:27:37.226443",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e9fbb901478541afaa5e4331803f95a5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/49.0 [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "db6042c77cbd4db0bca2331c558256d8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"vocab.txt: 0%| | 0.00/213k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4df732b79c4d44f6ab297c7ebe4b8f7b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0%| | 0.00/436k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06b337d1462e48cc95af610666b09666",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/570 [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dc8efdeeb86448f1ab80d1d7ca7af5ca",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.safetensors: 0%| | 0.00/436M [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"# BIO tag -> integer class id for the token-classification head\n",
"label_map = {\n",
"    \"O\": 0,\n",
"    \"B-treatment\": 1,\n",
"    \"I-treatment\": 2,\n",
"    \"B-chronic_disease\": 3,\n",
"    \"I-chronic_disease\": 4,\n",
"    \"B-cancer\": 5,\n",
"    \"I-cancer\": 6,\n",
"    \"B-allergy_name\": 7,\n",
"    \"I-allergy_name\": 8,\n",
"}\n",
"num_labels = len(label_map)\n",
"\n",
"# Length every encoded example is padded / truncated to\n",
"max_sent_len = 256\n",
"\n",
"# Training hyperparameters\n",
"batch_size = 16\n",
"learning_rate = 3e-5\n",
"num_epochs = 5\n",
"\n",
"# Run on the GPU when torch can see one, otherwise on the CPU\n",
"device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
"\n",
"# Fast tokenizer (preprocessing later asks it for character offset mappings) and a\n",
"# BERT token-classification model with one output class per entry of label_map\n",
"tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')\n",
"model = BertForTokenClassification.from_pretrained(\"bert-base-cased\", num_labels=num_labels)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "487f1510",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:40.798381Z",
"iopub.status.busy": "2024-10-19T03:27:40.798066Z",
"iopub.status.idle": "2024-10-19T03:27:40.803487Z",
"shell.execute_reply": "2024-10-19T03:27:40.802663Z"
},
"id": "PDt6GgVVcNcd",
"papermill": {
"duration": 0.03074,
"end_time": "2024-10-19T03:27:40.805307",
"exception": false,
"start_time": "2024-10-19T03:27:40.774567",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def drop_long_sequences(df, tokenizer, max_tokens=256):\n",
"    \"\"\"Return the rows of ``df`` whose ``text`` fits within ``max_tokens`` token positions.\n",
"\n",
"    A row's length is the number of pieces returned by ``tokenizer.tokenize`` plus the\n",
"    special tokens the tokenizer adds around a single sequence (read from\n",
"    ``tokenizer.num_special_tokens_to_add()`` when the tokenizer has that method,\n",
"    otherwise 0). Counting them keeps rows that pass this filter from being truncated\n",
"    by an encoding step that uses the same limit.\n",
"\n",
"    Args:\n",
"        df: DataFrame with a ``text`` column.\n",
"        tokenizer: Object exposing ``tokenize(text)``.\n",
"        max_tokens: Largest allowed length, special tokens included.\n",
"\n",
"    Returns:\n",
"        The filtered DataFrame; the number of dropped rows is printed.\n",
"    \"\"\"\n",
"    count_special = getattr(tokenizer, \"num_special_tokens_to_add\", None)\n",
"    special_tokens = count_special() if callable(count_special) else 0\n",
"\n",
"    def is_within_limit(text, max_tokens):\n",
"        tokens = tokenizer.tokenize(text)\n",
"        return len(tokens) + special_tokens <= max_tokens\n",
"\n",
"    # astype(bool) keeps the mask a boolean indexer even when df has no rows\n",
"    keep_mask = df['text'].apply(lambda x: is_within_limit(x, max_tokens)).astype(bool)\n",
"    filtered_df = df[keep_mask]\n",
"\n",
"    print(f\"Dropped {len(df) - len(filtered_df)} rows with more than {max_tokens} tokens.\")\n",
"    return filtered_df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fd6c78a9",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:40.852574Z",
"iopub.status.busy": "2024-10-19T03:27:40.851831Z",
"iopub.status.idle": "2024-10-19T03:27:43.203412Z",
"shell.execute_reply": "2024-10-19T03:27:43.202285Z"
},
"id": "PqnSXwPbcW-E",
"outputId": "b7aa8d2c-8477-42c8-cbc5-655184ecfdb5",
"papermill": {
"duration": 2.377186,
"end_time": "2024-10-19T03:27:43.205540",
"exception": false,
"start_time": "2024-10-19T03:27:40.828354",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dropped 2 rows with more than 256 tokens.\n",
"Dropped 2 rows with more than 256 tokens.\n",
"Dropped 2 rows with more than 256 tokens.\n"
]
}
],
"source": [
"# Apply the length filter to all three frames, using the shared sequence-length setting\n",
"df, df2, df3 = (\n",
"    drop_long_sequences(frame, tokenizer, max_tokens=max_sent_len)\n",
"    for frame in (df, df2, df3)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "b1939a46",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:43.253867Z",
"iopub.status.busy": "2024-10-19T03:27:43.253567Z",
"iopub.status.idle": "2024-10-19T03:27:43.260250Z",
"shell.execute_reply": "2024-10-19T03:27:43.259429Z"
},
"id": "5Dk0CW3Ilo7b",
"outputId": "d3e44387-7aed-4e53-997a-37e63bcc5a70",
"papermill": {
"duration": 0.033121,
"end_time": "2024-10-19T03:27:43.262089",
"exception": false,
"start_time": "2024-10-19T03:27:43.228968",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(6452, 4)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of df2 after the length filter\n",
"df2.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2d40ac5d",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:43.310003Z",
"iopub.status.busy": "2024-10-19T03:27:43.309699Z",
"iopub.status.idle": "2024-10-19T03:27:43.324765Z",
"shell.execute_reply": "2024-10-19T03:27:43.323724Z"
},
"id": "_okbZqOJlo7c",
"outputId": "a05c76d8-0259-45a7-88d1-5648187bed9a",
"papermill": {
"duration": 0.041527,
"end_time": "2024-10-19T03:27:43.326702",
"exception": false,
"start_time": "2024-10-19T03:27:43.285175",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training set size: 5883\n",
"Test set size: 1471\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# 80/20 train/test split of each frame, with a fixed seed so the split is repeatable\n",
"(train_df1, test_df1), (train_df2, test_df2), (train_df3, test_df3) = (\n",
"    train_test_split(frame, test_size=0.2, random_state=42)\n",
"    for frame in (df, df2, df3)\n",
")\n",
"\n",
"# Report the split sizes of the first dataset\n",
"print(f\"Training set size: {len(train_df1)}\")\n",
"print(f\"Test set size: {len(test_df1)}\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b158b8f8",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:43.376469Z",
"iopub.status.busy": "2024-10-19T03:27:43.376193Z",
"iopub.status.idle": "2024-10-19T03:27:43.382245Z",
"shell.execute_reply": "2024-10-19T03:27:43.381397Z"
},
"id": "3F2E9_7elo7e",
"papermill": {
"duration": 0.031938,
"end_time": "2024-10-19T03:27:43.384242",
"exception": false,
"start_time": "2024-10-19T03:27:43.352304",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def extract_sentence_labels(tags):\n",
"    \"\"\"Build a sentence-level multi-hot vector from a ``start:end:type`` tag string.\n",
"\n",
"    Args:\n",
"        tags: Comma-separated ``start:end:type`` entries (empty entries are skipped),\n",
"            or NaN / \"\" when the sentence carries no annotation.\n",
"\n",
"    Returns:\n",
"        ``np.ndarray`` of length ``num_labels`` with 1 at the index of every entity\n",
"        type found in ``tags`` and 0 elsewhere.\n",
"\n",
"    Raises:\n",
"        ValueError: If a non-empty entry does not have exactly three ``:``-separated fields.\n",
"\n",
"    Note:\n",
"        ``label_map`` is keyed by BIO labels (``B-treatment``, ``I-treatment``, ...) while\n",
"        the tags carry bare types (``treatment``). A type that is not itself a key is\n",
"        therefore recorded at the index of its ``B-`` label; looking up only the bare\n",
"        type can never match those keys and would leave the vector all zeros.\n",
"    \"\"\"\n",
"    label_vector = np.zeros(num_labels)\n",
"\n",
"    if pd.isna(tags) or tags == \"\":\n",
"        return label_vector  # no annotations -> all zeros\n",
"\n",
"    for entry in tags.split(','):\n",
"        entry = entry.strip()\n",
"        if not entry:\n",
"            continue  # left over from a trailing or doubled comma\n",
"\n",
"        _, _, tag_type = entry.split(':')\n",
"        tag_type = tag_type.strip()\n",
"\n",
"        label_idx = label_map.get(tag_type)\n",
"        if label_idx is None:\n",
"            label_idx = label_map.get(f\"B-{tag_type}\")\n",
"        if label_idx is not None:\n",
"            label_vector[label_idx] = 1  # mark the entity type as present\n",
"\n",
"    return label_vector"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6b8fec54",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:43.432466Z",
"iopub.status.busy": "2024-10-19T03:27:43.432186Z",
"iopub.status.idle": "2024-10-19T03:27:54.274957Z",
"shell.execute_reply": "2024-10-19T03:27:54.274180Z"
},
"id": "6Ge2bypClo7e",
"papermill": {
"duration": 10.869075,
"end_time": "2024-10-19T03:27:54.277259",
"exception": false,
"start_time": "2024-10-19T03:27:43.408184",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def preprocess_data(df):\n",
"    \"\"\"Tokenize each annotated row and turn its character spans into BIO label ids.\n",
"\n",
"    Rows whose ``text`` or ``tags`` is missing are skipped. ``tags`` is a comma-separated\n",
"    list of ``start:end:label`` entries; both offsets are reduced by one before they are\n",
"    compared with the tokenizer's character offsets.\n",
"\n",
"    Args:\n",
"        df: DataFrame with ``text`` and ``tags`` columns.\n",
"\n",
"    Returns:\n",
"        dict with keys ``tokens`` (input ids), ``attention_mask`` and ``labels``\n",
"        (label ids), each a list holding one ``max_sent_len``-long list per kept row.\n",
"    \"\"\"\n",
"    token_id_rows = []\n",
"    mask_rows = []\n",
"    label_id_rows = []\n",
"\n",
"    for _, record in df.iterrows():\n",
"        sentence = record['text']\n",
"        raw_tags = record['tags']\n",
"        if pd.isna(raw_tags) or pd.isna(sentence):\n",
"            continue\n",
"\n",
"        # Discard the blank pieces produced by trailing or doubled commas\n",
"        spans = [piece for piece in raw_tags.split(',') if piece.strip()]\n",
"\n",
"        encoded = tokenizer(\n",
"            sentence,\n",
"            truncation=True,\n",
"            padding='max_length',\n",
"            max_length=max_sent_len,\n",
"            return_offsets_mapping=True,\n",
"        )\n",
"        token_ids = encoded['input_ids']\n",
"        offsets = encoded['offset_mapping']\n",
"\n",
"        # Every position starts as 'O'; only tokens that begin inside a span are overwritten\n",
"        bio_tags = ['O'] * len(token_ids)\n",
"\n",
"        for span in spans:\n",
"            raw_start, raw_end, span_label = span.split(':')\n",
"            span_start = int(raw_start) - 1\n",
"            span_end = int(raw_end) - 1\n",
"\n",
"            inside = False\n",
"            for pos, (tok_start, tok_end) in enumerate(offsets):\n",
"                # Positions whose offset ends at 0 are never tagged\n",
"                # (presumably special / padding tokens - TODO confirm)\n",
"                if span_start <= tok_start < span_end and tok_end != 0:\n",
"                    bio_tags[pos] = f\"I-{span_label}\" if inside else f\"B-{span_label}\"\n",
"                    inside = True\n",
"                elif tok_end < span_start:\n",
"                    inside = False\n",
"\n",
"        token_id_rows.append(token_ids)\n",
"        mask_rows.append(encoded['attention_mask'])\n",
"        label_id_rows.append([label_map[tag] for tag in bio_tags])\n",
"\n",
"    # Bundle the three parallel lists under the keys the dataset code reads\n",
"    return {\n",
"        \"tokens\": token_id_rows,\n",
"        \"attention_mask\": mask_rows,\n",
"        \"labels\": label_id_rows,\n",
"    }\n",
"\n",
"# Encode the train and test split of each of the three datasets\n",
"train_processed_data1, test_processed_data1 = preprocess_data(train_df1), preprocess_data(test_df1)\n",
"train_processed_data2, test_processed_data2 = preprocess_data(train_df2), preprocess_data(test_df2)\n",
"train_processed_data3, test_processed_data3 = preprocess_data(train_df3), preprocess_data(test_df3)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "9ba4d8d0",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:54.326075Z",
"iopub.status.busy": "2024-10-19T03:27:54.325729Z",
"iopub.status.idle": "2024-10-19T03:27:54.332433Z",
"shell.execute_reply": "2024-10-19T03:27:54.331492Z"
},
"id": "ifdDc-t4lo7f",
"outputId": "15dc431e-0f6d-4731-cffb-01257ec01cbe",
"papermill": {
"duration": 0.033277,
"end_time": "2024-10-19T03:27:54.334431",
"exception": false,
"start_time": "2024-10-19T03:27:54.301154",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tokens: ['[CLS]', 'In', '##vasive', 'cancer', 'in', 'the', 'con', '##tral', '##ater', '##al', 'breast', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', 
'[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']\n",
"Labels: [0, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"Label names: ['O', 'B-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'I-cancer', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n"
]
}
],
"source": [
"# Inverse lookup: class id -> BIO tag\n",
"id2label = {idx: name for name, idx in label_map.items()}\n",
"\n",
"# Spot-check one encoded example (index 4 of the first training set)\n",
"sample_idx = 4\n",
"input_ids = train_processed_data1['tokens'][sample_idx]\n",
"labels = train_processed_data1['labels'][sample_idx]\n",
"\n",
"print(\"Tokens:\", tokenizer.convert_ids_to_tokens(input_ids))\n",
"print(\"Labels:\", labels)\n",
"print(\"Label names:\", [id2label[label] for label in labels])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "66a1ac9f",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:54.385597Z",
"iopub.status.busy": "2024-10-19T03:27:54.384859Z",
"iopub.status.idle": "2024-10-19T03:27:54.390883Z",
"shell.execute_reply": "2024-10-19T03:27:54.390020Z"
},
"id": "ohVMaS7Vlo7g",
"outputId": "016f80f8-8149-4b76-a036-8b449ff5b550",
"papermill": {
"duration": 0.034669,
"end_time": "2024-10-19T03:27:54.392720",
"exception": false,
"start_time": "2024-10-19T03:27:54.358051",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{0: 'O',\n",
" 1: 'B-treatment',\n",
" 2: 'I-treatment',\n",
" 3: 'B-chronic_disease',\n",
" 4: 'I-chronic_disease',\n",
" 5: 'B-cancer',\n",
" 6: 'I-cancer',\n",
" 7: 'B-allergy_name',\n",
" 8: 'I-allergy_name'}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the class id -> BIO tag mapping\n",
"id2label"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "c2ad9c0d",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:54.442258Z",
"iopub.status.busy": "2024-10-19T03:27:54.441982Z",
"iopub.status.idle": "2024-10-19T03:27:54.453664Z",
"shell.execute_reply": "2024-10-19T03:27:54.452705Z"
},
"id": "0LlOrM6Ylo7g",
"papermill": {
"duration": 0.039126,
"end_time": "2024-10-19T03:27:54.455630",
"exception": false,
"start_time": "2024-10-19T03:27:54.416504",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"from torch.utils.data import DataLoader, Dataset\n",
"\n",
"class NERDataset(Dataset):\n",
"    \"\"\"Map-style dataset over already-encoded NER examples.\n",
"\n",
"    Args:\n",
"        encodings: Sequence of per-example input-id lists.\n",
"        attention_masks: Sequence of per-example attention-mask lists.\n",
"        labels: Sequence of per-example label-id lists; its length is the dataset length.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, encodings, attention_masks, labels):\n",
"        self.encodings, self.attention_masks, self.labels = encodings, attention_masks, labels\n",
"\n",
"    def __len__(self):\n",
"        return len(self.labels)\n",
"\n",
"    def __getitem__(self, idx):\n",
"        # Tensors are built on access, one example at a time\n",
"        return {\n",
"            'input_ids': torch.tensor(self.encodings[idx]),\n",
"            'labels': torch.tensor(self.labels[idx]),\n",
"            'attention_mask': torch.tensor(self.attention_masks[idx]),\n",
"        }\n",
"\n",
"def make_dataloader(processed_data, shuffle):\n",
"    \"\"\"Wrap one ``preprocess_data`` result in a ``NERDataset`` and a batched ``DataLoader``.\n",
"\n",
"    Args:\n",
"        processed_data: dict with ``tokens``, ``attention_mask`` and ``labels`` lists.\n",
"        shuffle: Passed to ``DataLoader``; True reshuffles the examples every epoch.\n",
"\n",
"    Returns:\n",
"        DataLoader yielding batches of ``batch_size`` examples.\n",
"    \"\"\"\n",
"    dataset = NERDataset(\n",
"        processed_data[\"tokens\"],\n",
"        processed_data[\"attention_mask\"],\n",
"        processed_data[\"labels\"],\n",
"    )\n",
"    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)\n",
"\n",
"\n",
"# Training loaders: reshuffled every epoch\n",
"train_dataloader1 = make_dataloader(train_processed_data1, shuffle=True)\n",
"train_dataloader2 = make_dataloader(train_processed_data2, shuffle=True)\n",
"train_dataloader3 = make_dataloader(train_processed_data3, shuffle=True)\n",
"\n",
"# Held-out loaders: fixed order, so evaluation passes see the examples in a reproducible sequence\n",
"test_dataloader1 = make_dataloader(test_processed_data1, shuffle=False)\n",
"test_dataloader2 = make_dataloader(test_processed_data2, shuffle=False)\n",
"test_dataloader3 = make_dataloader(test_processed_data3, shuffle=False)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "55a9064a",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:54.503203Z",
"iopub.status.busy": "2024-10-19T03:27:54.502877Z",
"iopub.status.idle": "2024-10-19T03:27:54.507328Z",
"shell.execute_reply": "2024-10-19T03:27:54.506570Z"
},
"id": "5YTVKKFelo7h",
"papermill": {
"duration": 0.030373,
"end_time": "2024-10-19T03:27:54.509242",
"exception": false,
"start_time": "2024-10-19T03:27:54.478869",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Per-task lookup tables for the train and test loaders, keyed by task name\n",
"task_names = (\"T1\", \"T2\", \"T3\")\n",
"\n",
"train_dataloaders = dict(zip(task_names, (train_dataloader1, train_dataloader2, train_dataloader3)))\n",
"test_dataloaders = dict(zip(task_names, (test_dataloader1, test_dataloader2, test_dataloader3)))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "0db97df6",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:54.558325Z",
"iopub.status.busy": "2024-10-19T03:27:54.558017Z",
"iopub.status.idle": "2024-10-19T03:27:54.566817Z",
"shell.execute_reply": "2024-10-19T03:27:54.566013Z"
},
"id": "YoP8NH7_lo7h",
"papermill": {
"duration": 0.036128,
"end_time": "2024-10-19T03:27:54.568730",
"exception": false,
"start_time": "2024-10-19T03:27:54.532602",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def train_model(model, dataloader, optimizer, model_save_path, ewc=None, epochs=3):\n",
"    \"\"\"Train ``model`` on ``dataloader`` and checkpoint it whenever the epoch loss improves.\n",
"\n",
"    Args:\n",
"        model: Called with ``input_ids``, ``attention_mask`` and ``labels``; its output\n",
"            must expose ``.loss``.\n",
"        dataloader: Yields dict batches holding those three keys.\n",
"        optimizer: Optimizer stepped once per batch.\n",
"        model_save_path: Where ``model.state_dict()`` is written on each new minimum.\n",
"        ewc: Optional object whose ``penalty()`` is added to every batch loss.\n",
"        epochs: Number of passes over ``dataloader``.\n",
"\n",
"    Returns:\n",
"        ``(loss_values, model)``: the mean training loss of each epoch, and the model.\n",
"    \"\"\"\n",
"    model.to(device)\n",
"    model.train()\n",
"\n",
"    epoch_losses = []\n",
"    best_loss = float('inf')  # any real loss is lower, so the first epoch always saves\n",
"\n",
"    for epoch_idx in range(epochs):\n",
"        running_loss = 0\n",
"        for batch in tqdm.tqdm(dataloader):\n",
"            batch_ids = batch['input_ids'].to(device)\n",
"            batch_mask = batch['attention_mask'].to(device)\n",
"            batch_labels = batch['labels'].to(device)\n",
"\n",
"            loss = model(input_ids=batch_ids, attention_mask=batch_mask, labels=batch_labels).loss\n",
"            if ewc:\n",
"                loss += ewc.penalty()\n",
"\n",
"            running_loss += loss.item()\n",
"            optimizer.zero_grad()\n",
"            loss.backward()\n",
"            optimizer.step()\n",
"\n",
"        # Mean batch loss over this epoch\n",
"        mean_loss = running_loss / len(dataloader)\n",
"        epoch_losses.append(mean_loss)\n",
"        print(f\"Epoch {epoch_idx + 1} - Loss: {mean_loss}\")\n",
"\n",
"        # Keep only the weights of the best epoch so far (by training loss)\n",
"        if mean_loss < best_loss:\n",
"            print(f\"New minimum loss: {mean_loss:.4f}, saving model...\")\n",
"            best_loss = mean_loss\n",
"            torch.save(model.state_dict(), model_save_path)\n",
"\n",
"    return epoch_losses, model\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c559ac48",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:27:54.618328Z",
"iopub.status.busy": "2024-10-19T03:27:54.617988Z",
"iopub.status.idle": "2024-10-19T03:48:39.354329Z",
"shell.execute_reply": "2024-10-19T03:48:39.353239Z"
},
"id": "oAVH5NbRlo7i",
"outputId": "29051985-5854-4f88-f62a-1f6d525a5517",
"papermill": {
"duration": 1244.764251,
"end_time": "2024-10-19T03:48:39.356796",
"exception": false,
"start_time": "2024-10-19T03:27:54.592545",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training on Task T1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.10/site-packages/transformers/optimization.py:591: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"100%|██████████| 368/368 [03:56<00:00, 1.55it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 - Loss: 0.08122424427774208\n",
"New minimum loss: 0.0812, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 368/368 [04:09<00:00, 1.48it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 2 - Loss: 0.025342956971144304\n",
"New minimum loss: 0.0253, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 368/368 [04:10<00:00, 1.47it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 3 - Loss: 0.016992725335749918\n",
"New minimum loss: 0.0170, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 368/368 [04:10<00:00, 1.47it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 4 - Loss: 0.011919787018838257\n",
"New minimum loss: 0.0119, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 368/368 [04:10<00:00, 1.47it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 5 - Loss: 0.009049752625920202\n",
"New minimum loss: 0.0090, saving model...\n"
]
}
],
"source": [
"# Task T1: plain fine-tuning; no `ewc` argument is passed to train_model here.\n",
"print(\"Training on Task T1\")\n",
"\n",
"# Optimizer over every parameter of the model.\n",
"optimizer = AdamW(model.parameters(), lr=learning_rate)\n",
"\n",
"# Weights are written to 'model_weights1.pth' by train_model.\n",
"loss_values, model = train_model(\n",
"    model,\n",
"    train_dataloaders[\"T1\"],\n",
"    optimizer,\n",
"    'model_weights1.pth',\n",
"    epochs=num_epochs,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "bff88255",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:48:39.730515Z",
"iopub.status.busy": "2024-10-19T03:48:39.729790Z",
"iopub.status.idle": "2024-10-19T03:48:40.109164Z",
"shell.execute_reply": "2024-10-19T03:48:40.108226Z"
},
"id": "Wnfcrkkalo7i",
"papermill": {
"duration": 0.547607,
"end_time": "2024-10-19T03:48:40.111079",
"exception": false,
"start_time": "2024-10-19T03:48:39.563472",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Training-loss curve: one marker per epoch, taken from `loss_values`.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(range(1, num_epochs + 1), loss_values, marker='o', label=\"Training Loss\")\n",
"ax.set_title(\"Training Loss over Epochs\")\n",
"ax.set_xlabel(\"Epochs\")\n",
"ax.set_ylabel(\"Loss\")\n",
"ax.legend()\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "d3d010fe",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:48:40.469949Z",
"iopub.status.busy": "2024-10-19T03:48:40.469176Z",
"iopub.status.idle": "2024-10-19T03:48:40.475905Z",
"shell.execute_reply": "2024-10-19T03:48:40.475030Z"
},
"id": "Eloe2CR8lo7j",
"papermill": {
"duration": 0.198187,
"end_time": "2024-10-19T03:48:40.477782",
"exception": false,
"start_time": "2024-10-19T03:48:40.279595",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['O',\n",
" 'B-treatment',\n",
" 'I-treatment',\n",
" 'B-chronic_disease',\n",
" 'I-chronic_disease',\n",
" 'B-cancer',\n",
" 'I-cancer',\n",
" 'B-allergy_name',\n",
" 'I-allergy_name']"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the label names held as keys of label_map.\n",
"[*label_map.keys()]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "4368e762",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:48:40.815381Z",
"iopub.status.busy": "2024-10-19T03:48:40.814692Z",
"iopub.status.idle": "2024-10-19T03:48:40.832244Z",
"shell.execute_reply": "2024-10-19T03:48:40.831329Z"
},
"id": "gwsrnHeklo7j",
"papermill": {
"duration": 0.186816,
"end_time": "2024-10-19T03:48:40.834142",
"exception": false,
"start_time": "2024-10-19T03:48:40.647326",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"from sklearn.metrics import f1_score\n",
"import numpy as np\n",
"from seqeval.metrics import classification_report\n",
"\n",
"def evaluation(test_dataloaders, model):\n",
"    \"\"\"Score a token-classification model with seqeval and return its F1 values.\n",
"\n",
"    Args:\n",
"        test_dataloaders: Iterable of batches. Each batch is indexed with\n",
"            'input_ids', 'attention_mask' and 'labels'.\n",
"        model: Model called with input_ids / attention_mask whose output\n",
"            exposes `.logits`.\n",
"\n",
"    Returns:\n",
"        A tuple `(entity_f1_scores, weighted_avg_f1)`: a dict mapping each of\n",
"        'treatment', 'chronic_disease', 'cancer' and 'allergy_name' to its F1\n",
"        score (0.0 when the entity type is absent from the report), and the\n",
"        report's 'weighted avg' F1 score.\n",
"    \"\"\"\n",
"    model.eval()\n",
"\n",
"    y_true = []\n",
"    y_pred = []\n",
"\n",
"    with torch.no_grad():\n",
"        for batch in tqdm.tqdm(test_dataloaders):\n",
"            input_ids = batch['input_ids'].to(device)\n",
"            attention_mask = batch['attention_mask'].to(device)\n",
"\n",
"            # The attention mask must be supplied, as it is during training;\n",
"            # without it the model attends to padding tokens and the\n",
"            # predictions for padded batches are degraded.\n",
"            outputs = model(input_ids=input_ids, attention_mask=attention_mask)\n",
"\n",
"            # Predicted class id per token = argmax over the label dimension.\n",
"            predictions = torch.argmax(outputs.logits, dim=-1).cpu().numpy()\n",
"            labels = batch['labels'].cpu().numpy()\n",
"\n",
"            for label, pred in zip(labels, predictions):\n",
"                # Positions labelled -100 are excluded from both sequences so\n",
"                # that y_true and y_pred stay aligned token for token.\n",
"                y_true.append([id2label[l] for l in label if l != -100])\n",
"                y_pred.append([id2label[p] for p, l in zip(pred, label) if l != -100])\n",
"\n",
"    print(classification_report(y_true, y_pred))\n",
"    print(\"*\"*40)\n",
"\n",
"    report = classification_report(y_true, y_pred, output_dict=True)\n",
"\n",
"    # Extract the F1 score of each entity type; an entity type that never\n",
"    # occurs in y_true or y_pred has no report entry and is scored 0.0.\n",
"    entity_f1_scores = {}\n",
"    for label in ['treatment', 'chronic_disease', 'cancer', 'allergy_name']:\n",
"        entity_f1_scores[label] = report.get(label, {}).get('f1-score', 0.0)\n",
"\n",
"    weighted_avg_f1 = report['weighted avg']['f1-score']\n",
"\n",
"    print(\"Entity-wise F1 scores:\")\n",
"    for entity, score in entity_f1_scores.items():\n",
"        print(f\"{entity}: {score:.4f}\")\n",
"    print(f\"Weighted Average F1 score: {weighted_avg_f1:.4f}\")\n",
"\n",
"    return (entity_f1_scores, weighted_avg_f1)"
]
},
{
"cell_type": "markdown",
"id": "07db5b76",
"metadata": {
"id": "IGfSMxuPlo7k",
"papermill": {
"duration": 0.163507,
"end_time": "2024-10-19T03:48:41.163645",
"exception": false,
"start_time": "2024-10-19T03:48:41.000138",
"status": "completed"
},
"tags": []
},
"source": [
"### Task 2"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "e2e40190",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:48:41.497244Z",
"iopub.status.busy": "2024-10-19T03:48:41.496062Z",
"iopub.status.idle": "2024-10-19T03:48:41.507922Z",
"shell.execute_reply": "2024-10-19T03:48:41.506948Z"
},
"id": "CI9uE4_flo7m",
"papermill": {
"duration": 0.181607,
"end_time": "2024-10-19T03:48:41.509989",
"exception": false,
"start_time": "2024-10-19T03:48:41.328382",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"class EWC:\n",
"    \"\"\"Elastic Weight Consolidation penalty anchored at the model's current weights.\n",
"\n",
"    On construction, a diagonal Fisher estimate (mean over batches of the\n",
"    squared loss gradient) and a detached copy of every trainable parameter\n",
"    are stored. `penalty()` returns\n",
"    `importance * sum(fisher * (param - stored_param) ** 2)`.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, model, dataloader, importance=1000):\n",
"        \"\"\"Store the model and immediately compute the Fisher estimate.\n",
"\n",
"        Args:\n",
"            model: Module called with input_ids / attention_mask / labels\n",
"                whose output exposes `.loss`.\n",
"            dataloader: Batches used for the Fisher estimate; each batch is\n",
"                indexed with 'input_ids', 'attention_mask' and 'labels'.\n",
"            importance: Multiplier applied to the summed penalty.\n",
"        \"\"\"\n",
"        self.model = model\n",
"        self.importance = importance\n",
"        self.params = {n: p for n, p in self.model.named_parameters() if p.requires_grad}\n",
"        self._means = {}\n",
"        self._fishers = {}\n",
"        self.dataloader = dataloader\n",
"        self.compute_fisher_information()\n",
"\n",
"    def compute_fisher_information(self):\n",
"        \"\"\"Fill `_fishers` and `_means` from the current model and dataloader.\n",
"\n",
"        Puts the model in eval mode and leaves it there.\n",
"        \"\"\"\n",
"        self.model.eval()\n",
"        fisher_diagonals = {n: torch.zeros_like(p) for n, p in self.params.items()}\n",
"        num_batches = len(self.dataloader)\n",
"\n",
"        for batch in self.dataloader:\n",
"            # backward() adds into .grad, so clear it first; otherwise each\n",
"            # batch would square the running sum of all earlier gradients.\n",
"            self.model.zero_grad()\n",
"            outputs = self.model(input_ids=batch['input_ids'].to(device),\n",
"                                 attention_mask=batch['attention_mask'].to(device),\n",
"                                 labels=batch['labels'].to(device))\n",
"            outputs.loss.backward()\n",
"\n",
"            for n, p in self.params.items():\n",
"                # A parameter that did not take part in the loss has no gradient.\n",
"                if p.grad is None:\n",
"                    continue\n",
"                fisher_diagonals[n] += (p.grad.detach() ** 2) / num_batches\n",
"\n",
"        # Do not leave the last batch's gradients on the model.\n",
"        self.model.zero_grad()\n",
"\n",
"        for n, fisher in fisher_diagonals.items():\n",
"            self._fishers[n] = fisher\n",
"            self._means[n] = self.params[n].detach().clone()\n",
"\n",
"    def penalty(self):\n",
"        \"\"\"Return the Fisher-weighted squared drift from the stored weights, times `importance`.\"\"\"\n",
"        loss = 0\n",
"        for n, p in self.params.items():\n",
"            fisher = self._fishers[n]\n",
"            mean = self._means[n]\n",
"            loss += (fisher * (p - mean) ** 2).sum()\n",
"        return loss * self.importance\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e1159fec",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T03:48:41.852631Z",
"iopub.status.busy": "2024-10-19T03:48:41.851811Z",
"iopub.status.idle": "2024-10-19T04:08:04.342631Z",
"shell.execute_reply": "2024-10-19T04:08:04.341791Z"
},
"id": "_Y6MYJzqlo7m",
"papermill": {
"duration": 1162.666952,
"end_time": "2024-10-19T04:08:04.345282",
"exception": false,
"start_time": "2024-10-19T03:48:41.678330",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training on Task T2\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 323/323 [03:50<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 - Loss: 0.029211484109146677\n",
"New minimum loss: 0.0292, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 323/323 [03:50<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 2 - Loss: 0.018161326241693906\n",
"New minimum loss: 0.0182, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 323/323 [03:50<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 3 - Loss: 0.012592665302918953\n",
"New minimum loss: 0.0126, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 323/323 [03:50<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 4 - Loss: 0.008822424781664446\n",
"New minimum loss: 0.0088, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 323/323 [03:51<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 5 - Loss: 0.006582645188127544\n",
"New minimum loss: 0.0066, saving model...\n"
]
}
],
"source": [
"import random\n",
"from torch.utils.data import Subset, ConcatDataset\n",
"\n",
"# Replay buffer: keep 100 randomly chosen training examples from T1.\n",
"replay_buffer_T1 = random.sample(range(len(train_dataloaders[\"T1\"].dataset)), 100)\n",
"sampled_dataset = Subset(train_dataloaders[\"T1\"].dataset, replay_buffer_T1)\n",
"replay_dataloader_T1 = DataLoader(sampled_dataset, batch_size=batch_size, shuffle=True)\n",
"\n",
"# T2 training data plus the T1 replay examples.\n",
"combined_dataset = ConcatDataset([train_dataloaders[\"T2\"].dataset, sampled_dataset])\n",
"combined_loader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True)\n",
"\n",
"# Task 2 training with EWC\n",
"print(\"Training on Task T2\")\n",
"\n",
"# EWC anchor: Fisher estimate and weight snapshot taken from the current\n",
"# `model` on the T1 replay examples, before any T2 update is applied.\n",
"ewc_T2 = EWC(model, replay_dataloader_T1)\n",
"\n",
"# Train on combined_loader so the replay examples are actually rehearsed.\n",
"# (The loader used to be built and then ignored in favour of the T2-only loader.)\n",
"loss_values, model = train_model(model, combined_loader, optimizer, 'model_weights2.pth', ewc=ewc_T2, epochs=num_epochs)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "9756bb52",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T04:08:04.976483Z",
"iopub.status.busy": "2024-10-19T04:08:04.976123Z",
"iopub.status.idle": "2024-10-19T04:08:05.265154Z",
"shell.execute_reply": "2024-10-19T04:08:05.264237Z"
},
"id": "vphcmWcBlo7n",
"papermill": {
"duration": 0.619363,
"end_time": "2024-10-19T04:08:05.267369",
"exception": false,
"start_time": "2024-10-19T04:08:04.648006",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Training-loss curve: one marker per epoch, taken from `loss_values`.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(range(1, num_epochs + 1), loss_values, marker='o', label=\"Training Loss\")\n",
"ax.set_title(\"Training Loss over Epochs\")\n",
"ax.set_xlabel(\"Epochs\")\n",
"ax.set_ylabel(\"Loss\")\n",
"ax.legend()\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "0ce612a1",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T04:08:05.848193Z",
"iopub.status.busy": "2024-10-19T04:08:05.847798Z",
"iopub.status.idle": "2024-10-19T04:27:01.520968Z",
"shell.execute_reply": "2024-10-19T04:27:01.520130Z"
},
"id": "mMJ9RdNFlo7n",
"papermill": {
"duration": 1135.965761,
"end_time": "2024-10-19T04:27:01.523469",
"exception": false,
"start_time": "2024-10-19T04:08:05.557708",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training on Task T3\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 314/314 [03:44<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 - Loss: 0.028447151246367937\n",
"New minimum loss: 0.0284, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 314/314 [03:44<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 2 - Loss: 0.017575527700020153\n",
"New minimum loss: 0.0176, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 314/314 [03:44<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 3 - Loss: 0.012083320739785814\n",
"New minimum loss: 0.0121, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 314/314 [03:44<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 4 - Loss: 0.008300796071079317\n",
"New minimum loss: 0.0083, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 314/314 [03:44<00:00, 1.40it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 5 - Loss: 0.00641057728663692\n",
"New minimum loss: 0.0064, saving model...\n"
]
}
],
"source": [
"import random\n",
"from torch.utils.data import ConcatDataset, Subset\n",
"\n",
"# Replay buffer: keep 100 randomly chosen training examples from each of T1 and T2.\n",
"replay_buffer_T1 = random.sample(range(len(train_dataloaders[\"T1\"].dataset)), 100)\n",
"sampled_dataset1 = Subset(train_dataloaders[\"T1\"].dataset, replay_buffer_T1)\n",
"replay_buffer_T2 = random.sample(range(len(train_dataloaders[\"T2\"].dataset)), 100)\n",
"sampled_dataset2 = Subset(train_dataloaders[\"T2\"].dataset, replay_buffer_T2)\n",
"replay_dataset_T1_T2 = ConcatDataset([sampled_dataset1, sampled_dataset2])\n",
"replay_dataloader_T1_T2 = DataLoader(replay_dataset_T1_T2, batch_size=batch_size, shuffle=True)\n",
"\n",
"# T3 training data plus the T1/T2 replay examples.\n",
"combined_dataset = ConcatDataset([train_dataloaders[\"T3\"].dataset, replay_dataset_T1_T2])\n",
"combined_loader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True)\n",
"\n",
"# Task 3 training with EWC\n",
"print(\"Training on Task T3\")\n",
"\n",
"# EWC anchor: Fisher estimate and weight snapshot taken from the current\n",
"# `model` on the T1/T2 replay examples, before any T3 update is applied.\n",
"ewc_T1_T2 = EWC(model, replay_dataloader_T1_T2)\n",
"\n",
"# Train on combined_loader so the replay examples are actually rehearsed.\n",
"# (The loader used to be built and then ignored in favour of the T3-only loader.)\n",
"loss_values, model = train_model(model, combined_loader, optimizer, 'model_weights3.pth', ewc=ewc_T1_T2, epochs=num_epochs)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "faf9a6ac",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T04:27:02.356894Z",
"iopub.status.busy": "2024-10-19T04:27:02.356252Z",
"iopub.status.idle": "2024-10-19T04:27:02.642888Z",
"shell.execute_reply": "2024-10-19T04:27:02.641948Z"
},
"id": "0YsWI0wClo7o",
"papermill": {
"duration": 0.703735,
"end_time": "2024-10-19T04:27:02.644889",
"exception": false,
"start_time": "2024-10-19T04:27:01.941154",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Training-loss curve: one marker per epoch, taken from `loss_values`.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(range(1, num_epochs + 1), loss_values, marker='o', label=\"Training Loss\")\n",
"ax.set_title(\"Training Loss over Epochs\")\n",
"ax.set_xlabel(\"Epochs\")\n",
"ax.set_ylabel(\"Loss\")\n",
"ax.legend()\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "4011f2b4",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T04:27:03.519958Z",
"iopub.status.busy": "2024-10-19T04:27:03.519556Z",
"iopub.status.idle": "2024-10-19T05:24:17.930675Z",
"shell.execute_reply": "2024-10-19T05:24:17.929777Z"
},
"id": "CPeGbtV4lo7o",
"papermill": {
"duration": 3434.867153,
"end_time": "2024-10-19T05:24:17.933208",
"exception": false,
"start_time": "2024-10-19T04:27:03.066055",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training on G1+G2+G3 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"100%|██████████| 1004/1004 [11:25<00:00, 1.46it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1 - Loss: 0.049782790117470395\n",
"New minimum loss: 0.0498, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1004/1004 [11:25<00:00, 1.46it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 2 - Loss: 0.021665205914653332\n",
"New minimum loss: 0.0217, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1004/1004 [11:25<00:00, 1.46it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 3 - Loss: 0.016814892424921693\n",
"New minimum loss: 0.0168, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1004/1004 [11:26<00:00, 1.46it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 4 - Loss: 0.012903303460684601\n",
"New minimum loss: 0.0129, saving model...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1004/1004 [11:27<00:00, 1.46it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 5 - Loss: 0.010335032426728032\n",
"New minimum loss: 0.0103, saving model...\n"
]
}
],
"source": [
"# Joint baseline: a fresh model trained on the union of the T1, T2 and T3 data.\n",
"combined_train_dataset = ConcatDataset(\n",
"    [train_dataloaders[task].dataset for task in (\"T1\", \"T2\", \"T3\")]\n",
")\n",
"combined_test_dataset = ConcatDataset(\n",
"    [test_dataloaders[task].dataset for task in (\"T1\", \"T2\", \"T3\")]\n",
")\n",
"\n",
"# Loaders over the concatenated datasets (both are shuffled).\n",
"combined_train_loader_123 = DataLoader(combined_train_dataset, batch_size=batch_size, shuffle=True)\n",
"combined_test_loader_123 = DataLoader(combined_test_dataset, batch_size=batch_size, shuffle=True)\n",
"\n",
"print(\"Training on G1+G2+G3 \")\n",
"\n",
"# Start again from the pretrained checkpoint with a new optimizer.\n",
"model = BertForTokenClassification.from_pretrained(\n",
"    \"bert-base-cased\",\n",
"    num_labels=len(label_map),\n",
")\n",
"optimizer = AdamW(model.parameters(), lr=learning_rate)\n",
"\n",
"# Only the loss history is kept; the returned model is discarded.\n",
"loss_values, _ = train_model(\n",
"    model,\n",
"    combined_train_loader_123,\n",
"    optimizer,\n",
"    'model_weights4.pth',\n",
"    epochs=num_epochs,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "2642e21c",
"metadata": {
"id": "HIKlgZ-llo7p",
"papermill": {
"duration": 0.845308,
"end_time": "2024-10-19T05:24:19.581088",
"exception": false,
"start_time": "2024-10-19T05:24:18.735780",
"status": "completed"
},
"tags": []
},
"source": [
"## Evaluation"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "e0f13d48",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:24:21.255939Z",
"iopub.status.busy": "2024-10-19T05:24:21.255015Z",
"iopub.status.idle": "2024-10-19T05:24:57.894478Z",
"shell.execute_reply": "2024-10-19T05:24:57.893525Z"
},
"id": "PM9Iwib6lo7p",
"papermill": {
"duration": 38.318841,
"end_time": "2024-10-19T05:24:58.721275",
"exception": false,
"start_time": "2024-10-19T05:24:20.402434",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task 1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
" 0%| | 0/92 [00:00, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n",
"100%|██████████| 92/92 [00:19<00:00, 4.68it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" allergy_name 0.06 0.02 0.03 51\n",
" cancer 0.56 0.59 0.58 387\n",
"chronic_disease 0.52 0.42 0.47 1118\n",
" treatment 0.49 0.60 0.54 1248\n",
"\n",
" micro avg 0.51 0.51 0.51 2804\n",
" macro avg 0.41 0.41 0.40 2804\n",
" weighted avg 0.51 0.51 0.51 2804\n",
"\n",
"****************************************\n",
"Entity-wise F1 scores:\n",
"treatment: 0.5415\n",
"chronic_disease: 0.4657\n",
"cancer: 0.5765\n",
"allergy_name: 0.0299\n",
"Weighted Average F1 score: 0.5068\n"
]
}
],
"source": [
"# Task 1: weights from 'model_weights1.pth', scored on the T1 test set.\n",
"print(\"Task 1\")\n",
"\n",
"# Rebuild the architecture, then overwrite its weights with the saved state dict.\n",
"model = BertForTokenClassification.from_pretrained(\n",
"    \"bert-base-cased\",\n",
"    num_labels=len(label_map),\n",
")\n",
"task1_weights = torch.load('model_weights1.pth', weights_only=True)\n",
"model.load_state_dict(task1_weights)\n",
"model.to(device)\n",
"\n",
"T1_results = evaluation(test_dataloaders['T1'], model)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "6e135e6d",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:25:00.409497Z",
"iopub.status.busy": "2024-10-19T05:25:00.408607Z",
"iopub.status.idle": "2024-10-19T05:26:07.994354Z",
"shell.execute_reply": "2024-10-19T05:26:07.993364Z"
},
"id": "A9DCeiFmlo7p",
"papermill": {
"duration": 69.295494,
"end_time": "2024-10-19T05:26:08.823638",
"exception": false,
"start_time": "2024-10-19T05:24:59.528144",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task 1, 2\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 173/173 [00:36<00:00, 4.76it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" allergy_name 0.49 0.45 0.47 105\n",
" cancer 0.59 0.58 0.59 759\n",
"chronic_disease 0.55 0.73 0.63 2207\n",
" treatment 0.58 0.65 0.61 2391\n",
"\n",
" micro avg 0.57 0.67 0.62 5462\n",
" macro avg 0.55 0.60 0.58 5462\n",
" weighted avg 0.57 0.67 0.61 5462\n",
"\n",
"****************************************\n",
"Entity-wise F1 scores:\n",
"treatment: 0.6140\n",
"chronic_disease: 0.6312\n",
"cancer: 0.5852\n",
"allergy_name: 0.4700\n",
"Weighted Average F1 score: 0.6142\n"
]
}
],
"source": [
"# Rebuild the architecture, then overwrite its weights with 'model_weights2.pth'.\n",
"model = BertForTokenClassification.from_pretrained(\n",
"    \"bert-base-cased\",\n",
"    num_labels=len(label_map),\n",
")\n",
"task2_weights = torch.load('model_weights2.pth', weights_only=True)\n",
"model.load_state_dict(task2_weights)\n",
"model.to(device)\n",
"\n",
"# Test data of T1 and T2 together, served by one shuffled loader.\n",
"combined_dataset = ConcatDataset(\n",
"    [test_dataloaders[task].dataset for task in (\"T1\", \"T2\")]\n",
")\n",
"combined_loader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True)\n",
"\n",
"# Tasks 1 and 2\n",
"print(\"Task 1, 2\")\n",
"T2_results = evaluation(combined_loader, model)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "3b2b9885",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:26:10.536885Z",
"iopub.status.busy": "2024-10-19T05:26:10.536539Z",
"iopub.status.idle": "2024-10-19T05:27:46.811167Z",
"shell.execute_reply": "2024-10-19T05:27:46.810178Z"
},
"id": "Po4MRsUOlo7q",
"papermill": {
"duration": 97.127076,
"end_time": "2024-10-19T05:27:46.813299",
"exception": false,
"start_time": "2024-10-19T05:26:09.686223",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task 1, 2 and 3\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 252/252 [00:51<00:00, 4.87it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" allergy_name 0.60 0.57 0.58 182\n",
" cancer 0.62 0.60 0.61 1091\n",
"chronic_disease 0.61 0.62 0.61 3242\n",
" treatment 0.56 0.71 0.63 3530\n",
"\n",
" micro avg 0.59 0.65 0.62 8045\n",
" macro avg 0.60 0.62 0.61 8045\n",
" weighted avg 0.59 0.65 0.62 8045\n",
"\n",
"****************************************\n",
"Entity-wise F1 scores:\n",
"treatment: 0.6263\n",
"chronic_disease: 0.6110\n",
"cancer: 0.6101\n",
"allergy_name: 0.5803\n",
"Weighted Average F1 score: 0.6169\n"
]
}
],
"source": [
"# Tasks 1-3: weights from 'model_weights3.pth', scored on the combined test loader.\n",
"print(\"Task 1, 2 and 3\")\n",
"\n",
"# Rebuild the architecture, then overwrite its weights with the saved state dict.\n",
"model = BertForTokenClassification.from_pretrained(\n",
"    \"bert-base-cased\",\n",
"    num_labels=len(label_map),\n",
")\n",
"task3_weights = torch.load('model_weights3.pth', weights_only=True)\n",
"model.load_state_dict(task3_weights)\n",
"model.to(device)\n",
"\n",
"T3_results = evaluation(combined_test_loader_123, model)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "c2fbeac6",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:27:48.544865Z",
"iopub.status.busy": "2024-10-19T05:27:48.544000Z",
"iopub.status.idle": "2024-10-19T05:29:24.151053Z",
"shell.execute_reply": "2024-10-19T05:29:24.149945Z"
},
"id": "0ckKGcchlo7q",
"papermill": {
"duration": 96.45821,
"end_time": "2024-10-19T05:29:24.153095",
"exception": false,
"start_time": "2024-10-19T05:27:47.694885",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task 4 - G1+G2+G3\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 252/252 [00:51<00:00, 4.93it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" allergy_name 0.46 0.34 0.39 182\n",
" cancer 0.62 0.64 0.63 1091\n",
"chronic_disease 0.59 0.57 0.58 3242\n",
" treatment 0.58 0.49 0.53 3530\n",
"\n",
" micro avg 0.59 0.54 0.56 8045\n",
" macro avg 0.56 0.51 0.53 8045\n",
" weighted avg 0.59 0.54 0.56 8045\n",
"\n",
"****************************************\n",
"Entity-wise F1 scores:\n",
"treatment: 0.5326\n",
"chronic_disease: 0.5763\n",
"cancer: 0.6279\n",
"allergy_name: 0.3861\n",
"Weighted Average F1 score: 0.5598\n"
]
}
],
"source": [
"# Joint baseline: weights from 'model_weights4.pth', scored on the combined test loader.\n",
"print(\"Task 4 - G1+G2+G3\")\n",
"\n",
"# Rebuild the architecture, then overwrite its weights with the saved state dict.\n",
"model = BertForTokenClassification.from_pretrained(\n",
"    \"bert-base-cased\",\n",
"    num_labels=len(label_map),\n",
")\n",
"joint_weights = torch.load('model_weights4.pth', weights_only=True)\n",
"model.load_state_dict(joint_weights)\n",
"model.to(device)\n",
"\n",
"T4_results = evaluation(combined_test_loader_123, model)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "3ae6d10a",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:29:25.926295Z",
"iopub.status.busy": "2024-10-19T05:29:25.925448Z",
"iopub.status.idle": "2024-10-19T05:29:25.931491Z",
"shell.execute_reply": "2024-10-19T05:29:25.930640Z"
},
"id": "SCSe8E_CVPdW",
"papermill": {
"duration": 0.870674,
"end_time": "2024-10-19T05:29:25.933385",
"exception": false,
"start_time": "2024-10-19T05:29:25.062711",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def get_all_scores(results):\n",
"    \"\"\"Collect evaluation results into one DataFrame, one row per result.\n",
"\n",
"    Args:\n",
"        results: Iterable of results where item [0] is a dict mapping an\n",
"            entity name to its score and item [1] is the weighted-average score.\n",
"\n",
"    Returns:\n",
"        A DataFrame with a 'Weighted Average' column followed by one column\n",
"        per entity name, in order of first appearance.\n",
"    \"\"\"\n",
"    columns = {'Weighted Average': []}\n",
"    for outcome in results:\n",
"        per_entity = outcome[0]\n",
"        for name in per_entity:\n",
"            # Create the entity's column on first sight, then extend it.\n",
"            columns.setdefault(name, []).append(per_entity[name])\n",
"        columns['Weighted Average'].append(outcome[1])\n",
"    return pd.DataFrame(columns)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "7606e6b5",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:29:27.703993Z",
"iopub.status.busy": "2024-10-19T05:29:27.703600Z",
"iopub.status.idle": "2024-10-19T05:29:27.717788Z",
"shell.execute_reply": "2024-10-19T05:29:27.716931Z"
},
"id": "VOX78HFplo7r",
"papermill": {
"duration": 0.880314,
"end_time": "2024-10-19T05:29:27.719798",
"exception": false,
"start_time": "2024-10-19T05:29:26.839484",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Performance on the test set of T1 | \n",
" Performance on the test set of T1 and T2. | \n",
" Performance on the test set of T1, T2 and T3. | \n",
" Performance on combined G1+G2+G3 | \n",
"
\n",
" \n",
" \n",
" \n",
" Weighted Average | \n",
" 0.506789 | \n",
" 0.614178 | \n",
" 0.616918 | \n",
" 0.559810 | \n",
"
\n",
" \n",
" treatment | \n",
" 0.541501 | \n",
" 0.613997 | \n",
" 0.626349 | \n",
" 0.532574 | \n",
"
\n",
" \n",
" chronic_disease | \n",
" 0.465672 | \n",
" 0.631210 | \n",
" 0.611009 | \n",
" 0.576314 | \n",
"
\n",
" \n",
" cancer | \n",
" 0.576485 | \n",
" 0.585170 | \n",
" 0.610075 | \n",
" 0.627876 | \n",
"
\n",
" \n",
" allergy_name | \n",
" 0.029851 | \n",
" 0.470000 | \n",
" 0.580282 | \n",
" 0.386076 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Performance on the test set of T1 \\\n",
"Weighted Average 0.506789 \n",
"treatment 0.541501 \n",
"chronic_disease 0.465672 \n",
"cancer 0.576485 \n",
"allergy_name 0.029851 \n",
"\n",
" Performance on the test set of T1 and T2. \\\n",
"Weighted Average 0.614178 \n",
"treatment 0.613997 \n",
"chronic_disease 0.631210 \n",
"cancer 0.585170 \n",
"allergy_name 0.470000 \n",
"\n",
" Performance on the test set of T1, T2 and T3. \\\n",
"Weighted Average 0.616918 \n",
"treatment 0.626349 \n",
"chronic_disease 0.611009 \n",
"cancer 0.610075 \n",
"allergy_name 0.580282 \n",
"\n",
" Performance on combined G1+G2+G3 \n",
"Weighted Average 0.559810 \n",
"treatment 0.532574 \n",
"chronic_disease 0.576314 \n",
"cancer 0.627876 \n",
"allergy_name 0.386076 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One column per evaluated checkpoint, one row per score (the frame is transposed).\n",
"all_scores_df = get_all_scores(\n",
"    [T1_results, T2_results, T3_results, T4_results]\n",
").T\n",
"all_scores_df.columns = [\n",
"    \"Performance on the test set of T1\",\n",
"    \"Performance on the test set of T1 and T2.\",\n",
"    \"Performance on the test set of T1, T2 and T3.\",\n",
"    \"Performance on combined G1+G2+G3\",\n",
"]\n",
"all_scores_df"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "73b43a29",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-19T05:29:29.496012Z",
"iopub.status.busy": "2024-10-19T05:29:29.495622Z",
"iopub.status.idle": "2024-10-19T05:29:29.504783Z",
"shell.execute_reply": "2024-10-19T05:29:29.503948Z"
},
"id": "enLklxLYYJ3f",
"papermill": {
"duration": 0.876876,
"end_time": "2024-10-19T05:29:29.506741",
"exception": false,
"start_time": "2024-10-19T05:29:28.629865",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Write the summary table to a CSV file (relative path, so it lands in the working directory).\n",
"all_scores_df.to_csv(path_or_buf='all_scores_df.csv')"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kaggle": {
"accelerator": "nvidiaTeslaT4",
"dataSources": [
{
"datasetId": 5886347,
"sourceId": 9639798,
"sourceType": "datasetVersion"
},
{
"datasetId": 5902909,
"sourceId": 9661596,
"sourceType": "datasetVersion"
}
],
"dockerImageVersionId": 30787,
"isGpuEnabled": true,
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
},
"papermill": {
"default_parameters": {},
"duration": 7349.251517,
"end_time": "2024-10-19T05:29:33.328399",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2024-10-19T03:27:04.076882",
"version": "2.6.0"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {
"06302581b083456b9c3c8c963644a989": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"06b337d1462e48cc95af610666b09666": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_3017f343371943908ea25ee280c2e9bd",
"IPY_MODEL_62d2ee84b5744a04a3b748dfe732eded",
"IPY_MODEL_54ec13be30554926b7976e543d094425"
],
"layout": "IPY_MODEL_4f1072a412db40419f85a09ae42f2bb9"
}
},
"09f90ce81bd64eccb1a1518a1faff7e9": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1493273896304bfd9899e71216b42ab7": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"18cf30eccdc84df3b4781cf1fbbb78a3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8153dad620e640b0a9b5604febd27d64",
"max": 435755784.0,
"min": 0.0,
"orientation": "horizontal",
"style": "IPY_MODEL_e4f6862c274747b39bf143bcf9fe97b0",
"value": 435755784.0
}
},
"1ac46f0a3b5f40beb8aeffc492755fdf": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3fa198b1bbd74d3ca808db47c6bcfb91",
"placeholder": "",
"style": "IPY_MODEL_d495045efcf744dca3d51dec98f0734e",
"value": " 436M/436M [00:02<00:00, 262MB/s]"
}
},
"231a5bb115b342c48e4c4fd2eeb44857": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"2b8d2f901ab243318b5cfbeff8f25eee": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2b94730544a24657b6119ca3cd72860a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c6b9c36b4d134c6f87726559c57d3ae1",
"placeholder": "",
"style": "IPY_MODEL_f52def1ec17b412cb60d44beef8080ee",
"value": "vocab.txt: 100%"
}
},
"2dc678e82fa64c48a790e3d819348391": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"2e626c68a5ab4b91ac0c52cc2b81508e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3017f343371943908ea25ee280c2e9bd": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f1f1e7a622044ef68a1ce1a241e002ac",
"placeholder": "",
"style": "IPY_MODEL_2e626c68a5ab4b91ac0c52cc2b81508e",
"value": "config.json: 100%"
}
},
"3696fef0fada456b8cb387f1653215be": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3fa198b1bbd74d3ca808db47c6bcfb91": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4293c9517ab64783bb8dc7683e103b98": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"4b8b04381c264cd19a7217e3e5f2c6fb": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_85a6f29eed654e609c714e833d490760",
"placeholder": "",
"style": "IPY_MODEL_9318bbe7299a4facb4089b2b3f03f7e4",
"value": "model.safetensors: 100%"
}
},
"4df732b79c4d44f6ab297c7ebe4b8f7b": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ccc45da15caf4419851727e39bb5a547",
"IPY_MODEL_f9110544b8c4435bb3ee6320eb0c11ea",
"IPY_MODEL_caf8825fcd1f416e927d41b783f20e7d"
],
"layout": "IPY_MODEL_1493273896304bfd9899e71216b42ab7"
}
},
"4f1072a412db40419f85a09ae42f2bb9": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"54ec13be30554926b7976e543d094425": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2b8d2f901ab243318b5cfbeff8f25eee",
"placeholder": "",
"style": "IPY_MODEL_f3186d2a3d9f4c22be1d0f110c50efd0",
"value": " 570/570 [00:00<00:00, 49.8kB/s]"
}
},
"62d2ee84b5744a04a3b748dfe732eded": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_fd467c9c41be4d29995c699b75b81cd5",
"max": 570.0,
"min": 0.0,
"orientation": "horizontal",
"style": "IPY_MODEL_658373fa07ac408a89ed555b0a840a14",
"value": 570.0
}
},
"658373fa07ac408a89ed555b0a840a14": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"74b541919b784641a8526b4ca1d0a404": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7945c90c7d474b51abd266091f901952": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"81527808acc0472cbdd51e7c6420ce8a": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8153dad620e640b0a9b5604febd27d64": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"85a6f29eed654e609c714e833d490760": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"87a61e94a64a4c488860663c7a17d964": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9318bbe7299a4facb4089b2b3f03f7e4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a66f9039603d4070893769f603c55b88": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"a89ad9a1b1164c17912bbfd6a1c671bf": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ad070181b22f49c5a6c3ae0caa55d3b8": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_81527808acc0472cbdd51e7c6420ce8a",
"max": 213450.0,
"min": 0.0,
"orientation": "horizontal",
"style": "IPY_MODEL_2dc678e82fa64c48a790e3d819348391",
"value": 213450.0
}
},
"aeb890f8c9b6434ba3feda619ccd3b07": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a89ad9a1b1164c17912bbfd6a1c671bf",
"placeholder": "",
"style": "IPY_MODEL_7945c90c7d474b51abd266091f901952",
"value": "tokenizer_config.json: 100%"
}
},
"af620a1558ad48f0bf24a24e7b0d7e3c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b1a021dcde9240dcb0c2be5725a96c59": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b659c0a2dd244c6d9313eeba2ec38298": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c625e09aef47433894e0d121efcaa2e6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c6b9c36b4d134c6f87726559c57d3ae1": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"caf8825fcd1f416e927d41b783f20e7d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_09f90ce81bd64eccb1a1518a1faff7e9",
"placeholder": "",
"style": "IPY_MODEL_4293c9517ab64783bb8dc7683e103b98",
"value": " 436k/436k [00:00<00:00, 5.96MB/s]"
}
},
"cbe1d6d4f28b4db8b41c24c8d6ea7179": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ccc45da15caf4419851727e39bb5a547": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c625e09aef47433894e0d121efcaa2e6",
"placeholder": "",
"style": "IPY_MODEL_231a5bb115b342c48e4c4fd2eeb44857",
"value": "tokenizer.json: 100%"
}
},
"ccf8ab04c1bf49c08a02749a5bf94090": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d3e6ad5135884663b0d44b60f4981af4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cbe1d6d4f28b4db8b41c24c8d6ea7179",
"max": 49.0,
"min": 0.0,
"orientation": "horizontal",
"style": "IPY_MODEL_eb488c1b67674794a5b860fe2cc56dae",
"value": 49.0
}
},
"d495045efcf744dca3d51dec98f0734e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"db6042c77cbd4db0bca2331c558256d8": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2b94730544a24657b6119ca3cd72860a",
"IPY_MODEL_ad070181b22f49c5a6c3ae0caa55d3b8",
"IPY_MODEL_dd2b300b9e5a4a5e8d7d781fc66e1436"
],
"layout": "IPY_MODEL_af620a1558ad48f0bf24a24e7b0d7e3c"
}
},
"dc8efdeeb86448f1ab80d1d7ca7af5ca": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_4b8b04381c264cd19a7217e3e5f2c6fb",
"IPY_MODEL_18cf30eccdc84df3b4781cf1fbbb78a3",
"IPY_MODEL_1ac46f0a3b5f40beb8aeffc492755fdf"
],
"layout": "IPY_MODEL_b1a021dcde9240dcb0c2be5725a96c59"
}
},
"dd2b300b9e5a4a5e8d7d781fc66e1436": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_06302581b083456b9c3c8c963644a989",
"placeholder": "",
"style": "IPY_MODEL_ccf8ab04c1bf49c08a02749a5bf94090",
"value": " 213k/213k [00:00<00:00, 4.07MB/s]"
}
},
"e4f6862c274747b39bf143bcf9fe97b0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"e6853cc7a3cb439ba6fb13a4e1d68825": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b659c0a2dd244c6d9313eeba2ec38298",
"placeholder": "",
"style": "IPY_MODEL_74b541919b784641a8526b4ca1d0a404",
"value": " 49.0/49.0 [00:00<00:00, 3.94kB/s]"
}
},
"e9fbb901478541afaa5e4331803f95a5": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_aeb890f8c9b6434ba3feda619ccd3b07",
"IPY_MODEL_d3e6ad5135884663b0d44b60f4981af4",
"IPY_MODEL_e6853cc7a3cb439ba6fb13a4e1d68825"
],
"layout": "IPY_MODEL_87a61e94a64a4c488860663c7a17d964"
}
},
"eb488c1b67674794a5b860fe2cc56dae": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"f1f1e7a622044ef68a1ce1a241e002ac": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f3186d2a3d9f4c22be1d0f110c50efd0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f52def1ec17b412cb60d44beef8080ee": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f9110544b8c4435bb3ee6320eb0c11ea": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3696fef0fada456b8cb387f1653215be",
"max": 435797.0,
"min": 0.0,
"orientation": "horizontal",
"style": "IPY_MODEL_a66f9039603d4070893769f603c55b88",
"value": 435797.0
}
},
"fd467c9c41be4d29995c699b75b81cd5": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
}
},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}