,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=[],\n",
"resume_from_checkpoint=None,\n",
"run_name=/content/artifacts/checkpoints,\n",
"save_on_each_node=False,\n",
"save_safetensors=False,\n",
"save_steps=50,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_cpu=False,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.02,\n",
"warmup_steps=100,\n",
"weight_decay=0.0,\n",
")"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from utils import parse_training_args\n",
"\n",
"training_args = parse_training_args(params)\n",
"training_args"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2ae3e5f9-e28e-457b-b6bf-a62a472241bf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T04:16:30.856558Z",
"iopub.status.busy": "2023-10-23T04:16:30.845583Z",
"iopub.status.idle": "2023-10-23T04:16:30.859550Z",
"shell.execute_reply": "2023-10-23T04:16:30.858918Z"
},
"papermill": {
"duration": 1.039895,
"end_time": "2023-10-23T04:16:30.861071",
"exception": false,
"start_time": "2023-10-23T04:16:29.821176",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
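"# Hold-out split is currently disabled: the Trainer cell below trains on data[\"train\"]\n",
"# and its eval_dataset argument is commented out as well.\n",
"# data = data[\"train\"].train_test_split(test_size=0.1)\n",
"# data\n"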
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5bc91439-6108-445c-8f85-e6558c9f0677",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T04:16:32.873189Z",
"iopub.status.busy": "2023-10-23T04:16:32.872448Z",
"iopub.status.idle": "2023-10-23T04:16:33.145627Z",
"shell.execute_reply": "2023-10-23T04:16:33.144802Z"
},
"papermill": {
"duration": 1.290055,
"end_time": "2023-10-23T04:16:33.147320",
"exception": false,
"start_time": "2023-10-23T04:16:31.857265",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"import os\n",
"\n",
"# Create the adapter output directory (and any missing parents) in-process instead of via\n",
"# `! mkdir -p`: no shell is forked (the fork is what triggers the tokenizers parallelism\n",
"# warning) and paths containing spaces are handled correctly.\n",
"os.makedirs(trained_model_path_lora, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "b33e407a-9d4f-49f6-a74b-b80db8cc3a8a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T04:16:36.127583Z",
"iopub.status.busy": "2023-10-23T04:16:36.126817Z",
"iopub.status.idle": "2023-10-23T07:07:47.130996Z",
"shell.execute_reply": "2023-10-23T07:07:47.130335Z"
},
"papermill": {
"duration": 10272.969761,
"end_time": "2023-10-23T07:07:47.132555",
"exception": false,
"start_time": "2023-10-23T04:16:34.162794",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [2370/2370 2:51:03, Epoch 2/3]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 50 | \n",
" 0.881200 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.341200 | \n",
"
\n",
" \n",
" 150 | \n",
" 0.178000 | \n",
"
\n",
" \n",
" 200 | \n",
" 0.138400 | \n",
"
\n",
" \n",
" 250 | \n",
" 0.104300 | \n",
"
\n",
" \n",
" 300 | \n",
" 0.085100 | \n",
"
\n",
" \n",
" 350 | \n",
" 0.070900 | \n",
"
\n",
" \n",
" 400 | \n",
" 0.059100 | \n",
"
\n",
" \n",
" 450 | \n",
" 0.054200 | \n",
"
\n",
" \n",
" 500 | \n",
" 0.052800 | \n",
"
\n",
" \n",
" 550 | \n",
" 0.049400 | \n",
"
\n",
" \n",
" 600 | \n",
" 0.046500 | \n",
"
\n",
" \n",
" 650 | \n",
" 0.041700 | \n",
"
\n",
" \n",
" 700 | \n",
" 0.044300 | \n",
"
\n",
" \n",
" 750 | \n",
" 0.043600 | \n",
"
\n",
" \n",
" 800 | \n",
" 0.042000 | \n",
"
\n",
" \n",
" 850 | \n",
" 0.035900 | \n",
"
\n",
" \n",
" 900 | \n",
" 0.038100 | \n",
"
\n",
" \n",
" 950 | \n",
" 0.033700 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.033300 | \n",
"
\n",
" \n",
" 1050 | \n",
" 0.033800 | \n",
"
\n",
" \n",
" 1100 | \n",
" 0.033500 | \n",
"
\n",
" \n",
" 1150 | \n",
" 0.032800 | \n",
"
\n",
" \n",
" 1200 | \n",
" 0.033500 | \n",
"
\n",
" \n",
" 1250 | \n",
" 0.031600 | \n",
"
\n",
" \n",
" 1300 | \n",
" 0.033600 | \n",
"
\n",
" \n",
" 1350 | \n",
" 0.032900 | \n",
"
\n",
" \n",
" 1400 | \n",
" 0.029600 | \n",
"
\n",
" \n",
" 1450 | \n",
" 0.033000 | \n",
"
\n",
" \n",
" 1500 | \n",
" 0.032800 | \n",
"
\n",
" \n",
" 1550 | \n",
" 0.032300 | \n",
"
\n",
" \n",
" 1600 | \n",
" 0.030600 | \n",
"
\n",
" \n",
" 1650 | \n",
" 0.025900 | \n",
"
\n",
" \n",
" 1700 | \n",
" 0.027000 | \n",
"
\n",
" \n",
" 1750 | \n",
" 0.027400 | \n",
"
\n",
" \n",
" 1800 | \n",
" 0.025700 | \n",
"
\n",
" \n",
" 1850 | \n",
" 0.025400 | \n",
"
\n",
" \n",
" 1900 | \n",
" 0.026400 | \n",
"
\n",
" \n",
" 1950 | \n",
" 0.025500 | \n",
"
\n",
" \n",
" 2000 | \n",
" 0.026300 | \n",
"
\n",
" \n",
" 2050 | \n",
" 0.025600 | \n",
"
\n",
" \n",
" 2100 | \n",
" 0.026500 | \n",
"
\n",
" \n",
" 2150 | \n",
" 0.025600 | \n",
"
\n",
" \n",
" 2200 | \n",
" 0.026000 | \n",
"
\n",
" \n",
" 2250 | \n",
" 0.026500 | \n",
"
\n",
" \n",
" 2300 | \n",
" 0.025700 | \n",
"
\n",
" \n",
" 2350 | \n",
" 0.025800 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=2370, training_loss=0.06678998734377607, metrics={'train_runtime': 10270.6027, 'train_samples_per_second': 0.924, 'train_steps_per_second': 0.231, 'total_flos': 2.160162583196713e+17, 'train_loss': 0.06678998734377607, 'epoch': 3.0})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers.trainer_utils import get_last_checkpoint\n",
"\n",
"# Causal-LM fine-tuning on the training split only (mlm=False; no eval_dataset is wired in).\n",
"trainer = transformers.Trainer(\n",
"    model=model,\n",
"    train_dataset=data[\"train\"],\n",
"#     eval_dataset=data[\"test\"],\n",
"    args=training_args,\n",
"    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
")\n",
"model.config.use_cache = False  # silence the warnings. Please re-enable for inference!\n",
"\n",
"# NOTE(review): assumed to be the same directory as training_args.output_dir - confirm.\n",
"checkpoint_path = Path(\"/content/artifacts/checkpoints\")\n",
"\n",
"# Resume only when an actual `checkpoint-<step>` folder exists. A directory that merely\n",
"# contains other files would make `resume_from_checkpoint=True` fail inside Trainer.train()\n",
"# with \"No valid checkpoint found\", so resolve the checkpoint explicitly and pass its path\n",
"# (or None to start from scratch).\n",
"resume_from_checkpoint = (\n",
"    get_last_checkpoint(str(checkpoint_path)) if checkpoint_path.is_dir() else None\n",
")\n",
"if resume_from_checkpoint is not None:\n",
"    print(\"Resuming from checkpoint:\", resume_from_checkpoint)\n",
"trainer.train(resume_from_checkpoint=resume_from_checkpoint)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "172e47a7-400e-4f82-a5e3-38135ecf532f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:07:49.427665Z",
"iopub.status.busy": "2023-10-23T07:07:49.427050Z",
"iopub.status.idle": "2023-10-23T07:08:07.740366Z",
"shell.execute_reply": "2023-10-23T07:08:07.739680Z"
},
"papermill": {
"duration": 19.377847,
"end_time": "2023-10-23T07:08:07.742055",
"exception": false,
"start_time": "2023-10-23T07:07:48.364208",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"PeftModelForCausalLM(\n",
" (base_model): LoraModel(\n",
" (model): LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): ModulesToSaveWrapper(\n",
" (original_module): Embedding(32001, 4096)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Embedding(32001, 4096)\n",
" )\n",
" )\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (k_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (v_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (o_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (up_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (down_proj): Linear(\n",
" in_features=11008, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=11008, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): ModulesToSaveWrapper(\n",
" (original_module): Linear(in_features=4096, out_features=32001, bias=False)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=32001, bias=False)\n",
" )\n",
" )\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path_lora)\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dea4e68e-57a7-48bd-bad9-f03dfe3f8a06",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:08:09.719819Z",
"iopub.status.busy": "2023-10-23T07:08:09.719055Z",
"iopub.status.idle": "2023-10-23T07:08:09.968284Z",
"shell.execute_reply": "2023-10-23T07:08:09.967347Z"
},
"papermill": {
"duration": 1.229019,
"end_time": "2023-10-23T07:08:09.969828",
"exception": false,
"start_time": "2023-10-23T07:08:08.740809",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 1.2G\r\n",
" 512 -rw-r--r-- 1 root 3003 88 Oct 23 07:07 README.md\r\n",
"1.0K -rw-r--r-- 1 root 3003 550 Oct 23 07:08 adapter_config.json\r\n",
"1.2G -rw-r--r-- 1 root 3003 1.2G Oct 23 07:07 adapter_model.bin\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path_lora}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "09db36b7-ead6-4368-9bfb-13ba1ba800a5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:08:11.940246Z",
"iopub.status.busy": "2023-10-23T07:08:11.939444Z",
"iopub.status.idle": "2023-10-23T07:09:04.484842Z",
"shell.execute_reply": "2023-10-23T07:09:04.484162Z"
},
"papermill": {
"duration": 54.728628,
"end_time": "2023-10-23T07:09:05.635793",
"exception": false,
"start_time": "2023-10-23T07:08:10.907165",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): Embedding(32001, 4096)\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=32001, bias=False)\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = model.merge_and_unload().half()\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "270a9a72-3a12-4d83-aa7d-2d167cb28cb4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:09:07.731540Z",
"iopub.status.busy": "2023-10-23T07:09:07.730902Z",
"iopub.status.idle": "2023-10-23T07:09:07.975280Z",
"shell.execute_reply": "2023-10-23T07:09:07.974458Z"
},
"papermill": {
"duration": 1.355032,
"end_time": "2023-10-23T07:09:07.976846",
"exception": false,
"start_time": "2023-10-23T07:09:06.621814",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 0\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 23 04:16 checkpoints\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 23 04:16 lora\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 23 04:10 src\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -l {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "260e9d79-6eb8-4516-bf8f-825a25606391",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:09:09.990340Z",
"iopub.status.busy": "2023-10-23T07:09:09.989655Z",
"iopub.status.idle": "2023-10-23T07:11:33.903117Z",
"shell.execute_reply": "2023-10-23T07:11:33.902350Z"
},
"papermill": {
"duration": 145.986999,
"end_time": "2023-10-23T07:11:34.968252",
"exception": false,
"start_time": "2023-10-23T07:09:08.981253",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"('/content/artifacts/tokenizer_config.json',\n",
" '/content/artifacts/special_tokens_map.json',\n",
" '/content/artifacts/tokenizer.model',\n",
" '/content/artifacts/added_tokens.json',\n",
" '/content/artifacts/tokenizer.json')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path)\n",
"tokenizer.save_pretrained(trained_model_path)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "6d90a920-fb22-4291-8466-411ff41e31be",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:11:36.839690Z",
"iopub.status.busy": "2023-10-23T07:11:36.838894Z",
"iopub.status.idle": "2023-10-23T07:11:37.088096Z",
"shell.execute_reply": "2023-10-23T07:11:37.087230Z"
},
"papermill": {
"duration": 1.198205,
"end_time": "2023-10-23T07:11:37.089762",
"exception": false,
"start_time": "2023-10-23T07:11:35.891557",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 13G\r\n",
" 512 -rw-r--r-- 1 root 3003 21 Oct 23 07:11 added_tokens.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 23 04:16 checkpoints\r\n",
"1.0K -rw-r--r-- 1 root 3003 648 Oct 23 07:09 config.json\r\n",
" 512 -rw-r--r-- 1 root 3003 183 Oct 23 07:09 generation_config.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 23 04:16 lora\r\n",
"9.3G -rw-r--r-- 1 root 3003 9.3G Oct 23 07:09 pytorch_model-00001-of-00002.bin\r\n",
"3.3G -rw-r--r-- 1 root 3003 3.3G Oct 23 07:11 pytorch_model-00002-of-00002.bin\r\n",
" 24K -rw-r--r-- 1 root 3003 24K Oct 23 07:11 pytorch_model.bin.index.json\r\n",
"1.0K -rw-r--r-- 1 root 3003 552 Oct 23 07:11 special_tokens_map.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 23 04:10 src\r\n",
"1.8M -rw-r--r-- 1 root 3003 1.8M Oct 23 07:11 tokenizer.json\r\n",
"489K -rw-r--r-- 1 root 3003 489K Oct 23 07:11 tokenizer.model\r\n",
"1.5K -rw-r--r-- 1 root 3003 1.1K Oct 23 07:11 tokenizer_config.json\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "202a694a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-23T07:11:39.015703Z",
"iopub.status.busy": "2023-10-23T07:11:39.014885Z"
},
"papermill": {
"duration": null,
"end_time": null,
"exception": false,
"start_time": "2023-10-23T07:11:38.011529",
"status": "running"
},
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06408c12de9a45139bdafb067bc717dd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00002-of-00002.bin: 0%| | 0.00/3.50G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "10ecb48e9cad4d3fa8ac2f9964ce90fe",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8e122f8d4b30478cb39b5502cd6323de",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00001-of-00002.bin: 0%| | 0.00/9.98G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from huggingface_hub import HfApi\n",
"import shutil\n",
"\n",
"# Carry the base model's tokenizer.model over to the trained model directory when the\n",
"# base has one and the trained directory does not.\n",
"base_tokenizer_file = Path(model_path) / \"tokenizer.model\"\n",
"trained_tokenizer_file = Path(trained_model_path) / \"tokenizer.model\"\n",
"needs_copy = base_tokenizer_file.exists() and not trained_tokenizer_file.exists()\n",
"if needs_copy:\n",
"    shutil.copy(base_tokenizer_file, trained_tokenizer_file)\n",
"\n",
"# Publishing is opt-in: it only happens when params carries a \"push_to_hub\" repo id.\n",
"repo_id = params.get(\"push_to_hub\")\n",
"if repo_id:\n",
"    model.push_to_hub(repo_id)\n",
"    tokenizer.push_to_hub(repo_id)\n",
"    hf_api = HfApi()\n",
"    # Extra files uploaded alongside the model when present, in this order: the base\n",
"    # model's tokenizer.model, then the notebook at the path below.\n",
"    extra_uploads = (\n",
"        base_tokenizer_file,\n",
"        Path(\"/content/artifacts/src/train.ipynb\"),\n",
"    )\n",
"    for extra_file in extra_uploads:\n",
"        if extra_file.exists():\n",
"            hf_api.upload_file(\n",
"                path_or_fileobj=extra_file,\n",
"                path_in_repo=extra_file.name,\n",
"                repo_id=repo_id,\n",
"            )\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"papermill": {
"default_parameters": {},
"duration": null,
"end_time": null,
"environment_variables": {},
"exception": null,
"input_path": "/content/src/train.ipynb",
"output_path": "/content/artifacts/src/train.ipynb",
"parameters": {},
"start_time": "2023-10-23T04:10:29.401501",
"version": "2.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}