{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "b12ae8a3-9e08-402c-894c-31697fad6c56", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "eab00695e2b240ffb58ab998c85c0e7d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='
\n", " \n", " \n", " [ 7971/30600 16:14 < 46:07, 8.18 it/s, Epoch 26.05/100]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation Loss
1No log0.085583
20.1041000.081926
30.1041000.079540
40.0919000.078066
50.0876000.076963
60.0876000.075823
70.0845000.075087
80.0845000.075002
90.0830000.073672
100.0809000.073238
110.0809000.072717
120.0802000.072234
130.0802000.071684
140.0797000.072137
150.0780000.071143
160.0780000.070724
170.0765000.070303
180.0774000.069888
190.0774000.069760
200.0762000.069610
210.0762000.069183
220.0756000.069061
230.0756000.068791
240.0756000.068658
250.0750000.068027
260.0750000.068032

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# NOTE(review): this cell uses ConsistentSentenceRegressor, tokenizer, tokenized_dataset and the\n", "# TrainingArguments / Trainer / DataCollatorWithPadding names (presumably from Hugging Face\n", "# transformers) without defining or importing them; no other cell in this notebook does either,\n", "# so confirm the notebook survives Restart & Run All.\n", "\n", "# Hub repository that should receive the checkpoints and the final model.\n", "HUB_MODEL_ID = \"factual-consistency-regression-ja\"\n", "\n", "# freeze_bert=True presumably keeps the encoder weights fixed -- TODO confirm against the class definition.\n", "model = ConsistentSentenceRegressor(freeze_bert=True)\n", "\n", "training_args = TrainingArguments(\n", "    output_dir=\".\",\n", "    learning_rate=1e-5,\n", "    per_device_train_batch_size=64,\n", "    num_train_epochs=100,\n", "    weight_decay=0.02,\n", "    evaluation_strategy=\"epoch\",\n", "    eval_accumulation_steps=1,\n", "    save_strategy=\"epoch\",  # same cadence as evaluation, as load_best_model_at_end requires\n", "    load_best_model_at_end=True,\n", "    push_to_hub=True,\n", "    hub_model_id=HUB_MODEL_ID,  # the repo name belongs here, not in trainer.push_to_hub()\n", ")\n", "\n", "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n", "trainer = Trainer(\n", "    model=model,\n", "    args=training_args,\n", "    train_dataset=tokenized_dataset[\"train\"],\n", "    eval_dataset=tokenized_dataset[\"test\"],\n", "    tokenizer=tokenizer,\n", "    data_collator=data_collator,\n", ")\n", "\n", "trainer.train()\n", "\n", "# Trainer.push_to_hub() takes a commit message as its first positional argument; the target\n", "# repository is selected by hub_model_id in TrainingArguments, so no argument is passed here.\n", "trainer.push_to_hub()" ] }, { "cell_type": "code", "execution_count": null, "id": "a6eb93f7-5a38-49a2-be0d-e42267e23a0a", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "3638c8d8-fc85-4caf-83a4-4fd2ad6fb95d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "environment": { "kernel": "python3", "name": "pytorch-gpu.2-0.m112", "type": "gcloud", "uri": "gcr.io/deeplearning-platform-release/pytorch-gpu.2-0:m112" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }