{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro",
   "metadata": {},
   "source": [
    "# Gemma 2B quick start with KerasNLP\n",
    "\n",
    "Loads the `gemma_2b_en` preset through `keras_nlp` and generates a short completion for a fixed prompt.\n",
    "\n",
    "**Notes for whoever runs this**\n",
    "\n",
    "- Kaggle credentials are read from the `KAGGLE_USERNAME` / `KAGGLE_KEY` environment variables and are prompted for (without echo) when missing. Never paste them into a cell.\n",
    "- An earlier revision of this notebook embedded a Kaggle API key both in the cell source and in a saved traceback. That key must be treated as compromised and revoked.\n",
    "- A previous run failed with `ResourceExhaustedError`: TensorFlow tried to place the 256000 x 2048 float32 embedding table (1.95 GiB) on GPU 0, which had only 692 MB available, while GPUs 1-7 each reported 13673 MB. The setup cell below therefore pins the process to the GPU with the most free memory *before* the backend is imported.\n",
    "- Run with **Restart Kernel -> Run All**: device visibility cannot be changed once the backend has initialised CUDA."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard-library imports only. `keras_nlp` is imported later on purpose,\n",
    "# after GPU visibility has been configured.\n",
    "import getpass\n",
    "import os\n",
    "import subprocess"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "PRESET = \"gemma_2b_en\"\n",
    "PROMPT = \"Keras is a\"\n",
    "MAX_LENGTH = 30"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "runtime-setup-header",
   "metadata": {},
   "source": [
    "## Runtime setup: GPU selection and credentials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "gpu-helper",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pick_gpu_with_most_free_memory():\n",
    "    \"\"\"Return the index (as a string) of the GPU reporting the most free memory.\n",
    "\n",
    "    Queries `nvidia-smi`. Returns None when the tool is missing, fails, times\n",
    "    out, or reports no parsable row, so the caller can leave device selection\n",
    "    untouched instead of crashing on machines without NVIDIA tooling.\n",
    "    \"\"\"\n",
    "    query = [\n",
    "        \"nvidia-smi\",\n",
    "        \"--query-gpu=index,memory.free\",\n",
    "        \"--format=csv,noheader,nounits\",\n",
    "    ]\n",
    "    try:\n",
    "        result = subprocess.run(\n",
    "            query, capture_output=True, text=True, check=True, timeout=10\n",
    "        )\n",
    "    except (OSError, subprocess.SubprocessError):\n",
    "        return None\n",
    "\n",
    "    free_mib_by_index = {}\n",
    "    for row in result.stdout.splitlines():\n",
    "        index, _, free_mib = row.partition(\",\")\n",
    "        try:\n",
    "            free_mib_by_index[index.strip()] = int(free_mib.strip())\n",
    "        except ValueError:\n",
    "            continue  # skip blank rows or non-numeric values\n",
    "\n",
    "    if not free_mib_by_index:\n",
    "        return None\n",
    "    return max(free_mib_by_index, key=free_mib_by_index.get)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "runtime-setup",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enumerate CUDA devices in PCI bus order so that the index reported by\n",
    "# nvidia-smi refers to the same physical GPU inside this process.\n",
    "os.environ.setdefault(\"CUDA_DEVICE_ORDER\", \"PCI_BUS_ID\")\n",
    "\n",
    "# Respect an explicit choice made outside the notebook; otherwise avoid a GPU\n",
    "# that is already nearly full by picking the one with the most free memory.\n",
    "if \"CUDA_VISIBLE_DEVICES\" not in os.environ:\n",
    "    gpu_index = pick_gpu_with_most_free_memory()\n",
    "    if gpu_index is not None:\n",
    "        os.environ[\"CUDA_VISIBLE_DEVICES\"] = gpu_index\n",
    "\n",
    "# Kaggle credentials: taken from the environment, prompted for if absent.\n",
    "# getpass keeps the typed value out of the notebook source and outputs.\n",
    "for name in (\"KAGGLE_USERNAME\", \"KAGGLE_KEY\"):\n",
    "    if not os.environ.get(name):\n",
    "        os.environ[name] = getpass.getpass(f\"{name}: \")\n",
    "\n",
    "os.environ.get(\"CUDA_VISIBLE_DEVICES\", \"<unset: framework default>\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-model-header",
   "metadata": {},
   "source": [
    "## Load the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fac5022c-df54-4824-8c57-98fe045372fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imported here rather than in the imports cell: the environment variables set\n",
    "# above only take effect if they are in place before the backend starts.\n",
    "import keras_nlp\n",
    "\n",
    "gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(PRESET)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "generate-header",
   "metadata": {},
   "source": [
    "## Generate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "generate",
   "metadata": {},
   "outputs": [],
   "source": [
    "gemma_lm.generate(PROMPT, max_length=MAX_LENGTH)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}