File size: 26,913 Bytes
3506b46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fac5022c-df54-4824-8c57-98fe045372fd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-02 15:52:58.146880: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 692 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:18:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.148409: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13673 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:19:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.149801: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 13673 MB memory:  -> device: 2, name: Tesla T4, pci bus id: 0000:35:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.151206: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 13673 MB memory:  -> device: 3, name: Tesla T4, pci bus id: 0000:36:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.152545: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:4 with 13673 MB memory:  -> device: 4, name: Tesla T4, pci bus id: 0000:e7:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.153906: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:5 with 13673 MB memory:  -> device: 5, name: Tesla T4, pci bus id: 0000:e8:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.155220: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:6 with 13673 MB memory:  -> device: 6, name: Tesla T4, pci bus id: 0000:f4:00.0, compute capability: 7.5\n",
      "2024-09-02 15:52:58.156627: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:7 with 13673 MB memory:  -> device: 7, name: Tesla T4, pci bus id: 0000:f5:00.0, compute capability: 7.5\n",
      "2024-09-02 15:53:08.871520: W external/local_tsl/tsl/framework/bfc_allocator.cc:482] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.95GiB (rounded to 2097152000)requested by op StatelessRandomNormalV2\n",
      "If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. \n",
      "Current allocation summary follows.\n",
      "Current allocation summary follows.\n",
      "2024-09-02 15:53:08.871548: I external/local_tsl/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc\n",
      "2024-09-02 15:53:08.871559: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (256): \tTotal Chunks: 6, Chunks in use: 6. 1.5KiB allocated for chunks. 1.5KiB in use in bin. 48B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871566: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (512): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871572: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (1024): \tTotal Chunks: 1, Chunks in use: 1. 1.2KiB allocated for chunks. 1.2KiB in use in bin. 1.0KiB client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871577: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (2048): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871582: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (4096): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871586: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (8192): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871591: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (16384): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871596: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (32768): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871600: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (65536): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871605: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (131072): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871609: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (262144): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871613: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (524288): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871618: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (1048576): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871622: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (2097152): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871627: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (4194304): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871631: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (8388608): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871636: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (16777216): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871640: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (33554432): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871645: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (67108864): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871650: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (134217728): \tTotal Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871656: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (268435456): \tTotal Chunks: 1, Chunks in use: 0. 692.62MiB allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.\n",
      "2024-09-02 15:53:08.871662: I external/local_tsl/tsl/framework/bfc_allocator.cc:1062] Bin for 1.95GiB was 256.00MiB, Chunk State: \n",
      "2024-09-02 15:53:08.871670: I external/local_tsl/tsl/framework/bfc_allocator.cc:1068]   Size: 692.62MiB | Requested Size: 0B | in_use: 0 | bin_num: 20, prev:   Size: 256B | Requested Size: 16B | in_use: 1 | bin_num: -1\n",
      "2024-09-02 15:53:08.871674: I external/local_tsl/tsl/framework/bfc_allocator.cc:1075] Next region of size 726269952\n",
      "2024-09-02 15:53:08.871681: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000000 of size 256 next 1\n",
      "2024-09-02 15:53:08.871685: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000100 of size 1280 next 2\n",
      "2024-09-02 15:53:08.871689: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000600 of size 256 next 3\n",
      "2024-09-02 15:53:08.871692: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000700 of size 256 next 4\n",
      "2024-09-02 15:53:08.871695: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000800 of size 256 next 5\n",
      "2024-09-02 15:53:08.871700: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000900 of size 256 next 6\n",
      "2024-09-02 15:53:08.871703: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] InUse at 7f7fac000a00 of size 256 next 7\n",
      "2024-09-02 15:53:08.871708: I external/local_tsl/tsl/framework/bfc_allocator.cc:1095] Free  at 7f7fac000b00 of size 726267136 next 18446744073709551615\n",
      "2024-09-02 15:53:08.871713: I external/local_tsl/tsl/framework/bfc_allocator.cc:1100]      Summary of in-use Chunks by size: \n",
      "2024-09-02 15:53:08.871718: I external/local_tsl/tsl/framework/bfc_allocator.cc:1103] 6 Chunks of size 256 totalling 1.5KiB\n",
      "2024-09-02 15:53:08.871723: I external/local_tsl/tsl/framework/bfc_allocator.cc:1103] 1 Chunks of size 1280 totalling 1.2KiB\n",
      "2024-09-02 15:53:08.871727: I external/local_tsl/tsl/framework/bfc_allocator.cc:1107] Sum Total of in-use chunks: 2.8KiB\n",
      "2024-09-02 15:53:08.871732: I external/local_tsl/tsl/framework/bfc_allocator.cc:1109] Total bytes in pool: 726269952 memory_limit_: 726269952 available bytes: 0 curr_region_allocation_bytes_: 1452539904\n",
      "2024-09-02 15:53:08.871740: I external/local_tsl/tsl/framework/bfc_allocator.cc:1114] Stats: \n",
      "Limit:                       726269952\n",
      "InUse:                            2816\n",
      "MaxInUse:                         2816\n",
      "NumAllocs:                           9\n",
      "MaxAllocSize:                     1280\n",
      "Reserved:                            0\n",
      "PeakReserved:                        0\n",
      "LargestFreeBlock:                    0\n",
      "\n",
      "2024-09-02 15:53:08.871746: W external/local_tsl/tsl/framework/bfc_allocator.cc:494] *___________________________________________________________________________________________________\n",
      "2024-09-02 15:53:08.871766: W tensorflow/core/framework/op_kernel.cc:1840] OP_REQUIRES failed at stateless_random_ops_v2.cc:64 : RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[256000,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n",
      "2024-09-02 15:53:08.871780: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[256000,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n"
     ]
    },
    {
     "ename": "ResourceExhaustedError",
     "evalue": "{{function_node __wrapped__StatelessRandomNormalV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[256000,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:StatelessRandomNormalV2]",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mResourceExhaustedError\u001b[0m                    Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[4], line 6\u001b[0m\n\u001b[1;32m      4\u001b[0m os\u001b[38;5;241m.\u001b[39menviron[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mKAGGLE_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m9a33b6e88bcb6058b1281d777fa6808d\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkeras_nlp\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m gemma_lm \u001b[38;5;241m=\u001b[39m \u001b[43mkeras_nlp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodels\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mGemmaCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_preset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgemma_2b_en\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      7\u001b[0m gemma_lm\u001b[38;5;241m.\u001b[39mgenerate(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mKeras is a\u001b[39m\u001b[38;5;124m\"\u001b[39m, max_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m30\u001b[39m)\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras_nlp/src/models/task.py:264\u001b[0m, in \u001b[0;36mTask.from_preset\u001b[0;34m(cls, preset, load_weights, **kwargs)\u001b[0m\n\u001b[1;32m    262\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs:\n\u001b[1;32m    263\u001b[0m     config_overrides[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 264\u001b[0m backbone \u001b[38;5;241m=\u001b[39m \u001b[43mbackbone_preset_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_preset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    265\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpreset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    266\u001b[0m \u001b[43m    \u001b[49m\u001b[43mload_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mload_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    267\u001b[0m \u001b[43m    \u001b[49m\u001b[43mconfig_overrides\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig_overrides\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    268\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpreprocessor\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs:\n\u001b[1;32m    270\u001b[0m     preprocessor \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpreprocessor\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras_nlp/src/models/backbone.py:190\u001b[0m, in \u001b[0;36mBackbone.from_preset\u001b[0;34m(cls, preset, load_weights, **kwargs)\u001b[0m\n\u001b[1;32m    183\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(preset_cls, \u001b[38;5;28mcls\u001b[39m):\n\u001b[1;32m    184\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    185\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPreset has type `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpreset_cls\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` which is not a \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    186\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124ma subclass of calling class `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`. Call \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    187\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`from_preset` directly on `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpreset_cls\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    188\u001b[0m     )\n\u001b[0;32m--> 190\u001b[0m backbone \u001b[38;5;241m=\u001b[39m \u001b[43mload_serialized_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpreset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCONFIG_FILE\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    191\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m load_weights:\n\u001b[1;32m    192\u001b[0m     jax_memory_cleanup(backbone)\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras_nlp/src/utils/preset_utils.py:569\u001b[0m, in \u001b[0;36mload_serialized_object\u001b[0;34m(preset, config_file, config_overrides)\u001b[0m\n\u001b[1;32m    567\u001b[0m config \u001b[38;5;241m=\u001b[39m load_config(preset, config_file)\n\u001b[1;32m    568\u001b[0m config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconfig\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconfig\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig_overrides}\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mkeras\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msaving\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdeserialize_keras_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras/src/saving/serialization_lib.py:718\u001b[0m, in \u001b[0;36mdeserialize_keras_object\u001b[0;34m(config, custom_objects, safe_mode, **kwargs)\u001b[0m\n\u001b[1;32m    716\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m custom_obj_scope, safe_mode_scope:\n\u001b[1;32m    717\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 718\u001b[0m         instance \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43minner_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    719\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    720\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m    721\u001b[0m             \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m could not be deserialized properly. Please\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    722\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ensure that components that are Python object\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    726\u001b[0m             \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mconfig=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mException encountered: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    727\u001b[0m         )\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras_nlp/src/models/backbone.py:119\u001b[0m, in \u001b[0;36mBackbone.from_config\u001b[0;34m(cls, config)\u001b[0m\n\u001b[1;32m    115\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m    116\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_config\u001b[39m(\u001b[38;5;28mcls\u001b[39m, config):\n\u001b[1;32m    117\u001b[0m     \u001b[38;5;66;03m# The default `from_config()` for functional models will return a\u001b[39;00m\n\u001b[1;32m    118\u001b[0m     \u001b[38;5;66;03m# vanilla `keras.Model`. We override it to get a subclass instance back.\u001b[39;00m\n\u001b[0;32m--> 119\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras_nlp/src/models/gemma/gemma_backbone.py:171\u001b[0m, in \u001b[0;36mGemmaBackbone.__init__\u001b[0;34m(self, vocabulary_size, num_layers, num_query_heads, num_key_value_heads, hidden_dim, intermediate_dim, head_dim, query_head_dim_normalize, use_post_ffw_norm, use_post_attention_norm, attention_logit_soft_cap, final_logit_soft_cap, use_sliding_window_attention, sliding_window_size, layer_norm_epsilon, dropout, dtype, **kwargs)\u001b[0m\n\u001b[1;32m    165\u001b[0m token_id_input \u001b[38;5;241m=\u001b[39m keras\u001b[38;5;241m.\u001b[39mInput(\n\u001b[1;32m    166\u001b[0m     shape\u001b[38;5;241m=\u001b[39m(\u001b[38;5;28;01mNone\u001b[39;00m,), dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfloat32\u001b[39m\u001b[38;5;124m\"\u001b[39m, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtoken_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    167\u001b[0m )\n\u001b[1;32m    168\u001b[0m padding_mask_input \u001b[38;5;241m=\u001b[39m keras\u001b[38;5;241m.\u001b[39mInput(\n\u001b[1;32m    169\u001b[0m     shape\u001b[38;5;241m=\u001b[39m(\u001b[38;5;28;01mNone\u001b[39;00m,), dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfloat32\u001b[39m\u001b[38;5;124m\"\u001b[39m, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpadding_mask\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    170\u001b[0m )\n\u001b[0;32m--> 171\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoken_embedding\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken_id_input\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    172\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m*\u001b[39m ops\u001b[38;5;241m.\u001b[39mcast(ops\u001b[38;5;241m.\u001b[39msqrt(hidden_dim), x\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m    173\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m transformer_layer \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransformer_layers:\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    119\u001b[0m     filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m    120\u001b[0m     \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m    121\u001b[0m     \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m    124\u001b[0m     \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/keras_nlp/src/layers/modeling/reversible_embedding.py:115\u001b[0m, in \u001b[0;36mReversibleEmbedding.build\u001b[0;34m(self, inputs_shape)\u001b[0m\n\u001b[1;32m    114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mbuild\u001b[39m(\u001b[38;5;28mself\u001b[39m, inputs_shape\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 115\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs_shape\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    117\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtie_weights:\n\u001b[1;32m    118\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreverse_embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madd_weight(\n\u001b[1;32m    119\u001b[0m             name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreverse_embeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    120\u001b[0m             shape\u001b[38;5;241m=\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_dim, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_dim),\n\u001b[1;32m    121\u001b[0m             initializer\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membeddings_initializer,\n\u001b[1;32m    122\u001b[0m             dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdtype,\n\u001b[1;32m    123\u001b[0m         )\n",
      "\u001b[0;31mResourceExhaustedError\u001b[0m: {{function_node __wrapped__StatelessRandomNormalV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[256000,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:StatelessRandomNormalV2]"
     ]
    }
   ],
   "source": [
    "# Set Kaggle API credentials\n",
    "import os\n",
    "os.environ[\"KAGGLE_USERNAME\"] = \"rogerkorantenng\"\n",
    "os.environ[\"KAGGLE_KEY\"] = \"9a33b6e88bcb6058b1281d777fa6808d\"\n",
    "import keras_nlp\n",
    "gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(\"gemma_2b_en\")\n",
    "gemma_lm.generate(\"Keras is a\", max_length=30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5488db1-48b2-4d0c-a4cd-1329a333b3f9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}