{ "cells": [ { "cell_type": "code", "execution_count": 64, "id": "029e3b0a", "metadata": {}, "outputs": [], "source": [ "from os import listdir\n", "from pickle import dump\n", "from keras.applications.vgg16 import VGG16, preprocess_input\n", "from tensorflow.keras.preprocessing.image import img_to_array, load_img\n", "from keras.models import Model" ] }, { "cell_type": "code", "execution_count": 299, "id": "bec9f820", "metadata": {}, "outputs": [], "source": [ "# extract feature from each photo in directory\n", "def extract_feature(directory):\n", " model = VGG16()\n", " #restructure model, here we remove last softmax layer from this model\n", " model.layers.pop\n", " model = Model(inputs=model.inputs, outputs=model.layers[-1].output)\n", " print(model.summary)\n", " \n", " #extract feature from each photo\n", " feature = dict()\n", " for name in listdir(directory):\n", " filename = directory + '/' + name\n", " image = load_img(filename, target_size=(224,224))\n", " #convert img pixels to numpy array\n", " image = img_to_array(image)\n", " #reshape data for the model\n", " image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))\n", " #preprocess img for preprocess model\n", " image = preprocess_input(image)\n", " #get features\n", " features = model.predict(image, verbose=0)\n", " #get img id\n", " img_id = name.split('.')[0]\n", " #storing features\n", " feature[img_id] = features\n", " print(\">%s\" % name)\n", " return feature" ] }, { "cell_type": "code", "execution_count": null, "id": "035ed4b2", "metadata": {}, "outputs": [], "source": [ "directory = \"img_captioning_dataset/Images\"\n", "features = extract_feature(directory)\n", "# print(\"Extracted Features: %d\" %len(features))\n", "# dump(features, open('img_captioning_features/features.pkl', 'wb'))" ] }, { "cell_type": "code", "execution_count": 385, "id": "89135fbf", "metadata": {}, "outputs": [], "source": [ "import string\n", "from nltk.tokenize import word_tokenize\n", "\n", "def load_doc(filename):\n", " # open the file as read only\n", " file = open(filename, 'r')\n", " # read all text\n", " text = file.read()\n", " # close the file\n", " file.close()\n", " return text\n", "\n", "#extract description of image\n", "def load_description(doc):\n", " mapping = dict()\n", " for line in doc.split('\\n'):\n", " token = word_tokenize(line)\n", " if len(line) < 2:\n", " continue\n", " image_id, image_desc = token[0], token[1:]\n", " image_id = image_id.split('.')[0]\n", " image_desc = ' '.join(image_desc)\n", " if image_id not in mapping:\n", " mapping[image_id] = list()\n", " mapping[image_id].append(image_desc)\n", " return mapping" ] }, { "cell_type": "code", "execution_count": 386, "id": "74ffda4f", "metadata": {}, "outputs": [], "source": [ "def clean_descriptions(descriptions):\n", " # prepare translation table for removing punctuation\n", " table = str.maketrans('', '', string.punctuation)\n", " for key, desc_list in descriptions.items():\n", " for i in range(len(desc_list)):\n", " desc = desc_list[i]\n", " # tokenize\n", " desc = desc.split()\n", " # convert to lower case\n", " desc = [word.lower() for word in desc]\n", " # remove punctuation from each token\n", " desc = [w.translate(table) for w in desc]\n", " # remove hanging 's' and 'a'\n", " desc = [word for word in desc if len(word)>1]\n", " # remove tokens with numbers in them\n", " desc = [word for word in desc if word.isalpha()]\n", " # store as string\n", " desc_list[i] = ' '.join(desc)\n", "def to_vocabulary(descriptions):\n", " # build a list 
{ "cell_type": "code", "execution_count": 387, "id": "6ae0e204", "metadata": {}, "outputs": [], "source": [
 "# save one 'image_id description' line per caption\n",
 "def save_descriptions(descriptions, filename):\n",
 "    lines = list()\n",
 "    for key, desc_list in descriptions.items():\n",
 "        for desc in desc_list:\n",
 "            lines.append(key + ' ' + desc)\n",
 "    data = '\\n'.join(lines)\n",
 "    with open(filename, 'w') as file:\n",
 "        file.write(data)" ] },
 { "cell_type": "code", "execution_count": 388, "id": "ad625117", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded: 8092\n" ] } ], "source": [
 "filename = 'Flickr8k.token.txt'\n",
 "doc = load_doc(filename)\n",
 "descriptions = load_description(doc)\n",
 "print('Loaded: %d' % len(descriptions))" ] },
 { "cell_type": "code", "execution_count": 389, "id": "7b06b1b5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Vocab size: 8761\n" ] } ], "source": [
 "# clean the descriptions and summarize the resulting vocabulary\n",
 "clean_descriptions(descriptions)\n",
 "vocab = to_vocabulary(descriptions)\n",
 "print('Vocab size: %d' % len(vocab))" ] },
 { "cell_type": "code", "execution_count": 390, "id": "c4c867ea", "metadata": {}, "outputs": [], "source": [ "save_descriptions(descriptions, 'another-way/descriptions1.txt')" ] },
 { "cell_type": "markdown", "id": "d4079267", "metadata": {}, "source": [ "### Extract Identifiers" ] },
 { "cell_type": "code", "execution_count": 281, "id": "898c84a0", "metadata": {}, "outputs": [], "source": [ "from pickle import dump" ] },
 { "cell_type": "code", "execution_count": 391, "id": "dd687334", "metadata": {}, "outputs": [], "source": [
 "# load a document into memory\n",
 "def load_doc(filename):\n",
 "    with open(filename, 'r') as f:\n",
 "        content = f.read()\n",
 "    return content\n",
 "\n",
 "# load a pre-defined list of photo identifiers\n",
 "def load_set(filename):\n",
 "    doc = load_doc(filename)\n",
 "    dataset = list()\n",
 "    for line in doc.split('\\n'):\n",
 "        if len(line) < 1:\n",
 "            continue\n",
 "        identifier = line.split('.')[0]\n",
 "        dataset.append(identifier)\n",
 "    return set(dataset)" ] },
 { "cell_type": "code", "execution_count": 392, "id": "2c612418", "metadata": {}, "outputs": [], "source": [
 "def load_clean_descripitions(filename, dataset):\n",
 "    doc = load_doc(filename)\n",
 "    descriptions = dict()\n",
 "    for line in doc.split('\\n'):\n",
 "        # skip empty lines\n",
 "        if len(line) < 1:\n",
 "            continue\n",
 "        tokens = word_tokenize(line)\n",
 "        image_id, image_desc = tokens[0], tokens[1:]\n",
 "        if image_id in dataset:\n",
 "            if image_id not in descriptions:\n",
 "                descriptions[image_id] = list()\n",
 "            # wrap each description in start/end tokens\n",
 "            desc = 'startseq ' + ' '.join(image_desc) + ' endseq'\n",
 "            descriptions[image_id].append(desc)\n",
 "    return descriptions" ] },
 { "cell_type": "code", "execution_count": 393, "id": "4d22db3e", "metadata": {}, "outputs": [], "source": [
 "def load_photo_features(filename, dataset):\n",
 "    # load all photo features, then keep only those in the given split\n",
 "    all_features = load(open(filename, 'rb'))\n",
 "    features = {k: all_features[k] for k in dataset if k in all_features}\n",
 "    return features" ] },
 { "cell_type": "code", "execution_count": 394, "id": "0c3a8e25", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dataset: 6000\n", "Descriptions: train=6000\n", "Photos: train=6000\n" ] } ], "source": [
 "from pickle import load\n",
 "\n",
 "filename = \"Flickr_8k.trainImages.txt\"\n",
 "train = load_set(filename)\n",
 "print(\"dataset: %d\" % len(train))\n",
"train_descriptions = load_clean_descripitions(\"descriptions1.txt\", train)\n", "print(\"Descriptions: train=%d\" %len(train_descriptions))\n", "train_features = load_photo_features(\"features.pkl\", train)\n", "print('Photos: train=%d' % len(train_features))" ] }, { "cell_type": "code", "execution_count": 444, "id": "437278aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'1191338263_a4fa073154': ['startseq little old lady sitting next to an advertisement endseq',\n", " 'startseq an asian woman waiting at an underground train stop endseq',\n", " 'startseq an old woman sits in transit station next to backlit advertisement endseq',\n", " 'startseq woman sits in subway station endseq',\n", " 'startseq woman with an umbrella is sitting at station with an aquos commercial on the wall endseq'],\n", " '218342358_1755a9cce1': ['startseq cyclist wearing red helmet is riding on the pavement endseq',\n", " 'startseq girl is riding bike on the street while wearing red helmet endseq',\n", " 'startseq person on bike wearing red helmet riding down street endseq',\n", " 'startseq woman wears red helmet and blue shirt as she goes for bike ride in the shade endseq',\n", " 'startseq person in blue shirt and red helmet riding bike down the road endseq'],\n", " '2187222896_c206d63396': ['startseq boy in red shirt in front of long blue wall raises his eyebrow at the camera endseq',\n", " 'startseq boy in red shirt with stripes standing near blue brick wall with handicap signs endseq',\n", " 'startseq an african american boy stands in front of blue building in the handicapped space endseq',\n", " 'startseq the boy in the orange shirt looks backwards endseq',\n", " 'startseq the boy in the red shirt is next to blue wall endseq'],\n", " '2276499757_b44dc6f8ce': ['startseq dog looks warily at the brown dog investigating his area endseq',\n", " 'startseq large brown dog is looking at medium sized black dog endseq',\n", " 'startseq small black dog looks at larger brown dog in grassy field endseq',\n", " 'startseq the big brown dog looks at the small black dog in tall grass endseq',\n", " 'startseq there is big dog looking at little dog endseq'],\n", " '2294598473_40637b5c04': ['startseq dog catches frisbee in midair endseq',\n", " 'startseq dog catching frisbee endseq',\n", " 'startseq terrier mix catches frisbee in the air endseq',\n", " 'startseq white and black dog catching frisbee endseq',\n", " 'startseq white dog is leaping in the air with green object in its mouth endseq'],\n", " '2380765956_6313d8cae3': ['startseq blond girl wearing green jacket walks on trail along side metal fence endseq',\n", " 'startseq girl in green coat walks down rural road playing flute endseq',\n", " 'startseq young girl in parka playing flute while walking by fenced in field endseq',\n", " 'startseq girl in green and blue jacket walking past an enclosed field endseq',\n", " 'startseq girl playing flute as she walks by fence in rural area endseq'],\n", " '2501968935_02f2cd8079': ['startseq man dressed in purple shirt and red bandanna smiles at the people watching him endseq',\n", " 'startseq man on the street wearing leather chaps and chainmail codpiece endseq',\n", " 'startseq man wearing purple shirt and black leather chaps poses for the camera endseq',\n", " 'startseq man dressed in leather chaps and purple shirt stands in front of onlookers endseq',\n", " 'startseq there is man in purple shirt leather chaps and red bandanna standing near other men endseq'],\n", " '2506892928_7e79bec613': ['startseq three children 
in field with white flowers endseq',\n", " 'startseq three children one with stuffed kitten in field of flowers endseq',\n", " 'startseq three children play in the garden endseq',\n", " 'startseq three children pose among wildflowers endseq',\n", " 'startseq three kids palying with toy cat in garden endseq'],\n", " '2513260012_03d33305cf': ['startseq black dog is running after white dog in the snow endseq',\n", " 'startseq black dog chasing brown dog through snow endseq',\n", " 'startseq two dogs chase each other across the snowy ground endseq',\n", " 'startseq two dogs play together in the snow endseq',\n", " 'startseq two dogs running through low lying body of water endseq'],\n", " '2638369467_8fc251595b': ['startseq girl in white dress endseq',\n", " 'startseq little girl in white is looking back at the camera while carrying water grenade endseq',\n", " 'startseq smiling young girl in braids is playing ball endseq',\n", " 'startseq young girl wearing white looks at the camera as she plays endseq',\n", " 'startseq the girl is holding green ball endseq'],\n", " '2644326817_8f45080b87': ['startseq black and white dog with red frisbee standing on sandy beach endseq',\n", " 'startseq dog drops red disc on beach endseq',\n", " 'startseq dog with red frisbee flying in the air endseq',\n", " 'startseq dog catching red frisbee endseq',\n", " 'startseq the black dog is dropping red disc on beach endseq'],\n", " '2699342860_5288e203ea': ['startseq boy wearing red tshirt is running through woodland endseq',\n", " 'startseq child runs near some trees endseq',\n", " 'startseq young boy is dancing around endseq',\n", " 'startseq young boy with red short sleeved shirt and jeans runs by some trees endseq',\n", " 'startseq the little boy in the red shirt stops to smile for the camera endseq'],\n", " '2851304910_b5721199bc': ['startseq photographer looks over the hills endseq',\n", " 'startseq woman in red jacket is videotaping natural landscape endseq',\n", " 'startseq woman with camera looks out over rolling hills endseq',\n", " 'startseq woman with camera on tripod is looking at the view endseq',\n", " 'startseq lady in red shirt has her camera set up in the field to record something endseq'],\n", " '2903617548_d3e38d7f88': ['startseq little baby plays croquet endseq',\n", " 'startseq little girl plays croquet next to truck endseq',\n", " 'startseq the child is playing croquette by the truck endseq',\n", " 'startseq the kid is in front of car with put and ball endseq',\n", " 'startseq the little boy is playing with croquet hammer and ball beside the car endseq'],\n", " '2926786902_815a99a154': ['startseq skier in yellow jacket is airborne above the mountains endseq',\n", " 'startseq skier jumps high in the air with view of the mountains endseq',\n", " 'startseq skiing man in fluorescent jacket jumps very high and it looks as though he is flying endseq',\n", " 'startseq somone is high in the air doing ski jump endseq',\n", " 'startseq the skier in the green jacket and white pants appears to almost fly into the sky endseq'],\n", " '3119887967_271a097464': ['startseq man in sweater pointing at the camera endseq',\n", " 'startseq one man is posing with arms outstretched and finger pointed while another stares from behind him endseq',\n", " 'startseq the man in the black hat stands behind the man who is pointing his finger endseq',\n", " 'startseq two men look toward the camera while the one in front points his index finger endseq',\n", " 'startseq two men one wearing black hat while the one in front points 
standing in hallway endseq'],\n", " '3197891333_b1b0fd1702': ['startseq family of nine people including four children pose in front of brick fireplace with white mantle endseq',\n", " 'startseq family poses in front of the fireplace and christmas tree endseq',\n", " 'startseq family posing by the mantle and christmas tree endseq',\n", " 'startseq happy family poses by the fireplace endseq',\n", " 'startseq two couples and four kids pose for family picture endseq'],\n", " '3338291921_fe7ae0c8f8': ['startseq brown dog in the snow has something hot pink in its mouth endseq',\n", " 'startseq brown dog in the snow holding pink hat endseq',\n", " 'startseq brown dog is holding pink shirt in the snow endseq',\n", " 'startseq dog is carrying something pink in its mouth while walking through the snow endseq',\n", " 'startseq dog with something pink in its mouth is looking forward endseq'],\n", " '3356369156_074750c6cc': ['startseq blue boat with yellow canopy is floating on calm waters endseq',\n", " 'startseq boat in the water endseq',\n", " 'startseq boat with roof on green water endseq',\n", " 'startseq the boat is in the middle of the water endseq',\n", " 'startseq the solitude boat floats on the lake endseq'],\n", " '3423802527_94bd2b23b0': ['startseq bunch of girls in cheerleader outfits endseq',\n", " 'startseq large group of cheerleaders walking in parade endseq',\n", " 'startseq cheerleaders perform endseq',\n", " 'startseq many cheerleaders wearing black walk down the street endseq',\n", " 'startseq parade of cheerleaders wearing black pink and white uniforms endseq'],\n", " '488416045_1c6d903fe0': ['startseq brown dog is running along beach endseq',\n", " 'startseq brown dog wearing black collar running across the beach endseq',\n", " 'startseq dog walks on the sand near the water endseq',\n", " 'startseq brown dog running on the beach endseq',\n", " 'startseq the large brown dog is running on the beach by the ocean endseq']}" ] }, "execution_count": 444, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_descriptions" ] }, { "cell_type": "markdown", "id": "b80bb437", "metadata": {}, "source": [ "### Now going to use keras tokenizer to change text to numeric form" ] }, { "cell_type": "code", "execution_count": 396, "id": "c7e2130c", "metadata": {}, "outputs": [], "source": [ "# dict to clean list\n", "def to_lines(descriptions):\n", " all_desc = list()\n", " for key in descriptions.keys():\n", " [all_desc.append(d) for d in descriptions[key]]\n", " return all_desc" ] }, { "cell_type": "code", "execution_count": 397, "id": "a91092cf", "metadata": {}, "outputs": [], "source": [ "def create_tokenizer(descriptions):\n", " lines = to_lines(descriptions)\n", " tokenizer = Tokenizer()\n", " tokenizer.fit_on_texts(lines)\n", " return tokenizer" ] }, { "cell_type": "code", "execution_count": 398, "id": "69996870", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Vocabulary Size: 7577\n" ] } ], "source": [ "tokenizer = create_tokenizer(train_descriptions)\n", "vocab_size = len(tokenizer.word_index) + 1\n", "print('Vocabulary Size: %d' % vocab_size)" ] }, { "cell_type": "code", "execution_count": 399, "id": "9f0b5246", "metadata": {}, "outputs": [], "source": [ "#len of description\n", "def max_length(description):\n", " lines = to_lines(description)\n", " return max(len(d.split()) for d in lines)" ] }, { "cell_type": "code", "execution_count": 462, "id": "191d71d6", "metadata": {}, "outputs": [], "source": [ "# create input and output 
sequence\n",
 "# note: to_categorical relies on the global vocab_size defined above\n",
 "def create_sequences(tokenizer, max_length, desc_list, photo):\n",
 "    X1, X2, y = list(), list(), list()\n",
 "    # walk through each description for the image\n",
 "    for desc in desc_list:\n",
 "        # encode the sequence\n",
 "        seq = tokenizer.texts_to_sequences([desc])[0]\n",
 "        # split one sequence into multiple X,y pairs\n",
 "        for i in range(1, len(seq)):\n",
 "            # split into input and output pair\n",
 "            in_seq, out_seq = seq[:i], seq[i]\n",
 "            # pad input sequence\n",
 "            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]\n",
 "            # one-hot encode the output word\n",
 "            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]\n",
 "            # store\n",
 "            X1.append(photo)\n",
 "            X2.append(in_seq)\n",
 "            y.append(out_seq)\n",
 "    return array(X1), array(X2), array(y)" ] },
 { "cell_type": "code", "execution_count": 401, "id": "4e3e04fa", "metadata": {}, "outputs": [], "source": [
 "from numpy import array\n",
 "from tensorflow.keras.preprocessing.text import Tokenizer\n",
 "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
 "from tensorflow.keras.utils import to_categorical, plot_model\n",
 "from keras.models import Model\n",
 "from keras.layers import Input, Dense, Dropout, Embedding, LSTM, concatenate\n",
 "from keras.callbacks import ModelCheckpoint" ] },
 { "cell_type": "markdown", "id": "45c2cfe9", "metadata": {}, "source": [ "### Model creation" ] },
 { "cell_type": "code", "execution_count": 467, "id": "22c7799b", "metadata": {}, "outputs": [], "source": [
 "def define_model(vocab_size, max_length):\n",
 "    # feature extractor model (the 1000-dim VGG16 prediction vector is the input)\n",
 "    inputs1 = Input(shape=(1000,))\n",
 "    fe1 = Dropout(0.5)(inputs1)\n",
 "    fe2 = Dense(256, activation='relu')(fe1)\n",
 "    # sequence model\n",
 "    inputs2 = Input(shape=(max_length,))\n",
 "    se1 = Embedding(vocab_size, output_dim=256, mask_zero=True)(inputs2)\n",
 "    se2 = Dropout(0.5)(se1)\n",
 "    se3 = LSTM(256)(se2)\n",
 "    # decoder model\n",
 "    decoder1 = concatenate([fe2, se3])\n",
 "    decoder2 = Dense(256, activation='relu')(decoder1)\n",
 "    outputs = Dense(vocab_size, activation='softmax')(decoder2)\n",
 "    # tie it together: [image, seq] -> next word\n",
 "    model = Model(inputs=[inputs1, inputs2], outputs=outputs)\n",
 "    model.compile(loss='categorical_crossentropy', optimizer='adam')\n",
 "    # summarize model\n",
 "    print(model.summary())\n",
 "    return model" ] },
 { "cell_type": "code", "execution_count": 463, "id": "6ad11b1d", "metadata": {}, "outputs": [], "source": [
 "# yield one batch per image: every (photo, caption prefix) -> next-word pair\n",
 "def data_generator(descriptions, photos, tokenizer, max_length):\n",
 "    # loop forever over the images\n",
 "    while 1:\n",
 "        for key, desc_list in descriptions.items():\n",
 "            # retrieve the photo feature\n",
 "            photo = photos[key][0]\n",
 "            in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo)\n",
 "            yield ([in_img, in_seq], out_word)" ] },
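{ "cell_type": "markdown", "id": "9c0d1e2f", "metadata": {}, "source": [ "Before wiring `create_sequences` into the generator, a quick shape check on a single toy caption. The caption text is made up (it assumes all four tokens occur in the training captions), and the expected shapes assume the 34-token maximum length and 7,577-word vocabulary reported above." ] },
 { "cell_type": "code", "execution_count": null, "id": "3f4a5b6c", "metadata": {}, "outputs": [], "source": [
 "# One caption of n tokens yields n-1 (prefix -> next word) training pairs.\n",
 "from numpy import zeros\n",
 "\n",
 "toy_desc = ['startseq dog runs endseq']\n",
 "toy_photo = zeros((1000,))  # stands in for one VGG16 feature vector\n",
 "X1, X2, y = create_sequences(tokenizer, max_length(train_descriptions), toy_desc, toy_photo)\n",
 "print(X1.shape, X2.shape, y.shape)  # expected: (3, 1000) (3, 34) (3, 7577)" ] },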
"print(\"train_descriptions= %d\" %len(train_descriptions))\n", "\n", "train_feature = load_photo_features(\"features.pkl\", train)\n", "print(\"photos: train= %d\" %len(train_feature))\n", "\n", "tokenizer = create_tokenizer(train_descriptions)\n", "vocab_size = len(tokenizer.word_index)+1\n", "print(\"Vocab size: %d\" %vocab_size)\n", "\n", "max_len = max_length(train_descriptions)\n", "print('Description Length: %d' % max_len)" ] }, { "cell_type": "code", "execution_count": 468, "id": "9936fc7f", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model_47\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " input_121 (InputLayer) [(None, 34)] 0 [] \n", " \n", " input_120 (InputLayer) [(None, 1000)] 0 [] \n", " \n", " embedding_52 (Embedding) (None, 34, 256) 1939712 ['input_121[0][0]'] \n", " \n", " dropout_109 (Dropout) (None, 1000) 0 ['input_120[0][0]'] \n", " \n", " dropout_110 (Dropout) (None, 34, 256) 0 ['embedding_52[0][0]'] \n", " \n", " dense_151 (Dense) (None, 256) 256256 ['dropout_109[0][0]'] \n", " \n", " lstm_52 (LSTM) (None, 256) 525312 ['dropout_110[0][0]'] \n", " \n", " concatenate_8 (Concatenate) (None, 512) 0 ['dense_151[0][0]', \n", " 'lstm_52[0][0]'] \n", " \n", " dense_152 (Dense) (None, 256) 131328 ['concatenate_8[0][0]'] \n", " \n", " dense_153 (Dense) (None, 7577) 1947289 ['dense_152[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 4,799,897\n", "Trainable params: 4,799,897\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n", "None\n" ] } ], "source": [ "#train model\n", "model = define_model(vocab_size, max_len)\n", "epochs = 10\n", "steps = len(train_descriptions)" ] }, { "cell_type": "code", "execution_count": 469, "id": "6d10533f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 514/6000 [=>............................] 
{ "cell_type": "code", "execution_count": 469, "id": "6d10533f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 514/6000 [=>............................] - ETA: 25:52 - loss: 5.8238" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [
 "# train the model, saving it after each epoch\n",
 "for i in range(epochs):\n",
 "    # create the data generator\n",
 "    generator = data_generator(train_descriptions, train_feature, tokenizer, max_len)\n",
 "    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)\n",
 "    model.save(\"model_\" + str(i) + \".h5\")" ] },
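{ "cell_type": "markdown", "id": "d5e6f7a8", "metadata": {}, "source": [ "If the kernel is restarted between training and evaluation, a saved checkpoint can be reloaded instead of retraining. A minimal sketch, assuming the loop above completed at least its first epoch and wrote `model_0.h5`:" ] },
 { "cell_type": "code", "execution_count": null, "id": "f9a0b1c2", "metadata": {}, "outputs": [], "source": [
 "# Reload a per-epoch checkpoint written by the training loop above.\n",
 "from keras.models import load_model\n",
 "\n",
 "model = load_model('model_0.h5')" ] },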
{ "cell_type": "code", "execution_count": null, "id": "f4867e1b", "metadata": {}, "outputs": [], "source": [
 "# Alternative: train in one fit() call with a per-epoch checkpoint callback\n",
 "# from tensorflow.keras.callbacks import ModelCheckpoint\n",
 "\n",
 "# # Define the number of epochs and steps\n",
 "# epochs = 10\n",
 "# steps_per_epoch = len(train_descriptions)\n",
 "\n",
 "# # Create a data generator\n",
 "# generator = data_generator(train_descriptions, train_feature, tokenizer, max_len)\n",
 "\n",
 "# # Save the full model after each epoch (the .h5 suffix selects the HDF5 format)\n",
 "# checkpoint = ModelCheckpoint(filepath=\"model_{epoch}.h5\", save_weights_only=False)\n",
 "\n",
 "# # Train the model for the specified number of epochs\n",
 "# model.fit(generator, epochs=epochs, steps_per_epoch=steps_per_epoch, verbose=1, callbacks=[checkpoint])" ] },
 { "cell_type": "code", "execution_count": 2, "id": "3a998776", "metadata": {}, "outputs": [], "source": [
 "from numpy import argmax\n",
 "\n",
 "# map an integer index back to its word in the tokenizer vocabulary\n",
 "def word_for_id(integer, tokenizer):\n",
 "    for word, index in tokenizer.word_index.items():\n",
 "        if index == integer:\n",
 "            return word\n",
 "    return None\n",
 "\n",
 "# generate a caption for a photo, one word at a time\n",
 "def generate_desc(model, tokenizer, photo, max_len):\n",
 "    in_text = 'startseq'\n",
 "    for i in range(max_len):\n",
 "        # encode the caption generated so far\n",
 "        sequence = tokenizer.texts_to_sequences([in_text])[0]\n",
 "        sequence = pad_sequences([sequence], maxlen=max_len)\n",
 "        # predict the next word\n",
 "        yhat = model.predict([photo, sequence], verbose=0)\n",
 "        yhat = argmax(yhat)\n",
 "        word = word_for_id(yhat, tokenizer)\n",
 "        # stop when no word can be mapped or the end token is produced\n",
 "        if word is None:\n",
 "            break\n",
 "        in_text += ' ' + word\n",
 "        if word == 'endseq':\n",
 "            break\n",
 "    return in_text" ] },
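{ "cell_type": "markdown", "id": "a3b4c5d6", "metadata": {}, "source": [ "A quick smoke test of `generate_desc` on a single training image. It assumes the trained `model`, `tokenizer`, `train_feature`, and `max_len` from the cells above are still in memory; the image id is simply whichever comes first in the training split." ] },
 { "cell_type": "code", "execution_count": null, "id": "c7d8e9f0", "metadata": {}, "outputs": [], "source": [
 "# Generate a caption for the first training image.\n",
 "example_id = list(train_feature.keys())[0]\n",
 "caption = generate_desc(model, tokenizer, train_feature[example_id], max_len)\n",
 "print(example_id, '->', caption)" ] },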
{ "cell_type": "code", "execution_count": null, "id": "9f7dfb85", "metadata": {}, "outputs": [], "source": [
 "from nltk.translate.bleu_score import corpus_bleu\n",
 "\n",
 "# evaluate the skill of the model with corpus BLEU scores\n",
 "def evaluate_model(model, descriptions, photos, tokenizer, max_length):\n",
 "    actual, predicted = list(), list()\n",
 "    for key, desc_list in descriptions.items():\n",
 "        # generate a caption and collect the reference captions\n",
 "        yhat = generate_desc(model, tokenizer, photos[key], max_length)\n",
 "        references = [d.split() for d in desc_list]\n",
 "        actual.append(references)\n",
 "        predicted.append(yhat.split())\n",
 "    print(\"BLEU-1: %f\" % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))\n",
 "    print(\"BLEU-2: %f\" % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))\n",
 "    print(\"BLEU-3: %f\" % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))\n",
 "    print(\"BLEU-4: %f\" % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))\n",
 "\n",
 "# example: evaluate_model(model, train_descriptions, train_feature, tokenizer, max_len)" ] }
 ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }