"""Greedy-search caption/report inference for a trained encoder-decoder model.

The module rebuilds the training-time architecture, loads the saved weights,
splits the network into an image encoder and a text decoder, and decodes a
report one word at a time.
"""

import datetime
import functools
import os
import pickle
import random
import re
import time
import warnings

# Silence library warnings; installed before the third-party imports so that
# import-time warnings are suppressed as well.
warnings.filterwarnings('ignore')

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import densenet
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import (
    Add,
    Concatenate,
    Conv2D,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    GRU,
    Input,
    LSTM,
)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tqdm import tqdm

# Length every token sequence is padded to (shape of the 'Text_Input' layer).
MAX_SEQ_LEN = 153
# Size of the embedding input dimension and of the softmax output layer.
VOCAB_SIZE = 1427
# Token index at which decoding stops. The original code labels it "endseq";
# presumably its index in the tokenizer fitted on Final_Train_Data.csv --
# NOTE(review): re-check this value if the training data ever changes.
END_TOKEN_INDEX = 7
# Characters stripped by the tokenizer. '\\]' is the escaped spelling of the
# backslash + ']' pair; the runtime string is unchanged from the original.
TOKENIZER_FILTERS = '!"#$%&()*+,-/:;<=>?@[\\]^_`{|}~\t\n'


def _build_lstm(name, return_sequences=False):
    """Create one 256-unit LSTM layer with the seeded initializers used in training."""
    return LSTM(
        units=256,
        activation='tanh',
        recurrent_activation='sigmoid',
        use_bias=True,
        kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
        recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
        bias_initializer=tf.keras.initializers.zeros(),
        return_sequences=return_sequences,
        name=name,
    )


def getModel():
    """Rebuild the encoder-decoder network and split it for inference.

    Reads the embedding matrix from 'my_embedding_matrix.npy' and the trained
    weights from 'encoder_decoder_epoch_5.h5' (both relative to the current
    working directory).

    Returns:
        A ``(encoder_model, decoder_model)`` tuple.

        * ``encoder_model`` maps the 'Image_input' tensor (2048 features) to
          the 256-unit 'dense_encoder' output.
        * ``decoder_model`` takes ``[text_input, enc_output]`` -- a padded
          token sequence of length 153 and a 256-dim encoder output -- and
          returns the softmax over the 1427-entry vocabulary.
    """
    embedding_matrix_vocab = np.load('my_embedding_matrix.npy')

    # Image branch.
    input1 = Input(shape=(2048,), name='Image_input')
    dense1 = Dense(
        256,
        kernel_initializer=tf.keras.initializers.glorot_uniform(seed=56),
        name='dense_encoder',
    )(input1)

    # Text branch: frozen embedding followed by two stacked LSTMs.
    input2 = Input(shape=(MAX_SEQ_LEN,), name='Text_Input')
    embedding_layer = Embedding(
        input_dim=VOCAB_SIZE,
        output_dim=300,
        input_length=MAX_SEQ_LEN,
        mask_zero=True,
        trainable=False,
        weights=[embedding_matrix_vocab],
        name="Embedding_layer",
    )
    emb = embedding_layer(input2)
    LSTM1 = _build_lstm("LSTM1", return_sequences=True)(emb)
    LSTM2 = _build_lstm("LSTM2")
    LSTM2_output = LSTM2(LSTM1)
    dropout1 = Dropout(0.5, name='dropout1')(LSTM2_output)

    # Merge both branches and classify the next token.
    dec = tf.keras.layers.Add()([dense1, dropout1])
    fc1 = Dense(
        256,
        activation='relu',
        kernel_initializer=tf.keras.initializers.he_normal(seed=63),
        name='fc1',
    )
    fc1_output = fc1(dec)
    dropout2 = Dropout(0.4, name='dropout2')(fc1_output)
    output_layer = Dense(VOCAB_SIZE, activation='softmax', name='Output_layer')
    output = output_layer(dropout2)

    encoder_decoder = Model(inputs=[input1, input2], outputs=output)
    encoder_decoder.load_weights("encoder_decoder_epoch_5.h5")

    # Encoder: image features -> 'dense_encoder' activation.
    encoder_input = encoder_decoder.input[0]
    encoder_output = encoder_decoder.get_layer('dense_encoder').output
    encoder_model = Model(encoder_input, encoder_output)

    # Decoder: reuses the trained LSTM stack, fc1 and output layer, but takes
    # the encoder output as an explicit input. The dropout layers are not
    # part of this path.
    text_input = encoder_decoder.input[1]
    enc_output = Input(shape=(256,), name='Enc_Output')
    text_output = encoder_decoder.get_layer('LSTM2').output
    add1 = tf.keras.layers.Add()([text_output, enc_output])
    fc_1 = fc1(add1)
    decoder_output = output_layer(fc_1)
    decoder_model = Model(inputs=[text_input, enc_output], outputs=decoder_output)

    return encoder_model, decoder_model


@functools.lru_cache(maxsize=1)
def _cached_models():
    """Build the encoder/decoder pair once and reuse it across predictions."""
    return getModel()


@functools.lru_cache(maxsize=1)
def _cached_tokenizer():
    """Fit the tokenizer on the 'Report' column of Final_Train_Data.csv once."""
    train_data = pd.read_csv('Final_Train_Data.csv')
    y_train = train_data['Report']
    tokenizer = Tokenizer(filters=TOKENIZER_FILTERS)
    tokenizer.fit_on_texts(y_train.values)
    return tokenizer


def greedysearch(image):
    """Decode a report for ``image`` by repeatedly taking the most likely word.

    Args:
        image: Input passed straight to the encoder's ``predict``. The encoder
            expects 2048 features per sample; presumably a ``(1, 2048)``
            array, since the text input is a batch of one -- verify against
            the caller.

    Returns:
        The generated words joined by single spaces (without the 'startseq'
        and end tokens). Empty string if the first prediction ends decoding.

    Raises:
        KeyError: If 'startseq' is not in the fitted tokenizer's vocabulary.
    """
    encoder_model, decoder_model = _cached_models()
    tokenizer = _cached_tokenizer()

    image_features = encoder_model.predict(image)

    # Running list of token ids fed to the decoder; equivalent to re-tokenising
    # the growing "startseq w1 w2 ..." string on every step.
    token_ids = [tokenizer.word_index['startseq']]
    result = []

    for _ in range(MAX_SEQ_LEN):
        input_padded = pad_sequences([token_ids], MAX_SEQ_LEN, padding='post')
        predictions = decoder_model.predict([input_padded, image_features])
        arg = int(np.argmax(predictions))

        if arg == END_TOKEN_INDEX:
            break

        word = tokenizer.index_word.get(arg)
        if word is None:
            # Index 0 is the padding slot and has no word; stop instead of
            # raising KeyError.
            break

        result.append(word)
        token_ids.append(arg)

    return ' '.join(result)


def get_result(img):
    """Run greedy decoding on ``img``, print the report and return it."""
    separator = '------------------------------------------------------------------------------------------------------'
    pre_Report = greedysearch(img)
    print(separator)
    print("Predicted Report : ", pre_Report)
    print(separator)
    return pre_Report


# Example usage:
# with open('/content/Image_features_ecoder_decoder.pickle', 'rb') as f:
#     Xnet_features = pickle.load(f)
# image = Xnet_features["/content/drive/MyDrive/cnn-rnn/NLMCXR_png/CXR545_IM-2149_0"]
# get_result(image)