import numpy as np
from typing import Union
import time
import csv

def relu(x:np.ndarray)->np.ndarray:
    '''
    ReLU activation function. Returns max(0, value) element-wise.
    args:
        x: input array of any shape
    output: New array with the same shape as `x` where every negative entry is replaced by 0.
            NaN entries stay NaN.
    '''
    # np.maximum is used instead of `x * (x > 0)` because the multiplication form
    # turns -inf into nan (-inf * 0) and produces -0.0 for negative inputs.
    return np.maximum(x, 0)


def add_padding(X:np.ndarray, pad_size:Union[int,list,tuple], pad_val:int=0)->np.ndarray:
    '''
    Pad a batch of images with a constant value along its two middle (spatial) axes.
    The first (batch) and last (channel) axes are never padded.
    args:
        X: Input batch as a 4-D array. Axis 0 is the batch and axis 3 the channels; axes 1 and 2 are the spatial axes
        pad_size: How much padding should be done. If an integer (Python int or NumPy integer), the same amount is added
                  on both ends of both spatial axes. Otherwise a sequence whose first element is the padding for axis 1 (y_pad)
                  and whose second element is the padding for axis 2 (x_pad)
        pad_val: The constant value used to fill the padded cells. Usually it is 0 (zero padding)
    return:
        Padded Numpy array of shape [batch, axis1 + 2*y_pad, axis2 + 2*x_pad, channels]
    '''
    assert (len(X.shape) == 4), "Input image should be form of [Batch, Width, Height, Channels]"
    # np.integer is accepted too, so sizes computed with NumPy (e.g. np.int64) are not mistaken for a sequence
    if isinstance(pad_size, (int, np.integer)):
        y_pad = x_pad = pad_size
    else:
        y_pad = pad_size[0]
        x_pad = pad_size[1]

    # One (before, after) pair per axis: nothing on batch/channel axes, symmetric padding on the two spatial axes
    pad_width = ((0,0), (y_pad,y_pad), (x_pad,x_pad), (0,0))
    return np.pad(X, pad_width = pad_width, mode = 'constant', constant_values = (pad_val,pad_val))


class Conv2DLayer:
    '''
    2D Convolution Layer (forward pass only) operating on batches shaped [batch, height, width, channels]
    '''
    def __init__(self,input_channels:int, num_filters:int, kernel_size:int, stride:int, padding:Union[str,int,None], activation:Union[None,str]='relu'):
        '''
        Create the Kernel Matrix for the current Layer with shape [kernel_size, kernel_size, input_channels, num_filters]
        and one bias per filter with shape [1, 1, 1, num_filters]. Both are drawn from a standard normal distribution.

        args:
            input_channels: No of features/channels present in the incoming input. It'll be equal to Last dimension value from the prev layer output `previous_layer.output.shape[-1]`
            num_filters: Output Channels or How many new features you want this new Layer to Detect. Each Filter/kernel will detect a new Feature /channel
            kernel_size: Size of the Kernels or Filters. Each Filter is a 2D Square Matrix of size kernel_size (per input channel)
            stride: How many pixels each kernel is shifted between two windows. Same shift in X and Y direction
            padding: Amount of zero padding. An integer pads that many cells on every side of both spatial axes.
                     None or 'valid' means no padding. 'same' pads so that the output height/width equal the input height/width
            activation: Which activation to use. Only 'relu' is applied; any other value returns the raw convolution output
        '''
        self.kernel_matrices = np.random.randn(kernel_size, kernel_size, input_channels, num_filters) # Complete Weight/Kernel Matrix
        self.biases = np.random.randn(1, 1, 1, num_filters) # 1 Bias per Channel/feature/filter
        self.stride = stride
        self.padding = padding
        self.activation = activation


    def convolution_step(self,image_portion:np.ndarray,kernel_matrix:np.ndarray,bias:np.ndarray)->np.ndarray:
        '''
        Convolve a single Filter onto a single window of the Image: multiply window and filter element-wise,
        sum every product and add the bias.
        args:
            image_portion: Window cut out of the (padded) image. Must have exactly the same shape as `kernel_matrix`
            kernel_matrix: Filter / Kernel weights for one output feature, e.g. [filter_size, filter_size, channels from previous layer]
            bias: Bias array for that filter (shape [1,1,1] when sliced as `biases[:,:,:,filter_index]`)
        returns:
            Array with the shape of `bias` holding the convolved value (as float)
        '''
        assert image_portion.shape == kernel_matrix.shape , "Image Portion and Filter must be of same shape"
        return np.sum(np.multiply(image_portion,kernel_matrix)) + bias.astype('float')


    def _resolve_padding(self, h_old:int, w_old:int, filter_size:int)->tuple:
        '''
        Translate `self.padding` into explicit ((top, bottom), (left, right)) padding amounts.
        '''
        padding = self.padding
        if isinstance(padding, (int, np.integer)): # Explicit symmetric padding
            return (padding, padding), (padding, padding)
        if padding is None or padding == 'valid':
            return (0, 0), (0, 0)
        if padding == 'same':
            def split(size:int)->tuple:
                # Total padding that makes (size + total - filter_size) // stride + 1 == size.
                # When the total is odd the extra cell goes to the bottom / right side.
                total = max((size - 1) * self.stride + filter_size - size, 0)
                before = total // 2
                return before, total - before
            return split(h_old), split(w_old)
        raise ValueError(f"Unsupported padding {padding!r}: expected an int, None, 'valid' or 'same'")


    def forward(self,features_batch:np.ndarray)->np.ndarray:
        '''
        Forward Pass or the Full Convolution
        Convolve over the batch of Images using the filters. Each Filter produces one new Feature/channel.
        So if the image had 32 features/channels and this layer has 64 filters, the output will have 64 features/channels.
        For every window position the whole batch and all filters are processed at once, so values can differ
        from a per-window `convolution_step` loop only by floating point rounding.
        args:
            features_batch: Batch of Images (Batch of Features) of shape [batch size, height, width, channels].
            The number of channels must be equal to the `input_channels` this layer was created with

        output: Array of shape [batch size, new height, new width, num_filters] where
                new size = (old size + total padding - filter size) // stride + 1
        raises:
            ValueError: if the input is not 4-D, its channels do not match the kernels, the padding value is not
                        supported, or the filter is larger than the padded input
        '''
        if features_batch.ndim != 4:
            raise ValueError(f"Expected input of shape [batch, height, width, channels], got shape {features_batch.shape}")

        batch_size, h_old, w_old, num_features_old = features_batch.shape # [batch size, height, width, no of features (channels) from the previous layer]
        filter_size, _, kernel_channels, num_of_filters_new = self.kernel_matrices.shape # [filter_size, filter_size, channels, num_of_filters_new]

        if num_features_old != kernel_channels:
            raise ValueError(f"Input has {num_features_old} channels but the kernels expect {kernel_channels}")

        pad_h, pad_w = self._resolve_padding(h_old, w_old, filter_size)
        padded_batch = np.pad(features_batch, ((0,0), pad_h, pad_w, (0,0)), mode = 'constant', constant_values = 0) # Zero padding on the spatial axes only

        h_padded, w_padded = padded_batch.shape[1], padded_batch.shape[2]
        if h_padded < filter_size or w_padded < filter_size:
            raise ValueError(f"Filter size {filter_size} is larger than the padded input ({h_padded}x{w_padded})")

        # New Height/Width depend on the padded height/width, the filter size and the stride (integer floor division)
        h_new = (h_padded - filter_size) // self.stride + 1
        w_new = (w_padded - filter_size) // self.stride + 1

        # This will act as an Input to the layer Next to it
        output = np.zeros([batch_size, h_new, w_new, num_of_filters_new])
        biases = self.biases.reshape(-1).astype('float') # One bias per filter, broadcast over the batch

        for h in range(h_new): # Window's vertical position
            vertical_start = h * self.stride
            vertical_end = vertical_start + filter_size
            for w in range(w_new): # Window's horizontal position
                horizontal_start = w * self.stride
                horizontal_end = horizontal_start + filter_size

                # Same window for every image in the batch: [batch, filter_size, filter_size, channels]
                windows = padded_batch[:, vertical_start:vertical_end, horizontal_start:horizontal_end, :]
                # Contract (filter row, filter col, channel) against all kernels at once -> [batch, num_filters]
                output[:, h, w, :] = np.tensordot(windows, self.kernel_matrices, axes=([1, 2, 3], [0, 1, 2])) + biases

        if self.activation == 'relu': # apply activation Function.
            return relu(output)

        return output


if __name__== "__main__":
    # Benchmark script: time the construction of one layer plus a single forward pass
    # over a random batch, then append the measured seconds to submission.csv.

    batch_features = np.random.randn(32, 64, 64, 3)

    # perf_counter is a monotonic, high resolution clock meant for measuring intervals;
    # time.time() follows the wall clock and can jump if the system time is adjusted.
    start_time = time.perf_counter()
    cnn = Conv2DLayer(input_channels=3, num_filters=8, kernel_size=3, stride=2, padding=2, activation='relu')
    pre_output = cnn.forward(batch_features)
    interval_time = time.perf_counter() - start_time
    print(f"Time taken for execution: {interval_time} s")

    # Append only: every run adds one row with the elapsed time
    with open("submission.csv", "a", newline='') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow([interval_time])