function net = init_network()
%This function initializes the network architecture and the corresponding
%trainable parameters and meta parameters.
%
%Check this page for the list of the different layer types and their 
%parameters: http://www.vlfeat.org/matconvnet/mfiles/simplenn/vl_simplenn/

%Per-parameter learning rate multipliers for the conv layers: the first
%entry scales the filter updates, the second the bias updates.
lr = [.1 2] ;
%Dropout rate for the dropout layers in blocks 3 and 4 (0.5 is a typical
%choice):
drop_rate = 0.5 ;
%Initialize the layers as an empty cell array
net.layers = {};

% Block 1 (8 3x3 conv filters - batch normalization - ReLU - max pooling)
out = 8;
net.layers{end+1} = struct('type', 'conv', ...
                           'weights', {{0.01*randn(3,3,3,out, 'single'), zeros(1, out, 'single')}}, ...
                           'learningRate', lr, ...
                           'stride', 1, ...
                           'pad', 1); 
%stride: the step by which the convolution window is shifted across the
%input after each application
%pad: to keep the width and height of the convolutional output the same
%as the input, the input should be padded on each side with (filtersize-1)/2
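%As a sanity check, the output size of a convolution follows
%  out = floor((in + padLeft + padRight - filtersize)/stride) + 1
%so for the 32x32 input here, with 3x3 filters, stride 1 and pad 1:
%  floor((32 + 1 + 1 - 3)/1) + 1 = 32, i.e. the spatial size is preserved.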

%Batch normalization reduces internal covariate shift and mitigates the
%vanishing gradient problem during training. It speeds up training
%significantly (fewer steps are needed for convergence).
%https://arxiv.org/pdf/1502.03167v3.pdf
net.layers{end+1} = struct('type', 'bnorm', ...
                             'weights', {{ones(out, 1, 'single'), zeros(out, 1, 'single'), ...
                               zeros(out, 2, 'single')}}, ...
                             'epsilon', 1e-4, ...
                             'learningRate', [2 1 0.1], ...
                             'weightDecay', [0 0]);
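%The three weight arrays are, per channel: the multipliers g (initialized
%to 1), the biases b (initialized to 0), and the running moments
%(per-channel mean and standard deviation, used at test time). Following
%the paper above, for each channel c the layer computes roughly:
%  y = g(c) * (x - mean_c) / sqrt(var_c + epsilon) + b(c)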
net.layers{end+1} = struct('type', 'relu');                         
net.layers{end+1} = struct('type', 'pool', ...
                           'method', 'max', ...
                           'pool', [3 3], ...
                           'stride', 2, ...
                           'pad', [0 1 0 1]);
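%With the asymmetric pad [top bottom left right] = [0 1 0 1], a 32x32
%input gives floor((32 + 0 + 1 - 3)/2) + 1 = 16, halving the resolution.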


% Block 2 (16 3x3 conv filters - batch normalization - ReLU - average pooling)
in = out;
out = 16;
net.layers{end+1} = struct('type', 'conv', ...
                           'weights', {{0.05*randn(3,3,in,out, 'single'), zeros(1,out,'single')}}, ...
                           'learningRate', lr, ...
                           'stride', 1, ...
                           'pad', 1);
net.layers{end+1} = struct('type', 'bnorm', ...
                             'weights', {{ones(out, 1, 'single'), zeros(out, 1, 'single'), ...
                               zeros(out, 2, 'single')}}, ...
                             'epsilon', 1e-4, ...
                             'learningRate', [2 1 0.1], ...
                             'weightDecay', [0 0]);            
net.layers{end+1} = struct('type', 'relu');            
net.layers{end+1} = struct('type', 'pool', ...
                           'method', 'avg', ...
                           'pool', [3 3], ...
                           'stride', 2, ...
                           'pad', [0 1 0 1]);                      

% Block 3 (32 3x3 conv filters - batch normalization - ReLU - dropout - average pooling)
in = out;
out = 32;
net.layers{end+1} = struct('type', 'conv', ...
                           'weights', {{0.05*randn(3,3,in,out, 'single'), zeros(1,out,'single')}}, ...
                           'learningRate', lr, ...
                           'stride', 1, ...
                           'pad', 1);
net.layers{end+1} = struct('type', 'bnorm', ...
                             'weights', {{ones(out, 1, 'single'), zeros(out, 1, 'single'), ...
                               zeros(out, 2, 'single')}}, ...
                             'epsilon', 1e-4, ...
                             'learningRate', [2 1 0.1], ...
                             'weightDecay', [0 0]);            
net.layers{end+1} = struct('type', 'relu');     
net.layers{end+1} = struct('type', 'dropout', 'rate', drop_rate);
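%Dropout randomly zeroes each activation with probability drop_rate
%during training (rescaling the survivors by 1/(1-drop_rate)), which
%reduces overfitting; at test time the layer passes its input through.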
net.layers{end+1} = struct('type', 'pool', ...
                           'method', 'avg', ...
                           'pool', [3 3], ...
                           'stride', 2, ...
                           'pad', [0 1 0 1]); 

% Block 4 (64 4x4 conv filters - batch normalization - ReLU - dropout)
in = out;
out = 64;
net.layers{end+1} = struct('type', 'conv', ...
                           'weights', {{0.05*randn(4,4,in,out, 'single'), zeros(1,out,'single')}}, ...
                           'learningRate', lr, ...
                           'stride', 1, ...
                           'pad', 0) ;
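%By this point the three pooling layers have reduced the 32x32 input to
%16x16, then 8x8, then 4x4, so this 4x4 convolution with no padding
%collapses the spatial dimensions to 1x1:
%  floor((4 + 0 + 0 - 4)/1) + 1 = 1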
net.layers{end+1} = struct('type', 'bnorm', ...
                             'weights', {{ones(out, 1, 'single'), zeros(out, 1, 'single'), ...
                               zeros(out, 2, 'single')}}, ...
                             'epsilon', 1e-4, ...
                             'learningRate', [2 1 0.1], ...
                             'weightDecay', [0 0]) ;                           
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'dropout', 'rate', drop_rate);

%Block 5 (10 1x1 conv filters - batch normalization)
%Note:  The 1x1 convolution acts as a fully connected layer
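%For a 1x1xin activation x, this layer computes, for each output k:
%  y(1,1,k) = sum_c W(1,1,c,k)*x(1,1,c) + b(k)
%i.e. the matrix-vector product reshape(W,in,out)'*squeeze(x) + b(:),
%which is exactly a fully connected layer with in inputs and out outputs.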
in = out;
out = 10;
net.layers{end+1} = struct('type', 'conv', ...
                           'weights', {{0.05*randn(1,1,in,out, 'single'), zeros(1,out,'single')}}, ...
                           'learningRate', .1*lr, ...
                           'stride', 1, ...
                           'pad', 0) ;
net.layers{end+1} = struct('type', 'bnorm', ...
                             'weights', {{ones(out, 1, 'single'), zeros(out, 1, 'single'), ...
                               zeros(out, 2, 'single')}}, ...
                             'epsilon', 1e-4, ...
                             'learningRate', [2 1 0.1], ...
                             'weightDecay', [0 0]) ;                           
%Loss layer
net.layers{end+1} = struct('type', 'softmaxloss');
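%softmaxloss fuses the softmax and the log loss into one layer: for the
%true class c it computes L = -log( exp(x_c) / sum_j exp(x_j) ), which
%is numerically more stable than applying the two operations separately.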

%Meta parameters
net.meta.inputSize = [32 32 3];
net.meta.trainOpts.weightDecay = 0.0001;
net.meta.trainOpts.learningRate = 0.01; %Global learning rate; each
%layer's 'learningRate' field acts as a multiplier on this value
%Batch size: the number of samples used to estimate the gradient before
%each parameter update (see: mini-batch stochastic gradient descent)
net.meta.trainOpts.batchSize = 256;
%The training process is divided into epochs. In each epoch every training
%sample is processed once. Usually one epoch contains many rounds of
%parameter updates (one update per batchSize samples).
net.meta.trainOpts.numEpochs = 50;
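%For example, assuming a CIFAR-10-sized training set of 50,000 images
%(consistent with the 32x32x3 input and 10 output classes), each epoch
%performs ceil(50000/256) = 196 parameter updates.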


%Fill in default values
net = vl_simplenn_tidy(net);
