function [ f ] = D35WQ8_IDCT( F )
    
    %% Kernel base definition
    
    
    % batch can hold G channels or channels*G_batch
    % for improved process time
    
    % Supposing it is still matlab in NHWC
    % Using N*C WH where N*C -> batch, since they have to be processed
    % separately
    
    N = size(F, 1);
    H = size(F, 2);
    W = size(F, 3);
    C = size(F, 4);
    
    batch = N * C;
    
    % NHWC -> NCHW
    F = permute(F, [1, 4, 2, 3]);
    % NCHW -> N*C H*W
    F_flat = reshape(F, batch, H*W);
    
    [U, V] = meshgrid(1:H, 1:W);
    
    % alpha norm function
    n = @(u)(((u~=1)*1)+((u==1)*1/sqrt(2)));
    c = @(u,v) (sqrt(4/H/W)*n(u)*n(v));
    % f(u,v) returns a kernel with index (u,v) on a fixed N1, N2 basis
    % Empirical proof for formula:
    % this doesn't work:
    % f = @(x,y) c(x,y)*cos((2*x-1)*(U-1)*pi./2./H).*cos((2*y-1)*(V-1)*pi/2/W);
    % This works:
    f = @(u,v) c(u,v)*cos((2*U-1)*(u-1)*pi./2./H).*cos((2*V-1)*(v-1)*pi/2/W);
    
    % K will hold all the kernels flattened
    K = zeros(H*W, H*W);
    for x=1:H
        for y=1:W
            % row major order into columns
            K(:, x+(y-1)*H) = reshape(f(x, y), 1, []);
        end
    end
    
    
    %% Computing the output in batches
    
    % [N*C, H*W] x [H*W, H*W] -> [N*C, H*W]
    f_flat = F_flat*K';
    
    % N*C H*W -> NCHW
    f = reshape(f_flat, N, C, H, W);
    
    % NCHW -> NHWC
    f = permute(f, [1,3,4,2]);
end

