function [W,evoRiesgo,paso]=mlpMg_CostesMirrorDO(x,y,W,tAct,parametros,tipoFDP,Niter,paso,pDO)
%
%  [W,evoRiesgo,paso]=mlpMg_CostesMirrorDO(x,y,W,tAct,pBay,tFDP,Niter,paso,pDO)
%
%  Trains an MLP with M hidden layers by minimizing a Parzen-window
%  estimate of the Bayesian risk with mini-batch gradient descent, an
%  adaptive step size and drop-out. Mirror windows are used for the two
%  classes and an individual cost per pattern can be specified.
%
%  Inputs
%  ------
%     x : input patterns (Nd x Np)
%     y : labels (1 x Np). Class 1 is labelled 1; the other class is
%         labelled 0 or, if no 0 is present, -1.
%     W : cell array, 1 x (M+1), with the weights of the hidden layers and
%         of the output layer.
%         - layer k : Nk x (Nv+1), Nv being the number of neurons of the
%                     previous layer and Nk the number of neurons of layer k
%         If W{1,end} is empty the weights are initialised here: the layer
%         sizes are taken from cell2mat(W) and every weight is drawn
%         uniformly in sqrt(1/Nv)*[-1,1].
%  tAct : activation function of every layer (vector of length M+1: the M
%         hidden layers followed by the output layer)
%         - 0: linear
%         - 1: tanh
%         - 2: logistic
%         - 3: rectified linear unit (ReLU)
%         If empty, tanh hidden layers and a linear output are used.
%  pBay : Bayesian parameters (length Np+1)
%         - pBay(1): P0 (probability of class -1)
%         - pBay(2:end): cost of an erroneous decision for each pattern
%  tFDP : parameters of the Parzen window used to estimate the risk.
%         tFDP(1) selects the kind of PDF and the remaining elements are
%         its parameters (see the help of fdpGeneralN).
% Niter : [Niter, Nbatch]
%         - Niter : number of gradient iterations (default 1000)
%         - Nbatch: number of patterns drawn (without replacement) for
%                   every iteration; the class proportions of the training
%                   set are kept. If omitted, or larger than Np, the whole
%                   training set is used.
%  paso : adaptive step size parameters, mu or [mu,muCrec,muDec]
%         (defaults: mu=1e-3, muCrec=1.05, muDec=2)
%         - mu    : step size
%         - muCrec: mu is multiplied by muCrec at the end of every iteration
%         - muDec : mu is divided by muDec, repeatedly, while the tentative
%                   step does not decrease the mini-batch risk
%   pDO : drop probabilities (vector of length M+1): pDO(1) applies to the
%         inputs and pDO(k+1) to the neurons of hidden layer k.
%         If omitted or empty no drop-out is applied.
%
%  Outputs
%  -------
%     W : trained weights (already scaled by 1-pDO, ready for inference)
%  evoRiesgo : evolution of the estimated Bayesian risk over the complete
%         training set, 1 x (Niter+1). evoRiesgo(1) is the risk before
%         training and evoRiesgo(k+1) the risk after iteration k.
%  paso : [mu,muCrec,muDec] at the end of the training
%
%  Training stops early (with a message) if mu falls below 1e-30.
%
%--------------------------------------------------------------------------
%        Author: Marcelino Lázaro
%      Creation: December 2016
%   Last update: January 2018
%--------------------------------------------------------------------------

Ncapas = size(W,2);             % number of weight layers (M hidden + output)

%-----------------------------
% Default values for missing / empty arguments
%-----------------------------
if nargin < 4 || isempty(tAct), tAct = [ones(1,Ncapas-1),0]; end
if nargin < 7 || isempty(Niter), Niter = 1000; end
if nargin < 8 || isempty(paso), paso = 1e-3; end
if nargin < 9 || isempty(pDO), pDO = zeros(1,Ncapas); end

if length(paso)==1
    mu=paso;
    muCrec=1.05;    % growth factor of the step size
    muDec=2;        % shrink factor of the step size
else
    mu=paso(1);
    muCrec=paso(2);
    muDec=paso(3);
end

%-----------------------------
% Bayesian parameters
%-----------------------------
Prob0 = parametros(1);
Prob1 = 1-Prob0;
Cx = reshape(parametros(2:end),1,[]);   % per-pattern costs as a row vector

%-----------------------------
% Data organisation: split the patterns by class
%-----------------------------
Ne = size(x,1);
Ns = size(y,1);

v=find(y==0);
if isempty(v)
    v=find(y==-1);
end
N0Total=length(v);
x0Total=x(:,v);
C0tTotal=Cx(:,v)*Prob0;

v=find(y==1);
N1Total=length(v);
x1Total=x(:,v);
C1tTotal=Cx(:,v)*Prob1;

%-----------------------------
% Number of iterations and mini-batch size
%-----------------------------
NpTotal = N0Total+N1Total;
if numel(Niter) >= 2
    Nbatch = min(Niter(2),NpTotal);     % cannot draw more patterns than available
else
    Nbatch = NpTotal;                   % scalar Niter: use the whole training set
end
Niter = Niter(1);

%-----------------------------
% Weight initialisation (only if W{1,end} is empty)
%-----------------------------
if isempty(W{1,end})
    NnS=[Ne,cell2mat(W),Ns];
    for ko=1:Ncapas
        W{1,ko} = sqrt(1/NnS(ko))*(2*rand(NnS(ko+1),NnS(ko)+1)-1);
    end
end

%-----------------------------
% Risk over the complete training set before training
%-----------------------------
evoRiesgo = zeros(1,Niter+1);
[ye,~] = mlpM([x0Total,x1Total],W,tAct);
evoRiesgo(1) = riesgoParzenMirrorDO(ye,N0Total,N1Total,C0tTotal,C1tTotal,tipoFDP);

% During training the weights are kept divided by (1-pDO); the factor is
% undone whenever the full-set risk is evaluated and before returning.
for ko=1:Ncapas
    W{1,ko}=W{1,ko}/(1-pDO(ko));
end

Wn=cell(1,Ncapas);      % weights scaled by (1-pDO), used for the full-set risk
mW=cell(1,Ncapas);      % drop-out masks of the weights
mW{1,Ncapas}=ones(size(W{1,Ncapas}));   % the output layer is never dropped
dW=cell(1,Ncapas);      % gradients
Wdo=cell(1,Ncapas);     % weights with the drop-out masks applied
Wdon=cell(1,Ncapas);    % tentative weights tested by the step-size search

% Per-class sizes of the mini-batch (class proportions are preserved)
N0=floor(Nbatch*N0Total/NpTotal);
N1=Nbatch-N0;
Np=Nbatch;

%--------------------------------------------------------------------------
% Weight adaptation iterations
%--------------------------------------------------------------------------
for kiter=1:Niter
    %----------------------------------------------------------------------
    % Mini-batch selection
    %----------------------------------------------------------------------
    v0=randperm(N0Total,N0);
    v1=randperm(N1Total,N1);
    C0t=C0tTotal(:,v0);
    C1t=C1tTotal(:,v1);

    %----------------------------------------------------------------------
    % Drop-out masks: inputs (pDO(1)) and whole rows of the hidden-layer
    % weight matrices (pDO(ko+1))
    %----------------------------------------------------------------------
    mX=diag(double(rand(1,Ne)>pDO(1)));
    for ko=1:Ncapas-1
        [Nb,Na]=size(W{1,ko});
        filas=double(rand(1,Nb)>pDO(ko+1));
        mW{1,ko}=repmat(filas(:),1,Na);
        Wdo{1,ko}=W{1,ko}.*mW{1,ko};
    end
    Wdo{1,Ncapas}=W{1,Ncapas};

    xb=mX*[x0Total(:,v0),x1Total(:,v1)];    % masked mini-batch inputs

    %----------------------------------------------------------------------
    % Forward pass and mini-batch risk of the masked network
    %----------------------------------------------------------------------
    [ye,O] = mlpM(xb,Wdo,tAct);
    coste = riesgoParzenMirrorDO(ye,N0,N1,C0t,C1t,tipoFDP);

    %----------------------------------------------------------------------
    % Gradient (back-propagation through the SAME masked network and the
    % SAME masked inputs that were used in the forward pass)
    %----------------------------------------------------------------------
    z0=fdpGeneralN(+ye(1:N0),tipoFDP);
    z1=fdpGeneralN(-ye(N0+1:end),tipoFDP);

    d = [z0.*C0t/N0, -z1.*C1t/N1];

    if tAct(end) == 1
        d = d .*(1-ye.^2);
    elseif tAct(end) == 2
        d = d .*(ye-ye.^2);
    elseif tAct(end) == 3
        d(ye<=0)=0;
    end
    dW{1,end}=d*transpose([O{1,end};ones(1,Np)]);

    dp=d;
    o=O{1,end};
    for ko=Ncapas-1:-1:1
        wp=Wdo{1,ko+1};
        d=transpose(wp(:,1:end-1))*dp;
        if tAct(ko) == 1
            d=d.*(1-o.^2);
        elseif tAct(ko) == 2
            d=d.*(o-o.^2);
        elseif tAct(ko) == 3
            d=d.*(o>0);
        end
        if ko==1
            o=xb;
        else
            o=O{1,ko-1};
        end
        dW{1,ko}=d*transpose([o;ones(1,Np)]);
        dp=d;
    end

    %----------------------------------------------------------------------
    % Step-size search: shrink mu until the tentative step decreases the
    % mini-batch risk
    %----------------------------------------------------------------------
    for ko=1:Ncapas
        Wdon{1,ko}=Wdo{1,ko}-mu*dW{1,ko}.*mW{1,ko};
    end
    [ye,~] = mlpM(xb,Wdon,tAct);
    costen = riesgoParzenMirrorDO(ye,N0,N1,C0t,C1t,tipoFDP);

    if (costen >= coste)
        while true
            mu = mu / muDec;

            if mu < 1e-30
                disp('mlpMg_CostesMirrorDO - Step size reached the limit...')
                % W has not been modified in this iteration, so its risk
                % over the full training set is the last one stored.
                evoRiesgo(kiter+1:end)=evoRiesgo(kiter);
                paso=[mu,muCrec,muDec];
                % Undo the drop-out scaling before leaving, exactly as in
                % the normal exit path.
                for ko=1:Ncapas
                    W{1,ko}=W{1,ko}*(1-pDO(ko));
                end
                return
            end

            for ko=1:Ncapas
                Wdon{1,ko}=Wdo{1,ko}-mu*dW{1,ko}.*mW{1,ko};
            end
            [ye,~] = mlpM(xb,Wdon,tAct);
            costen = riesgoParzenMirrorDO(ye,N0,N1,C0t,C1t,tipoFDP);

            if costen < coste
                break
            end
        end
    end

    %----------------------------------------------------------------------
    % Weight update. Note: mu is grown before being applied, so the
    % committed step uses mu*muCrec rather than the mu that was just tested
    % (behaviour kept from the original implementation).
    %----------------------------------------------------------------------
    mu = mu * muCrec;

    for ko=1:Ncapas
        W{1,ko}=W{1,ko}-mu*dW{1,ko}.*mW{1,ko};
    end

    %----------------------------------------------------------------------
    % Risk over the complete training set (no drop-out, scaled weights)
    %----------------------------------------------------------------------
    for ko=1:Ncapas
        Wn{1,ko}=W{1,ko}*(1-pDO(ko));
    end
    [ye,~] = mlpM([x0Total,x1Total],Wn,tAct);
    evoRiesgo(kiter+1) = riesgoParzenMirrorDO(ye,N0Total,N1Total,C0tTotal,C1tTotal,tipoFDP);
end

paso=[mu,muCrec,muDec];
for ko=1:Ncapas
    W{1,ko}=W{1,ko}*(1-pDO(ko));
end


function riesgo = riesgoParzenMirrorDO(ye,N0,N1,C0t,C1t,tipoFDP)
% Parzen-window estimate of the risk for the network outputs ye, whose
% first N0 elements belong to class 0 and the remaining N1 to class 1.
% C0t and C1t are the per-pattern costs already weighted by the priors.
aux0=intGeneralN(ye(1:N0),tipoFDP);
aux1=intGeneralN(-ye(N0+1:end),tipoFDP);
riesgo=C0t*transpose(aux0)/N0+C1t*transpose(aux1)/N1;

