function [TRANS, alpha, beta, logl, smtd] = mseqnbh(cbr, furl, lurl, misrd, TRANS, alpha, beta, Nit)
%MSEQNBH  EM-type estimation of a negative-binomial HMM over multiple sequences.
%
%   [TRANS, alpha, beta, logl, smtd] = mseqnbh(cbr, furl, lurl, misrd, ...
%                                              TRANS, alpha, beta, Nit)
%
%   Inputs
%     cbr    cell array with one row per cluster. Column 1 holds the value
%            passed to the helpers as the per-region length (Td), column 3
%            the count vector, column 4 the term that cfemis subtracts
%            inside the exponential (presumably a log-factorial normaliser
%            - TODO confirm against the caller).
%     furl   furl(d) is passed to the helpers as the number of regions of
%            cluster d.
%     lurl   number of clusters (upper bound of the E-step loop).
%     misrd  vector indexed by length; it contributes
%            n0 = sum(misrd(k) * k) and nr = sum(misrd) to the state-1
%            statistics ("non-clustered reads" in the original comments).
%     TRANS  Q-by-Q initial transition matrix.
%     alpha  initial shape parameters (Q values).
%     beta   initial scale parameters (Q values).
%     Nit    maximum number of iterations.
%
%   Outputs
%     TRANS, alpha, beta  parameters after the last completed update.
%     logl   1-by-Nit log-likelihood trace. If the loop stops early on
%            convergence, the entries after the last iteration stay zero.
%     smtd   smoothing probabilities from the last E-step, one row per
%            location and one column per state.
%
%   Requires nbh_chk (defined outside this file) to validate the parameters
%   and return the number of states.

% Check input arguments
narginchk(8, 8);

count = cat(1, cbr{:,3}); % vectorize a cell array of counts

T = length(count); % number of locations in CLIP clusters.
mrlen = length(misrd);
% Force a column so the element-wise product below is well defined for row
% input as well (a row would otherwise mismatch or expand into a matrix).
misrd = misrd(:);
n0 = sum(misrd .* (1:mrlen)');
nr = sum(misrd);
Td = cat(1,cbr{:,1});
ccnt = cbr(:,3);
cnorm = cbr(:,4);

count = reshape(count, T, 1);
Q = nbh_chk(TRANS, alpha, beta); % Number of HMM states
alpha = reshape(alpha, 1, Q); % the shape parameter
beta  = reshape(beta, 1, Q); % the scale parameter

% Variables
logl = zeros(1,Nit); % log-likelihood
csmtd = cell(lurl, 1); % smoothing distributions p(q_t | n_{1:T}, q_0)
loglike = zeros(lurl,1);
pTRANS = zeros(Q,Q,lurl); % per-cluster transition statistics

% Main loop of the EM algorithm
for nit = 1:Nit
    %%% 1: E-Step, compute density values and forward recursion and likelihood computation
    parfor d = 1:lurl
        [loglike(d), pTRANS(:,:,d) ,csmtd{d}] = ...
            cfbeq(cfemis( Td(d), ccnt{d}, cnorm{d}, furl(d), Q, alpha, beta ), Q, TRANS, Td(d), furl(d));
    end
    smtd = cat(1,csmtd{:});
    
    % log-likelihood: non-clustered reads
    mldens = n0 * alpha(1).*log(beta(1)./(1+beta(1)));
    
    % Compute log-likelihood
    logl(nit) = sum(loglike) + mldens;  %\sum log p(q_{T_d+1}^d, n_1:T_d ^d | q_0 ^d)
    if mod(nit-1,10) == 0
        fprintf(1, 'Iteration %d:\t%.3f\n', (nit-1), logl(nit));
    end
    
    % Stop before the M-step so the returned parameters are the ones that
    % produced logl(nit) and smtd.
    if nit > 1 && norm(logl(nit) - logl(nit-1))<0.1
        fprintf('The algorithm converged. \n');
        break;
    end
    
    %%% 2: M-Step, reestimation of the transition matrix
    % NOTE(review): the n0 + nr count is scaled by TRANS(1,1) here, together
    % with the statistics returned by cfbeq - confirm this is intended.
    TRANS = TRANS .* (sum(pTRANS,3) + [n0 + nr, zeros(1,Q-1); zeros(Q-1,Q)]);
    
    % Normalization of the transition matrix (each row sums to one)
    TRANS = TRANS ./ (sum(TRANS.').' *ones(1,Q));
    
    %%% 3: CM-Step 1
    % Reestimate beta conditioning on alpha
    
    % Sum over locations explicitly (dimension 1): without the dimension
    % argument a single-row smtd would collapse to a scalar.
    eq_count = sum(smtd, 1) + [n0 , zeros(1, Q-1)];
    beta = alpha./((count.'*smtd)./eq_count);
    % NOTE(review): this fires only when ALL adjacent ratios are below one;
    % confirm whether 'any' was intended for the violation test.
    if all(beta(1:(Q-1)) ./ beta(2:Q) < 1)
        fprintf(1, 'Warning: the order constraint was violoated\n');
    end
    %%% 4: CM-Step 2, reestimation of the shape parameters with beta fixed
    % gradient
    grad = eq_count .* (log(beta./(1+beta)) - digamma(alpha)) ...
        + sum(smtd .* digamma(count*ones(1,Q) + ones(T,1)*alpha), 1) + ...
        [n0 * digamma(alpha(1)), zeros(1, Q-1)];
    
    % Hessian
    hess = -eq_count.*trigamma(alpha) ...
        + sum(smtd .* trigamma(count*ones(1,Q) + ones(T,1)*alpha), 1) + ...
        [n0 * trigamma(alpha(1)), zeros(1, Q-1)];
    
    % Newton step: one common increment is applied to every component of alpha
    tmp_step = - sum(grad)./sum(hess);
    if ~isfinite(tmp_step)
        % A degenerate Hessian gives Inf/NaN. -Inf would keep the
        % backtracking loop below running forever and NaN would poison
        % alpha, so leave alpha unchanged for this iteration instead.
        fprintf(1, 'Warning: could not update alpha\n');
        tmp_step = 0;
    end
    tmp = alpha + tmp_step * ones(1,Q);
    
    % Shrink the step until every shape parameter stays strictly positive
    while (any(tmp <= 0))
        fprintf(1, 'Warning: could not update alpha\n');
        tmp_step = tmp_step/10;
        tmp = alpha + tmp_step * ones(1,Q);
    end
    alpha = tmp;
end

end

% Subfunction to evaluate emission probabilities
function [ dens ] = cfemis(Td, count, dnorm, nreg, Q, alpha, beta )
% Emission density values for every location (rows) and state (columns).
%
%   Td, nreg  their product is the number of locations (rows of the result)
%   count     column vector of observed counts, one per location
%   dnorm     column vector subtracted from the log-density of every state
%             (presumably gammaln(count+1) - TODO confirm against caller)
%   Q         number of states
%   alpha     1-by-Q shape parameters
%   beta      1-by-Q scale parameters
%
% The log-density is assembled term by term and exponentiated at the end.
nloc = Td * nreg;

% Terms that depend on the state only, replicated down the locations
stateterm = alpha.*log(beta./(1+beta)) - gammaln(alpha);
logdens = repmat(stateterm, nloc, 1);

% Count-dependent terms
logdens = logdens - count*log(1+beta);
logdens = logdens + gammaln(repmat(count, 1, Q) + repmat(alpha, nloc, 1));

% Per-location normaliser, identical for all states
logdens = logdens - repmat(dnorm, 1, Q);

dens = exp(logdens);
end

% Forward and backward equations to find filtering and smoothing distributions
function [ loglike, pTRANS, smtd ] = cfbeq( dens, Q, TRANS, T, nreg)
% Scaled forward-backward recursions for nreg sequences of length T each.
%
%   dens   (T*nreg)-by-Q emission density values. The sequences are stored
%          one after the other, so sequence r occupies rows
%          (r-1)*T+1 : r*T.
%   Q      number of states
%   TRANS  Q-by-Q transition matrix
%   T      length of each sequence
%   nreg   number of sequences
%
%   loglike  log-likelihood summed over all sequences, including the final
%            transition back to state 1
%   pTRANS   Q-by-Q transition statistics. The interior term leaves out the
%            TRANS(r,v) factor (the caller multiplies by TRANS afterwards).
%   smtd     (T*nreg)-by-Q smoothing probabilities
%
% The state before the first location and the state after the last location
% are both fixed to state 1 ("state 0" in the model notation).
% For multiple clusters, apply this function to each cluster.

nel = T*nreg; % number of locations
scale = zeros(nel,1); %scale(t) = p(n_t | n_{1:t-1}, q_0)

bdx = (0:(nreg-1))*T; % row offset of each sequence

% The forward and backward variables
fltd = zeros(nel, Q); % filtering distribution p(q_t | n_{1:t}, q_0)
bwrd = zeros(nel, Q); % backward variable p(q_{T+1} , n_{t+1:T} | q_t) / p(n_{t+1:T} | n_{1:t}, q_0)

%%% 1: forward recursion and likelihood computation
% First location: the previous state is state 1, so the predictive
% distribution is TRANS(1,:), weighted ELEMENT-WISE by the emission density
% of each state. The earlier matrix product dens * [TRANS(1,:); 0] reduced to
% dens(:,1) * TRANS(1,:), which ignored the first observation for every
% state but the first.
idx = bdx + 1;
fltd(idx,:) = dens(idx,:) .* (ones(nreg,1) * TRANS(1,:));
scale(idx) = sum(fltd(idx,:),2);
fltd(idx,:) = fltd(idx,:) ./ (scale(idx) * ones(1,Q));

for t = 2:T
    idx = bdx + t;
    fltd(idx,:) = (fltd(idx-1,:) * TRANS) .* dens(idx,:);
    % Systematic scaling
    scale(idx) = sum(fltd(idx,:),2);
    fltd(idx,:) = fltd(idx,:) ./ (scale(idx) * ones(1,Q));
end

% Probability of returning to state 1 after the last location of each sequence
idx = bdx + T;
ssmc = fltd(idx,:) * TRANS(:,1);
loglike = sum(log(ssmc)) + sum(log(scale));

% Repeat each sequence's terminal constant for all of its T locations
vsmc = (ssmc * ones(1,T))';
vsmc = vsmc(:);

%%% 2: backward recursion
% Scale the backward variable with the forward scale factors (this ensures
% that the reestimation of the transition matrix is correct)
% The terminal state is state 1.
bwrd(idx, :) = (TRANS(:,1) * ones(1,nreg))';

for t = (T-1):-1:1
    idx = bdx + t;
    bwrd(idx, :) = (bwrd(idx+1,:).* dens(idx+1,:)) * TRANS';
    % Apply scaling
    bwrd(idx,:) = bwrd(idx,:) ./ (scale(idx+1) * ones(1,Q));
end

smtd = bwrd .* fltd ./ (vsmc * ones(1,Q));

%%% 3: transition statistics
% first and last location of every sequence
fl = bdx + 1;
ll = bdx + T;

% Locations that have a predecessor within the same sequence, and those
% predecessors. Column index vectors keep every indexed slice column-shaped,
% also when the selection is empty (T == 1).
nfl = (1:nel)';
nfl = nfl(~ismember(nfl,fl));
nll = nfl - 1;

% Interior term: \frac{1}{c \times c_{t}}\kappa_{t,T}(v)\psi_{t}(v)\pi_{t-1}(r)
% Boundary terms: entry into the first location goes to row 1, exit from
% the last location goes to column 1. The explicit dimension in sum()
% makes one formula valid for nreg == 1 as well, where a plain sum() of a
% single row would collapse to a scalar.
% NOTE(review): the boundary terms add smoothing probabilities as they are,
% whereas the interior term omits the TRANS factor - confirm the intended
% scaling against the caller's M-step.
pTRANS =  ((fltd(nll,:) ./ ((vsmc(nfl,:) .* (scale(nfl)) * ones(1,Q)))).' * ...
    (dens(nfl,:) .* bwrd(nfl,:))) + ...
    [sum(smtd(fl, :), 1); zeros(Q-1, Q)] + [sum(smtd(ll, :), 1)', zeros(Q, Q-1)];

end

% The trigamma function
function p =  trigamma(x)

%trigamma  Computes the trigamma function (d^2 log gamma).
%          Use:  p = trigamma(x) where x is a scalar, vector or matrix of
%          strictly positive values; p has the same size as x.
%          Raises an error if any element of x is <= 0.

% H2M/cnt Toolbox, $Revision: 2.2 $
% Olivier Cappe, 16/08/2001
% http://www.tsi.enst.fr/~cappe/h2m/

% Adapted from GAUSS code by Paul L. Fackler available at
%  http://www.american.edu/academic.depts/cas/econ/gaussres/pdf/loggamma.src

% Formulas 6.4.11 and 6.4.12 with recurrence formula 6.4.6 from
% Abramowitz and Stegun, Dover (1965)

% Flatten before testing: any() on a matrix works column by column and "if"
% is true only when every column flags, so a matrix with a single
% non-positive entry would otherwise slip through.
if any(x(:) <= 0)
    error('trigamma requires positive arguments.');
end

% Evaluate the asymptotic series at x+6, where it is accurate ...
x = x+6;
p = 1./(x.^2);
p = (((((0.075757575757576*p-0.033333333333333).*p+0.0238095238095238) ...
    .*p-0.033333333333333).*p+0.166666666666667).*p+1)./x+0.5*p;
% ... then step back to the original argument with the recurrence
% trigamma(z) = trigamma(z+1) + 1/z^2 applied six times.
p = 1./((x-1).^2)+1./((x-2).^2)+1./((x-3).^2)+1./((x-4).^2)+...
    1./((x-5).^2)+1./((x-6).^2)+p;
end

% The digamma function
function p = digamma(x)

%digamma   Computes the digamma (also called psi) function (d log gamma).
%          Use:  p = digamma(x) where x is a scalar, vector or matrix of
%          strictly positive values; p has the same size as x.
%          Raises an error if any element of x is <= 0.

% H2M/cnt Toolbox, $Revision: 2.2 $
% Olivier Cappe, 16/08/2001 - 17/08/2001
% http://www.tsi.enst.fr/~cappe/h2m/

% Adapted from GAUSS code by Paul L. Fackler available at
%  http://www.american.edu/academic.depts/cas/econ/gaussres/pdf/loggamma.src

% Formula 6.3.18 with recurrence formula 6.3.5 from Abramowitz and
% Stegun, Dover (1965)

% Flatten before testing: any() on a matrix works column by column and "if"
% is true only when every column flags, so a matrix with a single
% non-positive entry would otherwise slip through.
if any(x(:) <= 0)
    error('digamma requires positive arguments.');
end

% Evaluate the asymptotic series at x+6, where it is accurate.
% Series coefficients: 1/240, -1/252, 1/120, -1/12 (the 1/252 constant
% previously had two digits transposed).
x = x+6;
p = 1./(x.^2);
p = (((0.004166666666667*p-0.003968253968254).*p+ ...
    0.008333333333333).*p-0.083333333333333).*p;
% Step back to the original argument with the recurrence
% digamma(z) = digamma(z+1) - 1/z applied six times.
p = p+log(x)-0.5./x-1./(x-1)-1./(x-2)-1./(x-3)-1./(x-4)-1./(x-5)-1./(x-6);
end
