% Example driver script for the sample data sets.
%
% Pipeline, in order:
%   1. open a pool of parallel workers,
%   2. read clustered / non-clustered reads via processdata,
%   3. fit the HMM and decode hidden states via stateinference,
%   4. fit the mixture model via findbinding,
%   5. write the sites whose posterior probability passes the cutoff
%      to results.txt,
%   6. close the worker pool.

% The MATLAB functions are coded for parallel computing.
% "matlabpool open N" can be used to specify the number of workers N.
% NOTE(review): newer MATLAB releases replace matlabpool with parpool;
% adjust the open/close calls if matlabpool is unavailable.
matlabpool open;

clfath = './Chi2009/clustered.txt'; % file path of clustered reads
ncfath = './Chi2009/non-clustered.txt'; % file path of NON-clustered reads

% Retrieve variables from the data sets.
[mfreq, tfreq, mis, zs, dat, agg, ureg, treg, ustrn] = processdata( clfath, ncfath );

% INITIALIZATION of the HMM
% transition matrix. [TRANS]rv = p(q_t+1 = v | q_t = r)
TRANS = [.95 .05 .00 .00;
         .09 .90 .01 .00;
         .00 .09 .90 .01;
         .00 .00 .10 .90];
b = [10 1 .5 .1]; % the inverse scale parameter on the NB, one per hidden state
% Number of hidden states. Must be derived from b: "beta" is not a variable
% here and would resolve to MATLAB's built-in beta function.
nhs = int32(length(b));
a = repmat(50,1,nhs); % the shape parameter on the NB, one per hidden state

% Max iteration limit for the EM
Nit = 1000;

% Implement the Baum-Welch algorithm followed by the Viterbi algorithm.
% The shape parameter is passed as "a" ("alpha" is not a variable here and
% would resolve to MATLAB's built-in alpha function).
[ cls, TRANS, a, b, loglike] = stateinference( agg.rd, agg.reg, ureg, mis, TRANS, a, b, Nit);

% INITIALIZATION of mixture models
w = 0.001; % mixture proportion
alp = 4; % mutation signal strength. strictly positive initial value

% Implement the EM algorithm for the mixture model.
[ results, w, alp, ep, lobs ] = findbinding( mfreq, tfreq, mis, zs, dat, agg, cls, nhs, w, alp, Nit );

% Save results in a txt file.
% The file will contain binding sites whose posterior prob. >= cutoff.
% For each binding site, the following variables are reported:
% posterior prob. & chromosome & region_ID & genomic location & read counts
% & mutation counts & # of zero state & strand
cut = 0.5; % posterior probability cut off to generate report.

% Select the rows that actually pass the cutoff instead of assuming the
% first tsite rows do; this is identical when results are sorted by
% descending posterior probability and still correct when they are not.
% reshape(...) forces a row vector so the for-loop visits one index at a time.
hits = reshape(find(results{1} >= cut), 1, []);
tsite = numel(hits);

[fileID, errmsg] = fopen('results.txt','w');
if fileID == -1
    % Release the workers before aborting so the pool is not left open.
    matlabpool close;
    error('Could not open results.txt for writing: %s', errmsg);
end

fprintf(fileID,'postprob \t chr \t reg \t loc \t read \t Mut \t ZS \t Strand \n');
for k = hits
    % results{8} is used as a 0-based index into ustrn, hence the +1.
    fprintf(fileID,'%.20f \t %4s \t %u \t %u \t %u \t %u \t %u \t %s \n',...
        results{1}(k), char(results{2}(k)), results{3}(k), results{4}(k),...
        results{5}(k), results{6}(k), results{7}(k), char(ustrn(results{8}(k)+1)));
end
fclose(fileID);

matlabpool close;