function [chr, reg, spos, epos, mut, strn, rlen, tfreq, mfreq, trd, ureg, treg] = loadreads( filepath )
%LOADREADS Load aligned read records from a tab-delimited text file.
%
%   The file is read with the format '%u %s %s %u %u %s %s', i.e. seven
%   tab-separated columns per line:
%       1. region id            (unsigned integer)
%       2. a string column that is read but discarded
%       3. chromosome           (string)
%       4. read start position  (unsigned integer)
%       5. read end position    (unsigned integer)
%       6. strand               (string, compared against '-' and '+')
%       7. mutation list        (string, parsed numerically with str2num)
%
%   Outputs:
%       chr   - nominal array, chromosome of each read
%       reg   - int32 column, region id of each read
%       spos  - int32 column, start position of each read
%       epos  - int32 column, end position of each read
%       mut   - int32 trd-by-mm matrix of the parsed mutation values, one
%               row per read, zero-padded on the right (mm is the largest
%               number of values found on any single read)
%       strn  - nominal array, strand of each read
%       rlen  - int8 column, read length (epos - spos + 1); int8 saturates
%               at 127, a warning is issued when that happens
%       tfreq - int32 mrlen-by-2 matrix, number of reads of each length;
%               column 1 is the '-' strand, column 2 the '+' strand
%       mfreq - 2-by-1 cell of int32 mrlen-by-mrlen matrices; mfreq{k}(L,p)
%               is the number of mutations at read-relative position p over
%               all reads of length L; k = 1 is '-', k = 2 is '+'
%       trd   - total number of reads
%       ureg  - unique region ids
%       treg  - number of unique region ids
%
%   Mutation values are converted to read-relative positions by subtracting
%   the read start, so they are presumably absolute coordinates on the same
%   scale as spos/epos (TODO confirm against the file producer).

[reg,~,chr,spos,epos,strn,strmut] = textread(filepath,'%u %s %s %u %u %s %s','delimiter','\t');

reg = int32(reg);     % region id
chr = nominal(chr);   % chromosome
spos = int32(spos);   % start locations of reads
epos = int32(epos);   % end locations of reads
strn = nominal(strn); % strand (kept as nominal, not converted to logical)
trd = length(reg);    % total read number
ureg = unique(reg);   % unique region ids
treg = length(ureg);  % total number of unique region ids

% Read length is returned as int8 (unchanged for callers). int8 saturates
% at 127, so longer reads would be silently clamped: make that visible.
rawlen = epos - spos + 1;
if any(double(rawlen) > double(intmax('int8')))
    warning('loadreads:rlenSaturated', ...
        'Some reads are longer than %d; rlen saturates at that value.', ...
        double(intmax('int8')));
end
rlen = int8(rawlen);

% Parse the mutation list of every read into a numeric vector.
% NOTE(review): str2num evaluates its argument, so the mutation column must
% come from a trusted source; consider sscanf/textscan once the exact list
% format is confirmed.
numut = cell(1,trd);
for j = 1:trd
    numut{j} = str2num(strmut{j});
end

nm = cellfun(@length,numut); % number of mutation values per read
mm = max(nm);                % largest number of values on a single read

% Pack the per-read vectors into one zero-padded matrix, handling all reads
% that carry exactly j values at once.
mut = zeros(trd,mm);
for j = 1:mm
    mut(nm == j,1:j) =  reshape([(numut{nm == j})],j,sum(nm == j))';
end
mut = int32(mut);

% Read-relative position counted from the read start; the zero padding in
% mut is shifted by the same amount.
mpos = mut - repmat(spos,1,mm) + 1;
% For '-' strand reads the position is mirrored so it counts from the
% opposite end of the read.
mpos(strn == '-',:) = repmat(int32(rlen(strn == '-')),1,mm) - mpos(strn == '-',:) + 1;

% Strand index used by tfreq and mfreq: 1 = negative ('-'), 2 = positive ('+').
ustrn = nominal({'-','+'});
mrlen = max(rlen);
tfreq = int32(zeros(mrlen,2));

% Number of reads of each length, per strand.
for i = 1:mrlen
    tfreq(i,:)= [sum(rlen==i & strn == ustrn(1)), sum(rlen==i & strn == ustrn(2))];
end

temp = int32(zeros(mrlen,mrlen));

% Frequency of mutations for each (read length, position) combination,
% strand specific.
mfreq = cell(2,1);
mfreq{1} = temp; % negative strand
mfreq{2} = temp; % positive strand

for k = 1:2
    for j = 1:double(mrlen)
        sel = (rlen == j) & (strn == ustrn(k));
        x = double(mpos(sel,:));
        if isempty(x)
            continue; % no reads (or no mutation columns): counts stay zero
        end
        % Bin along dimension 1 explicitly. Without the dim argument histc
        % treats a single selected read (a 1-by-mm row) as a vector and
        % returns a row of counts; sum(...,2) would then collapse it to a
        % scalar that gets broadcast over all positions 1:j.
        counts = histc(x, 1:j, 1);          % j-by-mm, one column per slot
        mfreq{k}(j,1:j) = sum(counts,2);    % total per position
    end
end

end