// Parse one tab-delimited line into a newly initialised Read.
//
// Expected field order: name, group, chr, strand, seq, CIGAR, MD, pos, len.
// Only these fields are filled in; every other member of struct Read keeps
// whatever ini_Read() set it to (so the struct is only partially populated
// compared with the full definition in read.h).
//
// line is modified in place: it is tokenised with strtok(), which overwrites
// delimiters and treats a run of tabs as a single delimiter, so empty fields
// are not supported.
//
// Returns the new Read, or NULL when
//   - line is NULL or holds fewer than nine fields,
//   - ini_Read() fails,
//   - the pos field cannot be converted to an unsigned number, or
//   - any string member is still NULL after hard_copy() (presumably an
//     allocation failure inside hard_copy -- confirm against its definition).
// On every failure path the partially built Read is released with free_leaf().
struct Read *get_Read_cluster(char *line)
{
  enum { NUM_FIELDS = 9 };
  char *fields[NUM_FIELDS];
  struct Read *read_p;
  int i;

  if (line==NULL) {return NULL;}

  // Split the whole line before touching any field, so a short or blank line
  // is rejected here instead of passing NULL to atoi()/sscanf() later on.
  fields[0]=strtok(line,"\t");
  for (i=1;i<NUM_FIELDS;i++)
  {
    fields[i]=(fields[i-1]==NULL) ? NULL : strtok(NULL,"\t");
  }
  if (fields[NUM_FIELDS-1]==NULL) {return NULL;}

  read_p=ini_Read();
  if (read_p==NULL) {return NULL;}

  hard_copy(&read_p->name,fields[0]);   // name
  read_p->group=atoi(fields[1]);        // group
  hard_copy(&read_p->chr,fields[2]);    // chr
  read_p->strand=fields[3][0];          // strand (strtok tokens are never empty)
  hard_copy(&read_p->seq,fields[4]);    // seq
  hard_copy(&read_p->CIGAR,fields[5]);  // CIGAR
  hard_copy(&read_p->MD,fields[6]);     // MD
  read_p->len=atoi(fields[8]);          // len (a trailing newline is ignored by atoi)

  // pos: reject the line when no number could be read, rather than keeping
  // whatever value ini_Read() left in the field
  if (sscanf(fields[7],"%lu",&(read_p->pos))!=1)
  {
    free_leaf(read_p,NULL);
    return NULL;
  }

  if (read_p->name==NULL || read_p->chr==NULL || read_p->CIGAR==NULL || read_p->seq==NULL || read_p->MD==NULL)
  {
    free_leaf(read_p,NULL);
    return NULL;
  }

  return read_p;
}

// Write the per-position total tag count and mutant tag count of one cluster.
//
// head_p      first read of the cluster; the remaining reads are reached
//             through the ->bigger links
// file2       output stream
// parsed_muts passed through unchanged to get_muts()
// GROUP       number of groups; read->group is used as a 1-based group index
// end         last position covered by the cluster (inclusive)
//
// Output: one header line "#\t<chr>\t<strand>\t<start>\t<end>", then one line
// per position from the head read's pos to end. Each line has 2*GROUP
// tab-separated columns: columns 0..GROUP-1 are the total counts per group,
// columns GROUP..2*GROUP-1 are the mutant counts per group.
//
// Returns 1 when the cluster was written or deliberately skipped (NULL head,
// or fewer than MIN_TAGS reads), and 0 on failure (GROUP<=0, end before the
// head position, or the matrix could not be created).
int print_cluster(struct Read *head_p,FILE *file2,unsigned int parsed_muts,int GROUP,unsigned long end)
{
  // To do: add an option here to control the minimum number of tags for a cluster to be considered
  enum { MIN_TAGS = 20 };
  struct Read *tmp;
  int count=0,i,column;
  unsigned long len,j,offset;
  struct Matrix *counts;
  int muts[MAX_MUT],num_muts;

  if (head_p==NULL) {return 1;}

  // Count every read including the head, so that a cluster with exactly
  // MIN_TAGS reads is written. (The previous loop started counting after the
  // head and therefore required MIN_TAGS+1 reads.)
  for (tmp=head_p;tmp!=NULL;tmp=tmp->bigger) {count++;}
  if (count<MIN_TAGS) {return 1;} // write to file if a cluster has at least 20 tags

  // without these checks 2*GROUP-1 would be a negative column index and
  // end-pos+1 would wrap around to a huge length
  if (GROUP<=0 || end<head_p->pos) {return 0;}

  // create the matrix before printing anything, so that a failed allocation
  // does not leave a header line without data in the output
  len=end-head_p->pos+1;
  counts=create_matrix(len,GROUP*2);
  if (counts==NULL) {return 0;}

  // print header for each cluster
  fprintf(file2,"#\t%s\t%c\t%lu\t%lu\n",head_p->chr,head_p->strand,head_p->pos,end);

  // total and mutant tag count
  for (tmp=head_p;tmp!=NULL;tmp=tmp->bigger)
  {
    // a read whose group lies outside 1..GROUP has no column in the matrix
    if (tmp->group<1 || tmp->group>GROUP) {continue;}
    column=tmp->group-1;

    // The row index is computed in unsigned arithmetic, so a position before
    // the head wraps to a huge value; "offset<len" therefore rejects rows
    // both before and after the matrix.
    for (i=0;i<tmp->len;i++)
    {
      offset=tmp->pos+i-head_p->pos;
      if (offset<len) {counts->values[offset][column]++;}
    }

    // muts[] entries are used as offsets relative to the read's own pos
    num_muts=get_muts(tmp,muts,parsed_muts);
    for (i=0;i<num_muts;i++)
    {
      offset=muts[i]+tmp->pos-head_p->pos;
      if (offset<len) {counts->values[offset][column+GROUP]++;}
    }
  }

  // print and free matrix
  for (j=0;j<len;j++)
  {
    for (i=0;i<2*GROUP-1;i++) {fprintf(file2,"%lf\t",counts->values[j][i]);}
    fprintf(file2,"%lf\n",counts->values[j][2*GROUP-1]);
  }

  free_matrix(counts);
  return 1;
}

// Cluster short sequencing reads and write the total tag count and mutant tag
// count of every cluster.
//
// file1       input stream, one read per line in the format expected by
//             get_Read_cluster()
// file2       output stream handed to print_cluster()
// parsed_muts passed through unchanged to print_cluster()
// GROUP       number of groups, passed through to print_cluster()
//
// A read joins the current cluster when it has the same strand and chr as the
// cluster's first read and starts no later than one position past the current
// cluster end; otherwise the current cluster is flushed and the read starts a
// new one. This only yields meaningful clusters when the input is ordered by
// chr, strand and position -- NOTE(review): confirm the caller sorts the input.
//
// Returns the number of clusters found in the input (including clusters that
// print_cluster() decides not to write). Returns 0 when a line cannot be
// parsed, when print_cluster() fails, when a stream is NULL, or when the
// input contains no reads.
unsigned long cluster(FILE *file1,FILE *file2,unsigned int parsed_muts,int GROUP)
{
  char line[MAX_LINE];
  struct Read *read_p,*head_p=NULL,*current_p=NULL;
  unsigned long end=0,read_end;
  unsigned long total_cluster=0; // total number of clusters

  if (file1==NULL || file2==NULL) {return 0;}

  while (fgets(line,sizeof line,file1)!=NULL)
  {
    // get read from line
    read_p=get_Read_cluster(line);
    if (read_p==NULL)
    {
      // do not leak the cluster collected so far
      free_leaf(head_p,NULL);
      return 0;
    }
    read_end=read_p->pos+read_p->len-1;

    // The first read always opens a cluster. After that, a different strand or
    // chr, or a gap of more than one position, closes the current cluster.
    // The gap test is written without "pos-1" so that pos==0 cannot wrap.
    if (head_p==NULL
        || head_p->strand!=read_p->strand
        || strcmp(head_p->chr,read_p->chr)!=0
        || (read_p->pos>end && read_p->pos-end>1))
    {
      // flush the previous cluster, if there is one
      if (head_p!=NULL)
      {
        if (print_cluster(head_p,file2,parsed_muts,GROUP,end)==0)
        {
          free_leaf(head_p,NULL);
          free_leaf(read_p,NULL); // not linked into the list yet
          return 0;
        }
        total_cluster++;
        free_leaf(head_p,NULL);
      }

      // the new read becomes the head of the next cluster
      head_p=read_p;
      current_p=read_p;
      end=read_end;
    }
    else
    {
      // the new read belongs to the current cluster: extend and append
      if (end<read_end) {end=read_end;}
      current_p->bigger=read_p;
      current_p=read_p;
    }
  }

  // print the last one
  if (head_p!=NULL)
  {
    if (print_cluster(head_p,file2,parsed_muts,GROUP,end)==0)
    {
      free_leaf(head_p,NULL);
      return 0;
    }
    total_cluster++;
    free_leaf(head_p,NULL);
  }

  return total_cluster;
}
