// Translate a mutation name into its bit flag.
// Recognised names are the twelve base substitutions written "X2Y" plus "Ins" and "Del".
// The name at index k of the table below maps to the value 1<<k (A2G=1 ... Del=8192).
// Any other string yields 0. The comparison is exact and case sensitive.
unsigned int mut2bit(char *mut)
{
  // the order of this table defines the bit assigned to each name
  static const char *const names[]={
    "A2G","A2C","A2T",
    "G2A","G2C","G2T",
    "T2G","T2A","T2C",
    "C2T","C2A","C2G",
    "Ins","Del"
  };
  const unsigned int total=(unsigned int)(sizeof(names)/sizeof(names[0]));
  unsigned int k;

  for (k=0;k<total;k++)
  {
    if (strcmp(mut,names[k])==0) {return 1u<<k;}
  }

  return 0; // unknown mutation name
}

// Parse a comma separated list of mutation names (e.g. "T2C,Ins") into a bit mask.
// Each field is passed to mut2bit() and the results are OR-ed together, so fields
// that mut2bit() does not recognise contribute nothing. Empty fields are skipped.
//
// muts  NUL terminated, writable list of names; NULL yields 0.
//       Each separator is overwritten with a NUL only for the duration of the
//       lookup and then restored, so the string is unchanged on return.
//
// No strtok() is used, so a tokenization that the caller has in progress is not disturbed.
unsigned int parse_muts(char* muts)
{
  unsigned int mask=0;
  char *field,*stop,saved;

  if (muts==NULL) {return 0;}

  field=muts;
  while (*field!='\0')
  {
    if (*field==',') // empty field: nothing to look up
    {
      field++;
      continue;
    }

    stop=field+strcspn(field,","); // first separator or the terminating NUL
    saved=*stop;
    if (saved!='\0') {*stop='\0';} // terminate the field for mut2bit()
    mask|=mut2bit(field);
    if (saved!='\0') {*stop=saved;} // put the separator back
    field=stop;
  }

  return mask;
}

// Collect the positions of the requested mutation types found in one read.
//
// read_p       read to scan; its CIGAR, MD, seq and strand fields are used
// muts         output array; all MAX_MUT entries are set to 0 first, then the
//              leading entries receive the recorded positions
// parsed_muts  bit mask of the wanted mutation types (bit values as returned by mut2bit)
//
// Returns the number of positions stored in muts. Scanning stops and the current
// count is returned as soon as MAX_MUT-1 positions have been recorded.
//
// Insertions and deletions are taken from the CIGAR string, substitutions from the MD string.
int get_muts(struct Read *read_p,int *muts,unsigned int parsed_muts)
{
  int i,j=0,at=0,count=0,soft_clip=0,deleted=0;
  int cigar_len,md_len;
  int want_del,want_ins;
  char op;
  char tmp[MAX_LINE],seq[MAX_LINE],tmp1[MAX_LINE];

  for (i=0;i<MAX_MUT;i++) {muts[i]=0;} //initialize

  // neither string is modified below, so the lengths and the two flags are computed once
  cigar_len=(int)strlen(read_p->CIGAR);
  want_del=(parsed_muts & mut2bit("Del"))>0;
  want_ins=(parsed_muts & mut2bit("Ins"))>0;

  // pass 1: length of a soft clip (S) found before the first match (M), if any
  for(i=0;i<cigar_len;i++)
  {
    op=read_p->CIGAR[i];
    if (op>='0' && op<='9') {tmp[j++]=op;} // collect the digits of the operation length
    else if (op=='M') {break;} // S must be before M
    else if (op=='S')
    {
      tmp[j]='\0';
      soft_clip=atoi(tmp);
      break;
    }
    else {j=0;} // some other operation: discard its length
  }

  // work on a copy of the read sequence that starts after the soft clipped bases
  // NOTE(review): substr is defined elsewhere; a last argument of 0 is assumed to
  // mean "copy up to the end of the string" - confirm
  j=0;
  substr(seq,read_p->seq,soft_clip,0);

  // pass 2: deletions and insertions from the CIGAR string
  for(i=0;i<cigar_len;i++)
  {
    op=read_p->CIGAR[i];
    if (op>='0' && op<='9') {tmp[j++]=op;} // collect the digits of the operation length
    else if (op=='M' || op=='D') // read match or deletion
    {
      if (op=='D' && want_del) {muts[count++]=at;} // Del
      if (count==MAX_MUT-1) {return count;}
      tmp[j]='\0';
      at+=atoi(tmp); // both M and D advance the position
      j=0;
    }else // if not match or deletion
    {
      if (op=='I') // Ins
      {
        if (want_ins) {muts[count++]=at;} // If Ins is desired
        tmp[j]='\0';
        // cut the inserted bases out of seq
        // NOTE(review): at also advances over D operations; confirm that seq+at is
        // the intended offset when a deletion precedes the insertion
        substr(tmp1,seq,at+atoi(tmp),0); // copy the sequence after the insertion
        substr(seq+at,tmp1,0,0); // copy the copied sequence to the end of sequence before insertion
      }
      if (count==MAX_MUT-1) {return count;}
      j=0;
    }
  }

  // pass 3: substitutions from the MD string
  if (read_p->MD[0]=='0') {return count;} // no valid MD field
  j=0;
  at=0;

  // parsing starts at index 5
  // NOTE(review): presumably this skips a leading "MD:Z:" prefix - confirm
  md_len=(int)strlen(read_p->MD);
  for(i=5;i<md_len;i++)
  {
    op=read_p->MD[i];
    if (op=='^') // Del
    {
      do
      {
        i++;
        deleted++;
      }
      while (read_p->MD[i]>='A' && read_p->MD[i]<='Z'); // disregard deletions, but recorded in deleted
      deleted--; // the final step moved past the last deleted base
      // i is now on the first character after the deleted bases, normally the first
      // digit of the next match count. Step back so that the loop increment lands on
      // it; otherwise that digit is lost and later positions are shifted.
      i--;
      tmp[j]='\0';
      at+=atoi(tmp);
      j=0;
    }else if (op>='A' && op<='Z') // substitution
    {
      tmp[j]='\0';
      at+=atoi(tmp); // skip the matching bases before the substitution
      j=0;

      // build the mutation name "<reference base>2<read base>"
      if (read_p->strand=='-') // both bases are complemented on the - strand
      {
        tmp1[0]=complement(op);
        tmp1[2]=complement(seq[at]);
      }else
      {
        tmp1[0]=op;
        tmp1[2]=seq[at];
      }

      tmp1[1]='2';
      tmp1[3]='\0';

      if ((parsed_muts & mut2bit(tmp1))>0) {muts[count++]=at+deleted;} // record mutation position
      if (count==MAX_MUT-1) {return count;}
      at++;
    }else {tmp[j++]=op;} // matches
  }

  return count;
}

