// Map the numeric flag field of an alignment line to a strand character:
//   0   -> '+'
//   16  -> '-'
//   any other value -> 'n' (get_Read discards reads with this marker)
char parse_flag(int flag)
{
  switch (flag)
  {
    case 0:
      return '+';
    case 16:
      return '-';
    default:
      return 'n';
  }
}

// Compute the mean of the character codes in a quality string.
// No offset is subtracted, so the result is the mean of the raw char values.
//   qual: NUL-terminated quality string; may be NULL (e.g. a missing strtok token)
// Returns the mean value, or 0 when qual is NULL or empty (the mean is
// undefined there; previously this divided by zero or dereferenced NULL).
float parse_qual(char *qual)
{
  size_t len;
  size_t i;
  float sum=0;

  if (qual==NULL) {return 0;}

  len=strlen(qual);
  if (len==0) {return 0;}

  // stop before the terminating NUL; it is not part of the quality string
  for(i=0;i<len;i++) {sum+=qual[i];}
  return sum/len;
}

// Check whether a CIGAR string contains any of the operations 'N', 'P', '=' or 'X'.
//   CIGAR: NUL-terminated CIGAR string; may be NULL (e.g. a missing strtok token)
// Returns 1 if one of those characters is present or if CIGAR is NULL,
// 0 otherwise.
int check_CIGAR(char *CIGAR)
{
  const char *p;

  // the NULL test must come before any access to the string
  if (CIGAR==NULL) {return 1;}

  for(p=CIGAR;*p!='\0';p++)
  {
    if (*p=='N' || *p=='P' || *p=='=' || *p=='X') {return 1;}
  }
  return 0;
}

// Calculate the mapped length described by a CIGAR string: the sum of the
// counts attached to 'M' (match) and 'D' (deletion) operations. Counts
// attached to any other operation are ignored.
//   CIGAR: NUL-terminated CIGAR string; NULL is treated as an empty string
// Returns the summed length (0 when nothing matches).
int get_len(char *CIGAR)
{
  int len=0;
  int run=0; // value of the digit run read so far for the current operation
  const char *p;

  if (CIGAR==NULL) {return 0;}

  for(p=CIGAR;*p!='\0';p++)
  {
    if (*p>='0' && *p<='9')
    {
      // accumulate the number directly; no fixed-size scratch buffer to overflow
      run=run*10+(*p-'0');
    }
    else
    {
      if (*p=='M' || *p=='D') {len+=run;} // only match or deletion counts
      run=0; // every operation letter starts a fresh number
    }
  }

  return len;
}

// Allocate a struct Read with malloc and give every field its default value:
// pointer fields are NULL, numeric fields are 0 and strand is 'n'.
// Returns the new object, or NULL if the allocation fails.
// NOTE(review): get_Read releases these objects with free_leaf; presumably
// other callers should do the same - confirm.
struct Read *ini_Read()
{
  struct Read *node=(struct Read*)malloc(sizeof *node);

  if (node==NULL)
  {
    return NULL;
  }

  // links to neighbouring reads
  node->smaller=NULL;
  node->bigger=NULL;

  // string fields, filled in later by the parser
  node->name=NULL;
  node->chr=NULL;
  node->CIGAR=NULL;
  node->seq=NULL;
  node->MD=NULL;

  // scalar fields
  node->pos=0;
  node->len=0;
  node->qual=0;
  node->group=0;
  node->strand='n';

  return node;
}

// get_Read parses one tab-separated alignment line into a newly allocated
// struct Read.
//   line:  the text line; it is modified in place by strtok
//   group: value stored in the group field of the result
// Fields are consumed in order: name, flag, chr, pos, one skipped field,
// CIGAR, three skipped fields, seq, qual, then the remaining fields are
// searched for the first one starting with "MD". If none is found, MD is
// set to "0".
// Returns the new Read, or NULL when:
//   - line is NULL or a header line (first character '@'),
//   - allocation fails,
//   - a required field is missing or the position cannot be parsed,
//   - the flag is neither 0 nor 16 (parse_flag returns 'n'),
//   - check_CIGAR rejects the CIGAR string.
// NOTE(review): if qual is the last field on the line, a trailing newline
// would be part of the token and counted in the mean - confirm whether
// callers strip it.
struct Read *get_Read(char *line,int group)
{
  struct Read *read_p;
  int i;
  char *tmp;

  // check header line
  if (line==NULL || line[0]=='@') {return NULL;}

  // create Read struct
  read_p=ini_Read();
  if (read_p==NULL) {return NULL;}

  // every strtok result is checked before use: a truncated line yields NULL
  tmp=strtok(line,"\t"); // name
  if (tmp==NULL) {goto fail;}
  hard_copy(&read_p->name,tmp);

  tmp=strtok(NULL,"\t"); // flag -> strand
  if (tmp==NULL) {goto fail;}
  read_p->strand=parse_flag(atoi(tmp));
  // check if the read is properly mapped
  if (read_p->strand=='n') {goto fail;}

  tmp=strtok(NULL,"\t"); // chr
  if (tmp==NULL) {goto fail;}
  hard_copy(&read_p->chr,tmp);

  tmp=strtok(NULL,"\t"); // pos
  if (tmp==NULL || sscanf(tmp,"%lu",&(read_p->pos))!=1) {goto fail;}

  strtok(NULL,"\t"); // fifth field is not used

  tmp=strtok(NULL,"\t"); // CIGAR
  if (tmp==NULL) {goto fail;}
  hard_copy(&read_p->CIGAR,tmp);
  // reject a failed copy or a CIGAR string containing N, P, = or X
  if (read_p->CIGAR==NULL || check_CIGAR(read_p->CIGAR)==1) {goto fail;}

  read_p->len=get_len(read_p->CIGAR); // len
  for (i=0;i<3;i++) {strtok(NULL,"\t");} // three fields are not used

  tmp=strtok(NULL,"\t"); // seq
  if (tmp==NULL) {goto fail;}
  hard_copy(&read_p->seq,tmp);

  tmp=strtok(NULL,"\t"); // qual
  if (tmp==NULL) {goto fail;}
  read_p->qual=parse_qual(tmp);

  read_p->group=group; // group

  // look for MD field
  while ((tmp=strtok(NULL,"\t"))!=NULL)
  {
    if (tmp[0]=='M' && tmp[1]=='D')
    {
      hard_copy(&read_p->MD,tmp);
      if (read_p->MD!=NULL)
      {
        // drop the line's trailing newline when MD is the last field
        size_t md_len=strlen(read_p->MD);
        if (md_len>0 && read_p->MD[md_len-1]=='\n') {read_p->MD[md_len-1]='\0';}
      }
      break;
    }
  }

  if (read_p->MD==NULL) {hard_copy(&read_p->MD,"0");}

  // any copy that left its destination NULL invalidates the read
  if (read_p->name==NULL || read_p->chr==NULL || read_p->CIGAR==NULL || read_p->seq==NULL || read_p->MD==NULL)
  {
    goto fail;
  }

  return read_p;

fail:
  free_leaf(read_p,NULL);
  return NULL;
}
