#!/usr/bin/perl
use strict;
use warnings;
use POSIX qw/floor/;
# From here on every Perl warning (for example a non-numeric FLAG column or a
# missing column in an alignment record) aborts the run and is reported as a
# SAM format error.
$SIG{__WARN__}=sub{die "SAM file format error!\n"};

# Command line arguments, in order:
#   fr          "fr-first" or "fr-second"; selects which strand sign is
#               recorded for pairs whose first mate carries FLAG 83 or 99
#   suffix      two comma-separated read-name suffixes (first mate, second mate)
#   file_tag    alignment file to read
#   file_merge  output file receiving one line per merged mate pair
# Check them up front: otherwise a missing or mistyped argument only shows up
# later as an unrelated "SAM file format error!" from the warning handler.
if (@ARGV<4)
{
  die "usage: ".$0." <fr-first|fr-second> <suffix_f,suffix_r> <alignment file> <merged output file>\n";
}

my ($fr,$suffix,$file_tag,$file_merge)=@ARGV;
my (%fr,$i,%tags,@items,$tag,$name);
my ($suffix_f,$suffix_r);
my $md_field=-1;

# Strand sign recorded for a pair, keyed by the FLAG of its first mate.
my %strand_by_mode=(
  "fr-second"=>{"flag83"=>"-","flag99"=>"+"},
  "fr-first" =>{"flag83"=>"+","flag99"=>"-"},
);
unless (exists $strand_by_mode{$fr})
{
  die "first argument must be \"fr-first\" or \"fr-second\" (got \"".$fr."\")!\n";
}
%fr=%{$strand_by_mode{$fr}};

##################  find mates  ######################################

print "    Read tags\n";

# $suffix carries two comma-separated read-name suffixes: the first is
# expected on reads with FLAG 83/99, the second on reads with FLAG 147/163.
# Each suffix is interpolated into a pattern as-is, so it is treated as a
# regular expression rather than a literal string.
my @suffix_parts=defined $suffix ? split(",",$suffix) : ();
if (@suffix_parts<2)
{
  die "read name suffixes must be given as \"first,second\"!\n";
}
($suffix_f,$suffix_r)=@suffix_parts;
$suffix_f='^(.*)'.$suffix_f.'$';
$suffix_r='^(.*)'.$suffix_r.'$';
$suffix_f=qr/$suffix_f/;
$suffix_r=qr/$suffix_r/;

# Three-argument open with a lexical handle: characters such as ">" or "|" in
# the file name can no longer change the open mode.
open(my $tag_fh,'<',$file_tag) or die "read alignment file ".$file_tag." failed! ".$!."\n";

while (my $line=<$tag_fh>) # find mates
{
  next if $line=~/^@/; # header line

  my @fields=split("\t",$line);
  my ($qname,$flag,$chr,$pos,$cigar,$mate_pos,$seq)=@fields[0,1,2,3,5,7,9];

  # delete gapped mapping and keep only mapped reads
  next if $cigar=~/N/;
  next unless ($flag==147 || $flag==163 || $flag==83 || $flag==99);

  # find MD field; it must be a whole tab-delimited field, -1 when absent
  my $md=($line=~/\tMD:Z:([^\t\r\n]+)/) ? $1 : -1;

  # FLAG 83/99 reads fill the "f" slot of a pair, FLAG 147/163 reads the "r" slot
  my $is_first=($flag==83 || $flag==99);
  my $name_re=$is_first ? $suffix_f : $suffix_r;

  # The match must be checked: after a failed match $1 would still hold the
  # capture of an earlier successful match (the MD value), and the read would
  # silently be stored under a wrong key.
  $qname=~$name_re
    or die "read name ".$qname." does not match the expected suffix (line ".$.." of ".$file_tag.")!\n";

  # Both mates of a pair share the name prefix and the sum pos + mate pos,
  # so they end up under the same key.
  my $pair_key=$1."_".($pos+$mate_pos);

  # information is stored as pos, CIGAR, seq and MD
  my $mate=[$pos,$cigar,$seq,$md];

  if ($is_first)
  {
    $tags{$pair_key}->{"strand"}=$fr{$flag==83 ? "flag83" : "flag99"};
    $tags{$pair_key}->{"chr"}=$chr;
    $tags{$pair_key}->{"f"}=$mate;
  }else
  {
    $tags{$pair_key}->{"r"}=$mate;
  }
}

close($tag_fh);

##################  write merged mates  ##############################

print "    Write merged mates\n";

# A pair is written only when the start positions of its two mates are less
# than this many bases apart.
my $max_mate_distance=500;

# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the open mode.
open(my $merge_fh,'>',$file_merge) or die "can't write to file ".$file_merge."!\n";

foreach my $pair_key (keys %tags) # write mates into file
{
  my $pair=$tags{$pair_key};
  my ($first,$second)=($pair->{"f"},$pair->{"r"});

  # skip reads whose mate was never seen
  next unless defined $first && defined $second;
  next unless abs($first->[0]-$second->[0])<$max_mate_distance;

  # tag name, chr, strand, f_pos, f_CIGAR, f_seq, f_MD, r_pos, r_CIGAR, r_seq and r_MD
  print {$merge_fh} join("\t",$pair_key,$pair->{"chr"},$pair->{"strand"},@{$first},@{$second})."\n"
    or die "can't write to file ".$file_merge."! ".$!."\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($merge_fh) or die "can't write to file ".$file_merge."! ".$!."\n";

#####################################################################

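# SAM FLAG values kept by the read filter: binary, label, decimal, decoded SAM bits.
# All four have 0x1 (paired) and 0x2 (properly paired) set.
#10010011 Forward->second 147  (0x80 second in pair, 0x10 read on reverse strand; mate of 99)
#10100011 Reverse->second 163  (0x80 second in pair, 0x20 mate on reverse strand; mate of 83)
#01010011 Reverse->first 83    (0x40 first in pair, 0x10 read on reverse strand)
#01100011 Forward->first 99    (0x40 first in pair, 0x20 mate on reverse strand)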
