#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Cwd 'abs_path';
use File::Temp qw/tempfile/;

##########  get options  ##################

# Command-line options. These are file-scoped because the rest of the script
# reads them directly.
my ($fr,$clip,$control,$dir,$step,$mut_type,$pair,$count_only,$help,$dup,$link);

$dir="."; # directory of output files
$clip=""; # comma-separated string of CLIP-Seq files
$control=""; # comma-separated string of control files
$step=5; # step size
$mut_type=""; # mutation type, e.g. "T2C","Del","A2G,T2C"
$pair=""; # suffix in paired-end mode
$count_only=0; # whether to only give total/mutant tag counts
$help=0; # print help message
$dup='c'; # whether to remove duplicate reads; one of 'c', 'a' or 's'
$fr=""; # extra argument handed to merge_pair.pl in paired-end mode
$link=""; # path handed to the preprocessing executable as its count file

# GetOptions returns false when the command line could not be parsed
# (unknown option, missing value); stop instead of running with a
# half-parsed set of options.
GetOptions
(
  'clip=s'=>\$clip,
  'control=s'=>\$control,
  'dir=s'=>\$dir,
  'step=s'=>\$step,
  'duplicate=s'=>\$dup,
  'mut=s'=>\$mut_type,
  'pair=s'=>\$pair,
  'only'=>\$count_only,
  'help'=>\$help,
  'link=s'=>\$link,
  'fr=s'=>\$fr,
  'h'=>\$help
) or die "Error in command line arguments!\n";

if ($help) {help();}
if ($clip eq "" || $control eq "" || $mut_type eq "") {die "Input parameters not complete!\n";}
# -link is used unconditionally as the count file later on; without it the
# arguments of the preprocessing command would silently shift.
if ($link eq "") {die "Input parameters not complete! (-link is required)\n";}
$dir=sub_home($dir);
unless (-d $dir) {mkdir $dir or die "Can't create output directory $dir: $!\n";}
if (($dup ne 'c') && ($dup ne 'a') && ($dup ne 's')) {die "Unrecognized option for -duplicate!\n";}

############  preprocess reads  ######################

my (@clip,@control,@combined);
my ($preprocess_exe,$inference_exe);
my ($config,$src_path,$bin_path);
my ($fh,@merged,$tempfile1,$tempfile2,$temp);

# get the name of the executables
# $bin_path is the directory holding this script: drop the last path
# component rather than matching one hard-coded script name.
$bin_path=abs_path($0);
unless (defined $bin_path) {die "Can't resolve the location of $0!\n";}
$bin_path=~s{/[^/]+\z}{};
# configure.txt is looked up after swapping "bin/normalize" for "src"
$src_path=$bin_path;
$src_path=~s/bin\/normalize/src/;

# the first line of configure.txt holds the two executables, separated by whitespace
my $config_file=$src_path."/configure.txt";
open(my $config_fh,'<',$config_file) or die "Can't open $config_file: $!\n";
$config=<$config_fh>;
close($config_fh);
unless (defined $config) {die "$config_file is empty!\n";}
($preprocess_exe,$inference_exe)=split(" ",$config);
unless (defined $preprocess_exe) {die "No preprocessing executable listed in $config_file!\n";}

# get each replicate
@clip=split(",",$clip);
@control=split(",",$control);
foreach my $file (@clip,@control) # $file aliases the array elements
{
  $file=~s/^\s+|\s+$//g;
  $file=sub_home($file);
}
# empty entries (e.g. from "a,,b") used to vanish in the shell command line;
# drop them explicitly now that the commands bypass the shell
@clip=grep {$_ ne ""} @clip;
@control=grep {$_ ne ""} @control;
@combined=(@clip,@control);

# if the reads are in paired-end mode
@merged=();
if ($pair=~/,/)
{
  foreach my $reads (@combined)
  {
    ($fh,$temp)=tempfile(DIR=>$dir,SUFFIX=>".txt");
    push @merged,$temp;

    # paired-end files are merged to single-end files
    # list-form system: file names are passed verbatim, never parsed by a shell
    my @merge_cmd=("perl",$bin_path."/merge_pair.pl",$pair,$reads,$temp,$mut_type);
    if (defined $fr && $fr ne "") {push @merge_cmd,$fr;}
    if (system(@merge_cmd)!=0) {warn "Warning: merge_pair.pl failed for $reads (status $?)\n";}
  }

  print "Merging paired-end reads finished\n";
  $mut_type="Del"; # mutation type is changed to the dummy "Del"
  @combined=@merged;
}

# run C executables

($fh,$tempfile1)=tempfile(DIR=>$dir,SUFFIX=>".txt"); # cluster file
$tempfile2=$link; # count file

print "  Preprocessing reads\n";
if (system($preprocess_exe,$tempfile1,$tempfile2,$mut_type,$dup,@combined)!=0)
{
  warn "Warning: $preprocess_exe failed (status $?)\n";
}

###############  finding enriched bins  #################

#print "  Normalize tag intensity\n";
#system($inference_exe." ".$tempfile2." ".($#combined+1)." ".$step);

###############  clean up  ############################

# Runs when the interpreter shuts down (normal exit or after a die) and
# removes the intermediate files created above. The count file
# ($tempfile2) is not removed.
END
{
  # merged single-end files produced in paired-end mode
  unlink($_) foreach @merged;

  # cluster file; still undefined if the script stopped before creating it
  unlink($tempfile1) if defined $tempfile1;
#  if (defined $tempfile2) {unlink($tempfile2);}
}

exit;

############  print help message  ####################

# Print the user manual and exit.
#
# README.txt is looked up relative to this script's absolute path with the
# trailing "bin/MiClip.pl" removed. Everything after the first line
# containing "USER MANUAL" is printed, up to (not including) the next line
# containing "#". Dies if README.txt cannot be opened. Never returns.
sub help
{
  my $path=abs_path($0);
  $path=~s/bin[\/\\]+MiClip\.pl//;
  my $readme=$path."/README.txt";

  open(my $help_fh,'<',$readme) or die "Can't open $readme: $!\n";

  # skip everything up to and including the "USER MANUAL" heading
  while (my $line=<$help_fh>) {last if $line=~/USER MANUAL/;}

  # print the manual body; a line containing "#" ends it
  while (my $line=<$help_fh>)
  {
    last if $line=~/#/;
    print $line;
  }
  close($help_fh);

  exit;
}

###########  other subroutines  ######################

# Substitute a leading "~" with the $HOME environment variable.
#
# Only a bare "~" or a leading "~/" is expanded. "~user/..." is returned
# untouched, because prefixing it with the current user's home would build
# a wrong path. The path is also returned unchanged when it is undefined or
# when $HOME is unset or empty, so "~/x" never collapses to "/x".
#
# Parameter: the path to expand.
# Returns:   the expanded path, or the original value if nothing applies.
sub sub_home
{
  my $path=$_[0];
  my $home=$ENV{"HOME"};

  return $path unless defined $path;
  return $path unless defined $home && $home ne "";

  # the look-ahead keeps the "/" (or end of string) out of the match
  $path=~s{^~(?=/|\z)}{$home};
  return $path;
}



