#!/usr/bin/perl
# This script converts a file from the MiClip format to the dCLIP format.
use strict;
use warnings;
use POSIX qw/floor/;
use List::Util qw/sum/;

# Usage: <script> <step> <file>
#
# Converts <file> from MiClip format to dCLIP format IN PLACE: the converted
# text is written to "<file>.convert", which then replaces <file>.
#
# Input layout, as consumed by this script:
#   * one tab-separated header line per region; fields 1..4 (0-based) are
#     read as chromosome, strand, start and end;
#   * immediately followed by one tab-separated data line for every position
#     in start..end. Fields 0 and 1 are divided by (field 2 + pseudo count)
#     and (field 3 + pseudo count) to give the two tag values; fields 4 and 5
#     are copied through as the two mutation values.
#
# Output layout: each region is cut into bins of <step> positions aligned to
# multiples of <step>. Every bin becomes one line:
#   id, chr, bin start, strand, <step> tag1 values, <step> mut1 values,
#   <step> tag2 values, <step> mut2 values
# Positions of a bin that lie outside start..end are padded with 0.
# A region is written only if, in at least one of its bins, the tag1 and tag2
# values sum to more than 0.
#
# The script dies (leaving the original file untouched) on a missing or
# invalid argument, on any I/O failure, and on a malformed or truncated input.

my ($step,$file_in)=@ARGV;

die "Usage: $0 <step> <MiClip file>\n"
  unless defined($step) && defined($file_in);
# $step is used as a divisor and as a modulus, so it must be a positive integer
die "step must be a positive integer, got '$step'\n"
  unless $step=~/^\d+$/ && $step>0;

# Added to the denominators so as to mask regions with very small counts;
# can be added as an option in a future version.
my $pseudo_count=5;

my $file_out=$file_in.".convert";

open(my $fh_in,'<',$file_in) # MiClip format
  or die "Cannot open '$file_in' for reading: $!\n";
open(my $fh_out,'>',$file_out) # dCLIP format
  or die "Cannot open '$file_out' for writing: $!\n";

my $id=0; # running region number, becomes the first output column

while (my $header=<$fh_in>)
{
  # header
  chomp($header);
  next unless $header=~/\S/; # a blank line (e.g. at end of file) is not a region

  my @items=split("\t",$header);
  die "$file_in line $.: header line has fewer than 5 fields\n" if @items<5;

  $id++;
  my ($chr,$strand,$start,$end)=@items[1..4];
  my @buffer=(); # output lines of this region, held back until $flag is known
  my $flag=0; # mark whether the CLIP-Seq conditions have at least one tag intensity
  my ($head,@tag1,@mut1,@tag2,@mut2);

  # data: walk whole bins, from the bin containing $start to the one containing $end
  my $first=floor($start/$step)*$step;
  my $last=floor($end/$step)*$step+$step-1;

  for my $i ($first..$last)
  {
    if ($i % $step==0) # first position of a bin: form header, reset columns
    {
      @tag1=();@mut1=();@tag2=();@mut2=();
      $head=join("\t",$id,$chr,$i,$strand);
    }

    if ($i<$start || $i>$end) # append extra 0s
    {
      push(@tag1,0);push(@mut1,0);push(@tag2,0);push(@mut2,0);
    }else
    {
      my $line=<$fh_in>; # read data line
      die "$file_in: unexpected end of file in region $id (position $i)\n"
        unless defined($line);
      # strip the line ending so that field 5, if it is the last column,
      # cannot carry a line break into the middle of an output row
      chomp($line);

      my @fields=split("\t",$line); # split data line
      die "$file_in line $.: data line has fewer than 6 fields\n" if @fields<6;

      push(@tag1,$fields[0]/($fields[2]+$pseudo_count));
      push(@tag2,$fields[1]/($fields[3]+$pseudo_count));
      push(@mut1,$fields[4]);
      push(@mut2,$fields[5]);
    }

    if (($i+1) % $step==0) # finished reading $step bases of data
    {
      push(@buffer,join("\t",$head,@tag1,@mut1,@tag2,@mut2)."\n");
      if (sum(@tag1,@tag2)>0) {$flag=1;}
    }
  }

  # print only if the CLIP-Seq samples have at least one tag count
  if ($flag>0) {print {$fh_out} @buffer;}
}

close($fh_in);
# buffered write errors only surface at close, so this one must be checked
close($fh_out) or die "Error while writing '$file_out': $!\n";

# Replace the input with the converted file. rename() overwrites the target,
# so the original is never removed before its replacement is in place.
rename($file_out,$file_in)
  or die "Cannot replace '$file_in' with '$file_out': $!\n";

