#!/usr/bin/perl
use strict;
use warnings;

# the first paramter is barcode in the format of TGNNN, NNCGNNCGA, etc
# the second parameter is the input fastq file (must be single stranded)
# the output file will be in the same folder as the input file, but with the suffix ".barcode" attached

my ($barcode,$fastq)=@ARGV;
my (%reads,$line1,$line2,$line3,$line4,$bc,$seq,$key1,$key2);

###############  compile regex for barcode  ####################################
$barcode=uc $barcode;
$barcode=~s/N/\[ATCG\]/g;
$barcode="^(".$barcode.")(.*)\n";
$barcode=qr/$barcode/;

###############  read fastq file  #########################

open(FILE_IN,$fastq) or die "Can't open input fastq file!\n";

while ($line1=<FILE_IN>) # read name
{
  $line2=<FILE_IN>; # sequence
  $line3=<FILE_IN>; # dummy line
  $line4=<FILE_IN>; # quality score

  unless ($line2=~/$barcode/) {next;} # the line does not match barcode]
  $bc=$1;
  $seq=$2;
  $line4=substr $line4,length($bc); # remove quality scores corresponding to barcode
  $reads{$bc}->{$seq}={name=>$line1,quality=>$line4}; 
}

close(FILE_IN);

##############  write to new fastq file  #####################

open(FILE_OUT,">".$fastq.".barcode") or die "Can't write to output fastq file!\n";

foreach $key1 (keys %reads)
{
  foreach $key2 (keys $reads{$key1})
  {
    print FILE_OUT $reads{$key1}->{$key2}->{"name"};
    print FILE_OUT $key2."\n";
    print FILE_OUT "+\n";
    print FILE_OUT $reads{$key1}->{$key2}->{"quality"};
  }
}

close(FILE_OUT);







