#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for this project
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';


#command line flags read below (each flag is followed by its value):
#  -p  project name (required)
#  -i  directory holding the incoming files
#  -o  directory the files are unpacked into
#  -m  directory the processed incoming files are moved to
@ARGV or print "No input parameters, proceeding with default.\n";

#walk @ARGV token by token instead of joining everything and re-splitting
#on '-': that approach truncated any value containing a hyphen or a space
#(e.g. "-i /data/my-plates/" ended up as "/data/my")
my %args=();

my @pending=@ARGV;
while (@pending){

    my $token=shift @pending;

#a token that does not look like "-flag" at this point is a stray; ignore it
    $token=~/^-+(.+)$/ or next;
    my $flag=$1;

#the value is the next token, unless that token is itself a flag
    my $val;
    if (@pending and $pending[0]!~/^-/){
        $val=shift @pending;
    }
    $args{$flag}=$val;
}

my $in_dir=$args{'i'};
my $out_dir=$args{'o'};
my $moveto_dir=$args{'m'};
my $project=$args{'p'};


$project or die "Please specifiy a project (fgn, cgn or pgn) with the -p flag\n";

#set defaults for io, script filename, etc
$in_dir ||= "/data/shared/pgn_data_processing/incoming_files/".$project."/new_files";
$out_dir ||= "/data/shared/pgn_data_processing/trace_files/".$project;
$moveto_dir ||= "/data/shared/pgn_data_processing/incoming_files/".$project."/done_files";


#uniform trailing slashes
#(foreach aliases the three variables, so they are modified in place)
foreach my $dir ($out_dir, $in_dir, $moveto_dir){
    $dir=~/\/$/ or $dir.='/';
}


#main body of script
#####################


#make a list of zip files in the incoming directory
#unzip each one into its own subdir of the output dir
#cgn only: remove sequence files, strip primer/sequencer info from trace names
#gzip the unpacked files individually
#move the processed zip file to the done dir
#
#external commands are run in list form (no shell), so paths containing
#spaces or shell metacharacters are passed through untouched

my @all_files=traverse_dir($in_dir);


foreach my $start_file (@all_files){

#process only zip files
    $start_file=~/\.zip$/i or next;

#file name without the leading directories
    my ($filename)=$start_file=~/([^\/]+)$/;

    my $folder_name=$filename;
    $folder_name=~s/\.zip$//i;


    print "Processing plate $folder_name.\n";

    my $new_location=$out_dir.$folder_name;

#an already existing target dir is reused; a dir that cannot be created
#means the archive is left where it is so it can be retried later
    unless (-d $new_location){
        unless (mkdir $new_location){
            warn "Couldn't create directory $new_location: $!, skipping $filename\n";
            next;
        }
    }

#unzip exit code 1 means "warnings, but processing completed";
#anything else that is non-zero is a failure, and a failed archive
#must not be moved to the done dir as if it had been processed
    my $unzip_status=run_command('unzip', $start_file, '-d', "$new_location/");
    if ($unzip_status<0 or $unzip_status>1){
        warn "unzip failed for $start_file (status $unzip_status), skipping\n";
        next;
    }


#####################################
# START CGN BRANCH                  #
#####################################
    if ($project eq 'cgn'){

#deal with special MGW files
        if ($new_location=~/03092Lin/){

#pull the .abi files out of the CCCP3/CCCP4 subdirs, then drop the subdirs
            foreach my $subdir ('CCCP3', 'CCCP4'){

                my $nested="$new_location/$subdir";
                unless (-d $nested){
                    warn "Expected directory $nested not found\n";
                    next;
                }

                my @entries=visible_entries($nested);
                my @abi_files=map {"$nested/$_"} grep /\.abi$/, @entries;
                my @leftovers=map {"$nested/$_"} grep !/\.abi$/, @entries;

#do not delete anything if the traces could not be moved out first
                if (@abi_files and run_command('mv', @abi_files, "$new_location/")!=0){
                    warn "Couldn't move trace files out of $nested, leaving it in place\n";
                    next;
                }

                if (@leftovers){
                    run_command('rm', '-rf', @leftovers)==0
                        or warn "Couldn't clean out $nested\n";
                }
                rmdir $nested or warn "Couldn't remove directory $nested: $!\n";
            }
        }
        else{
#remove sequence files from the top level of the plate dir
            foreach my $seq_file (grep /\.seq$/, visible_entries($new_location)){
                unlink "$new_location/$seq_file"
                    or warn "Couldn't remove $new_location/$seq_file: $!\n";
            }
        }
    }
#####################################
# END CGN BRANCH                    #
#####################################



    my @traces=traverse_dir("$new_location/");

    foreach my $trace (@traces){

        my $to_compress=$trace;

#####################################
# START CGN BRANCH                  #
#####################################
        if ($project eq 'cgn'){

#select only trace files
            $trace=~/\.ab[i1]$/i or next;

#remove the primer info and sequencer suffix from the name
#only the file name itself is rewritten, never the directory part,
#and the suffix is only stripped from the very end of the name
            my ($dir, $base)=$trace=~/^(.*\/)([^\/]+)$/;
            $base=~s/_M13R\./\./i;
            $base=~s/\.ab[i1]$//i;
            $to_compress=$dir.$base;

            if (run_command('mv', $trace, $to_compress)!=0){
                warn "Couldn't rename $trace to $to_compress\n";
                next;
            }

        }
#####################################
# END CGN BRANCH                    #
#####################################


#use gzip instead of zip so phred can read it on the fly
#if this seems to create errors for people downloading the traces
#revert to zip compression (zip -j -q -9) and decompress manually for phred

        run_command('gzip', '-9', $to_compress)==0
            or warn "gzip failed for $to_compress\n";

    }

#the archive has been unpacked; park it in the done dir under the same name
    my $moveto_file=$moveto_dir.$filename;
    run_command('mv', $start_file, $moveto_file)==0
        or warn "Couldn't move $start_file to $moveto_file\n";

    print "Done processing $folder_name .\n";

}


#run an external command in list form, i.e. without going through the shell
#returns the exit code of the command, or -1 if the command could not be
#started or was terminated by a signal
#######################
sub run_command{
    my @cmd=@_;
    my $status=system(@cmd);
    return -1 if $status==-1 or $status & 127;
    return $status>>8;
}


#list the names (not paths) of the entries directly inside a directory,
#skipping anything that starts with .
#returns an empty list, after a warning, if the directory cannot be opened
#######################
sub visible_entries{
    my ($dir)=@_;
    my $dh;
    unless (opendir ($dh, $dir)){
        warn "Couldn't open directory $dir: $!\n";
        return;
    }
    my @names=grep !/^\./, readdir $dh;
    closedir $dh;
    return @names;
}






#sub to recursively traverse directories and build a list of file names
#
#argument: path of the directory to scan (a trailing slash is optional)
#returns:  list with the path of every non-directory entry found in that
#          directory or below it, each prefixed with the directory path;
#          entries whose name starts with . are skipped, and subdirectories
#          are descended into rather than listed themselves
#dies if a directory cannot be opened
#######################
sub traverse_dir{
    my ($dirarg)=@_;

#a lexical handle gives every recursion level its own handle
#(a bareword handle would be shared by all levels) and lets us close it
    opendir (my $dh, $dirarg) or die "Couldn't open directory $dirarg: $!\n";
#skip any files starting with .
    my @dir_list= grep !/^\./, readdir $dh;
    closedir $dh;

#paths are built by plain concatenation, so make sure there is a separator
    my $prefix=$dirarg;
    $prefix=~/\/$/ or $prefix.='/';

    my @filelist;
    foreach my $entry (@dir_list){
        my $path=$prefix.$entry;
        if(-d $path){
            push @filelist, traverse_dir($path.'/');
        }
        else{
            push @filelist, $path;
        }
    }
    return @filelist;
}



