#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for the SGN project
#NOTE(review): nothing in the visible part of this file reads
#$script_maintainer; it looks like a contact record only - confirm before
#removing it
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';

use lib '/data/shared/pgn_data_processing/scripts/perllib';

#local packages to use
use runtime;


# Command-line handling
# ---------------------
# Options are given as "-x value" pairs:
#   -p  project name (required; the script has branches for cgn, fgn, pgn)
#   -i  input directory that is searched for trace files
#   -m  directory the processed traces are moved into
#   -o  sequence output file            -q  quality output file
#   -t  temp directory                  -e  phred parameter file
#   -d  dye chemistry tag               -s  file source info directory
# Every option except -p has a default, set further down.
@ARGV or print "No input parameters, proceeding with default.\n";

my %args=();

# Walk @ARGV token by token. Joining the arguments and splitting on every
# '-' (the previous approach) cut any value that contained a hyphen, such as
# the path /data/run-12/, into a bogus extra flag.
my @cli_tokens = @ARGV;
while (@cli_tokens){
    my $token = shift @cli_tokens;

    # only "-x" style tokens (one or more leading dashes) start a pair;
    # stray words in flag position are ignored
    $token =~ /^-+([^-\s]\S*)$/ or next;
    my $flag = $1;

    # the value is the following token, unless that token is itself a flag
    # (a flag without a value is stored as undef)
    my $val;
    if (@cli_tokens and $cli_tokens[0] !~ /^-/){
        $val = shift @cli_tokens;
    }
    $args{$flag}=$val;
}
my $seq_out_file=$args{'o'};
my $qual_out_file=$args{'q'};
my $in_dir=$args{'i'};
my $move_dir=$args{'m'};
my $tmp_dir=$args{'t'};
my $project=$args{'p'};
my $phred_param_file=$args{'e'};
my $dye_chem=$args{'d'};
my $source_info_dir=$args{'s'};

$project or die "Please specify a project (cgn, fgn, pgn) using the -p flag\n";

#set defaults for io, script filename, etc
$seq_out_file ||="/tmp/basecall_seqs_fasta";
$qual_out_file ||="/tmp/basecall_seqs_fasta.qual";
$in_dir ||= "/data/shared/pgn_data_processing/trace_files/".$project;
$move_dir ||= "/data/shared/pgn_data_processing/processed_traces/".$project;
$source_info_dir ||= '/soldb/pgn_data_processing/incoming_files/'.$project.'/file_source/';
$tmp_dir ||="/tmp/";
$phred_param_file ||= '/usr/local/etc/phredpar.dat';


#uniform trailing slashes
# $tmp_dir is normalised too: the trace list path below is built by plain
# concatenation, so "-t /scratch" used to produce "/scratchtracelist.txt"
foreach my $dir_ref (\$in_dir, \$move_dir, \$source_info_dir, \$tmp_dir){
    $$dir_ref =~ /\/$/ or $$dir_ref .= '/';
}


my $trace_list_file=$tmp_dir."tracelist.txt";




#main body of script
#####################


#create a list of files to read
# TRACELIST stays a bareword handle because the per-project code further
# down prints to it by that name. The open is now checked: without the list
# file there is nothing for the basecaller to read.
open(TRACELIST, '>', $trace_list_file)
    or die "Cannot open trace list file $trace_list_file for writing: $!\n";

# every non-hidden file below the input directory, at any depth
my @all_files=traverse_dir($in_dir);

print "Found ".scalar(@all_files)." files\n";

# names (after renaming) of the traces that were listed for basecalling
my @move_files=();

#####################################
# START CGN BRANCH                  #
#####################################
# For each trace file: normalise its name in place (lowercase, underscores
# removed, dye tag after the first name component, parent folder name
# prefixed with '##'), then record the resulting path in @move_files and in
# the trace list.
if ($project eq 'cgn'){

#default dye (does not change per file, so it is set once up front)
    $dye_chem ||= 'i';

    foreach my $trace (@all_files){

#skip control traces
        $trace =~ /\/pgem/ and next;

#clean up sequence file name

#split into directory part (with trailing slash) and bare file name;
#the match is checked so stale captures from an earlier match are never used
        my ($path, $filename) = $trace =~ /(.+\/)([^\/]+)$/;
        unless (defined $filename){
            warn "Skipping trace with unparsable path: $trace\n";
            next;
        }

#skip changes on those already processed
        unless ($filename =~ /\#\#/){

#lowercase the filename, eliminate underscores, and add chemistry info
#(\Q..\E keeps a dye tag containing regex metacharacters literal)
            $filename =~ tr/A-Z_/a-z/d;
            $filename =~ /^[^\.]+\.\Q$dye_chem\E/
                or $filename =~ s/([^\.]+)\./$1.$dye_chem./;

#add the folder name to the filename, for parsing out later
            $path =~ /^.+\/([^\/]+)\/$/
                and $filename = $1 . '##' . $filename;
        }

        my $target = $path.$filename;

#rename the file if needed
#rename() stays inside one directory here and involves no shell, so file
#names with spaces or shell metacharacters are handled correctly
        unless ($trace eq $target){
            unless (rename $trace, $target){
                # leave the file where it is and do not list a path that
                # does not exist
                warn "Could not rename $trace to $target: $!\n";
                next;
            }
        }

        push @move_files, $target;
        print TRACELIST "$target\n";

    }
}
#####################################
# END CGN BRANCH                    #
#####################################


#####################################
# START FGN BRANCH                  #
#####################################
# For each trace file: lowercase its name and prefix the parent folder name
# with '##' (renaming the file in place), then record the resulting path in
# @move_files and in the trace list.
if ($project eq 'fgn'){

    foreach my $trace (@all_files){

# We assume that at this point all the files have the proper nomenclature
# consisting of library-plate-location.dye
# If that's not the case, contact Kerr Wall (pkerrwall@psu.edu) and ask him
# to fix it.

#split into directory part (with trailing slash) and bare file name;
#the match is checked so stale captures from an earlier match are never used
        my ($path, $filename) = $trace =~ /(.+\/)([^\/]+)$/;
        unless (defined $filename){
            warn "Skipping trace with unparsable path: $trace\n";
            next;
        }

#skip changes on those already processed
        unless ($filename =~ /\#\#/){

#lowercase the file name
            $filename =~ tr/A-Z/a-z/;

#add folder name to file
            $path =~ /^.+\/([^\/]+)\/$/
                and $filename = $1 . '##' . $filename;
        }

        my $target = $path.$filename;

#rename the file if needed
#rename() stays inside one directory here and involves no shell, so file
#names with spaces or shell metacharacters are handled correctly
        unless ($trace eq $target){
            unless (rename $trace, $target){
                # leave the file where it is and do not list a path that
                # does not exist
                warn "Could not rename $trace to $target: $!\n";
                next;
            }
        }
        push @move_files, $target;

#########DEBUG#########
        print "$target\n";

        print TRACELIST "$target\n";

    }
}
#####################################
# END FGN BRANCH                    #
#####################################



#####################################
# START PGN BRANCH                  #
#####################################
# For each trace file: lowercase its name and prefix the parent folder name
# with '##' (renaming the file in place), then record the resulting path in
# @move_files and in the trace list.
if ($project eq 'pgn'){

    foreach my $trace (@all_files){

#split into directory part (with trailing slash) and bare file name;
#the match is checked so stale captures from an earlier match are never used
        my ($path, $filename) = $trace =~ /(.+\/)([^\/]+)$/;
        unless (defined $filename){
            warn "Skipping trace with unparsable path: $trace\n";
            next;
        }

#skip changes on those already processed
        unless ($filename =~ /\#\#/){

#lowercase the file name
            $filename =~ tr/A-Z/a-z/;

#add folder name to file
            $path =~ /^.+\/([^\/]+)\/$/
                and $filename = $1 . '##' . $filename;
        }

        my $target = $path.$filename;

#rename the file if needed
#rename() stays inside one directory here and involves no shell, so file
#names with spaces or shell metacharacters are handled correctly
        unless ($trace eq $target){
            unless (rename $trace, $target){
                # leave the file where it is and do not list a path that
                # does not exist
                warn "Could not rename $trace to $target: $!\n";
                next;
            }
        }
        push @move_files, $target;
        print TRACELIST "$target\n";

    }
}
#####################################
# END PGN BRANCH                    #
#####################################




# A failed close on a write handle means buffered lines never reached the
# disk, so the basecaller would read a truncated list.
close(TRACELIST)
    or die "Failed to finish writing trace list $trace_list_file: $!\n";




#do the basecalling

#remove trailing slash from temp dir name
#(a bare "/" is left alone so the name never becomes empty)
$tmp_dir=~s/\/$// unless $tmp_dir eq '/';



#make sure the environment knows where the param file is
#this needs to be done before running phred
$ENV{PHRED_PARAMETER_FILE}=$phred_param_file;

# The command is run in list form, i.e. without a shell, so paths containing
# spaces or shell metacharacters reach phred as single arguments.
my @phred_cmd=('phred',
               '-if', $trace_list_file,
               '-sa', $seq_out_file,
               '-qa', $qual_out_file,
               '-zt', $tmp_dir);

# $syscmd remains declared at file scope (holding a printable form of the
# command) so that code further down the script can keep assigning to it.
my $syscmd="@phred_cmd";
my $start_time=time;

# system() returns non-zero when phred fails or cannot be started
system(@phred_cmd) and die "Phred run failed.\n";

runtime::runtime_print($start_time, "Basecalling");


#move processed files
#the script will not move files for which it can't create the directories needed
#
# For each processed trace the directory structure it had below $in_dir is
# recreated below $move_dir, the 'folder##' prefix is stripped from the file
# name again, and the file is moved there. Failures are collected in
# @move_errors and summarised at the end.
print "Moving processed files to $move_dir\n";
my @move_errors;

foreach my $file (@move_files){

    # path relative to the input directory; \Q..\E and the ^ anchor make
    # $in_dir a literal prefix instead of a regex matched anywhere
    my $filepath=$file;
    $filepath =~ s/^\Q$in_dir\E//;
    my @dirpath = split '/', $filepath;
    print "Trying to move file ".  (pop @dirpath) . "...\n";

    my $dir=$move_dir;

    # reset for every file: previously a file with no sub-directories
    # inherited the error of the file before it and was wrongly skipped
    my $dir_error="";

#create the necessary directories to move the file into
    foreach my $component (@dirpath){
        $dir.=$component;
        if (-e $dir){
            unless (-d $dir){
                $dir_error="mkdir error - non-directory file $dir exists";
                last;
            }
        }
        elsif (!mkdir($dir, 0755)){
            $dir_error="mkdir error - failed to create directory $dir: $!";
            last;
        }
        $dir.='/';
    }


    if ($dir_error){
        push @move_errors, "$file : $dir_error";
        print "Failed to move file : $dir_error\n";
        next;
    }

#move the file
#this could probably be done better, using the system 'mv' utility for now

#checking to make sure we're not overwriting files from previous uploads
#add a duplicate index number at the end of the file if filename exists

    my $dest=$file;
    $dest=~ s/^\Q$in_dir\E/$move_dir/;
    # drop the 'folder##' prefix that was added to the file name earlier
    $dest=~ s/\/[^\#\/]+\#\#/\//;
    my $index=0;
    while(-e $dest){
        # replace the previous numeric suffix rather than stacking suffixes
        $index and $dest=~s/\.[0-9]+$//;
        $dest.= '.' . ++$index;
    }

    # list form: no shell, so unusual characters in file names are safe;
    # the result is checked so success is only reported when mv succeeded
    if (system('mv', '-f', $file, $dest) == 0){
        print "File moved.\n";
    }
    else{
        push @move_errors, "$file : mv to $dest failed (status $?)";
        print "Failed to move file : mv to $dest failed\n";
    }

}

# report everything that was left behind in one place
if (@move_errors){
    print scalar(@move_errors)." file(s) could not be moved:\n";
    print "  $_\n" foreach @move_errors;
}





#sub to recursively traverse directories and build a list of file names
#######################
# Argument: a directory path that must already end in '/', because entry
#           names are appended to it by plain concatenation.
# Returns:  a flat list with the full path of every non-directory entry
#           found below that directory, at any depth. Entries whose names
#           start with '.' are skipped, so dot-directories are not entered.
#           A directory that cannot be opened contributes no entries and
#           produces a warning.
sub traverse_dir{
    my ($dirarg)=@_;
    my @filelist;

    # lexical handle: every recursion level owns its handle instead of all
    # levels re-opening one shared global handle, and it is closed as soon
    # as the listing has been read
    my $dh;
    unless (opendir $dh, $dirarg){
        warn "Cannot read directory $dirarg: $!\n";
        return @filelist;
    }
#skip any files starting with .
    my @entries = grep { !/^\./ } readdir $dh;
    closedir $dh;

    foreach my $entry (@entries){
        my $full_path = $dirarg.$entry;
        if (-d $full_path){
            # descend, keeping the trailing-slash convention
            push @filelist, traverse_dir($full_path.'/');
        }
        else{
            push @filelist, $full_path;
        }
    }
    return @filelist;
}



