#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for this project
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';


#Program designed to log in and retrieve sequences from the PSU (Penn State University) LIMS system
#Logs into the PSU machine via ssh using key-based authentication to retrieve the list of files, and uses scp with the same authentication mechanism to do the actual copying



#Command-line handling
#Flags used by this script:
#   -i <dir>   directory the retrieved files are copied into
#   -d <dir>   directory that is searched for files we already have
#Any other flags are parsed but not used here.

@ARGV or print "No input parameters, proceeding with default.\n";

#Walk @ARGV token by token instead of joining and re-splitting on '-',
#so that values containing a hyphen (e.g. /data/new-files/) stay intact
my %args = parse_flag_args(@ARGV);

my $new_dir  = $args{'i'};
my $done_dir = $args{'d'};

#sub to turn a "-flag value -flag value" argument list into a hash
#######################
#Arguments: the list of command-line tokens
#Returns:   a (flag => value) list; the leading dash(es) are removed from the
#           flag name, and a flag that is not followed by a value maps to undef
#Tokens that neither start with a dash nor directly follow a flag are ignored
sub parse_flag_args {
    my @tokens = @_;
    my %parsed = ();

    while (@tokens) {
        my $token = shift @tokens;

        #not a flag: a stray value with no flag in front of it
        $token =~ /\A-+([^-].*)\z/s or next;
        my $flag = $1;

        #the value is the next token, unless that token is itself a flag
        my $val;
        if (@tokens and $tokens[0] !~ /\A-/) {
            $val = shift @tokens;
        }

        $parsed{$flag} = $val;
    }

    return %parsed;
}

#Settings for reaching the PSU machine over ssh/scp (key-based authentication)
my $login_user           = 'transfer';
my $login_identification = '/data/shared/pgn_data_processing/scripts/processing_components/data_files/fgn_psu_ssh/identification';
my $psu_machine_ip       = '146.186.29.44';

#Fall back to the standard directories when -i / -d gave nothing usable
#(an undefined, empty or "0" value all count as "not given")
unless ($new_dir) {
    $new_dir = "/data/shared/pgn_data_processing/incoming_files/fgn/new_files/";
}
unless ($done_dir) {
    $done_dir = "/data/shared/pgn_data_processing/incoming_files/fgn/done_files/";
}

#Give both directories a trailing slash if they lack one
#(the loop variable aliases the two variables, so they are changed in place)
foreach my $dir ($new_dir, $done_dir) {
    $dir .= '/' unless $dir =~ m{/$};
}




#main body of script
#####################


#Build a lookup of the zip files already present anywhere below $done_dir.
#The key is the first path component that contains ".zip" after at least one
#other character; the pattern is not anchored at the end, so a path such as
#.../x.zip.bak also registers the name x.zip.

my %local_filenames = ();
my @local_files     = traverse_dir($done_dir);

for my $local_path (@local_files) {
    if ($local_path =~ m{/([^/]+\.zip)}) {
        $local_filenames{$1}++;
    }
}


#Get a list of the remote files
#Runs 'ls *.zip' on the PSU machine over ssh and reads one file name per line.
#The command is handed to open as a list, so no local shell is involved and
#odd characters in the settings cannot break the command line; the remote
#shell still receives 'ls *.zip' as one string and expands the wildcard.
my @ssh_cmd = (
    'ssh',
    '-l', $login_user,
    '-i', $login_identification,
    $psu_machine_ip,
    'ls *.zip',
);

open(my $ps_files, '-|', @ssh_cmd)
    or die "Couldn't run ssh to list the files on $psu_machine_ip: $!\n";

#get a list of the new ones
my @files_toget=();

while (my $remote_name = <$ps_files>) {
    chomp $remote_name;

    #skip anything we already have locally
    $local_filenames{$remote_name} and next;
    push @files_toget, $remote_name;
}

#close on a pipe returns false for an I/O error ($! set) and also when the
#command exited with a non-zero status ($! is 0, details in $?)
unless (close $ps_files) {
    $! and die "Error closing the ssh pipe to $psu_machine_ip: $!\n";

    my $exit_status = $? >> 8;
    my $signal      = $? & 127;

    #ssh uses exit status 255 for its own failures (see ssh(1)); without this
    #check a failed connection would look exactly like "no new files"
    if ($signal or $exit_status == 255) {
        die "ssh to $psu_machine_ip failed (exit status $exit_status, signal $signal), could not list the remote files\n";
    }

    #any other status comes from the remote 'ls', e.g. when no *.zip exists
    warn "Remote file listing exited with status $exit_status; the list of remote files may be empty or incomplete.\n";
}


#get the new files
#Copies every file in @files_toget from the PSU machine into $new_dir with scp,
#one scp call per file, and reports the copies that failed.
my $filecounter=0;
my $totalcount=@files_toget;


if ($totalcount){
    print "Retrieving $totalcount new files.\n";
}
else{
    print "No new files to transfer.\n";
    exit;
}

my $failed_count = 0;

foreach my $remote_file (@files_toget){

    $filecounter++;

    print "Getting $remote_file (file $filecounter of $totalcount)\n";

    #list form of system: the file name reported by the remote machine is
    #passed to scp as-is and is never interpreted by a local shell
    my @scp_cmd = (
        'scp',
        '-i', $login_identification,
        "$login_user\@$psu_machine_ip:$remote_file",
        $new_dir,
    );

    system(@scp_cmd) == 0 and next;

    #system returns -1 when scp could not be started, otherwise the wait status
    $failed_count++;
    my $reason;
    if ($? == -1) {
        $reason = "could not run scp: $!";
    }
    elsif ($? & 127) {
        $reason = 'scp was killed by signal ' . ($? & 127);
    }
    else {
        $reason = 'scp exited with status ' . ($? >> 8);
    }
    warn "Failed to retrieve $remote_file ($reason)\n";
}

$failed_count
    and warn "$failed_count of $totalcount files could not be retrieved.\n";

print "Done retrieving sequences\n";


#sub to recursively traverse directories and build a list of file names
#######################
#Argument: the directory to start from; a trailing slash is added if missing
#Returns:  a list with the path (starting with the directory argument) of every
#          entry below it that is not a directory, in the order readdir
#          reports them
#Entries whose name starts with . are skipped, so such files are not listed
#and such directories are not descended into
#Dies if a directory cannot be opened
sub traverse_dir{
    my ($dirarg)=@_;

    #entry names are appended directly to $dirarg, so it has to end in a slash
    #(an empty argument is left alone so that it is not turned into "/")
    if (defined $dirarg and length $dirarg and $dirarg !~ m{/\z}) {
        $dirarg .= '/';
    }

    #lexical handle: every level of the recursion gets its own handle
    opendir (my $dir_handle, $dirarg)
        or die "Couldn't open directory $dirarg: $!\n";
#skip any files starting with .
    my @dir_list = grep { !/^\./ } readdir $dir_handle;
    closedir $dir_handle;

    my @filelist;
    foreach my $entry (@dir_list){
        my $path = $dirarg.$entry;
        if (-d $path){
            push @filelist, traverse_dir($path.'/');
        }
        else{
            push @filelist, $path;
        }
    }
    return @filelist;
}



