#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for the SGN project
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';

use lib '/data/shared/pgn_data_processing/scripts/perllib';

#local packages to use
use runtime;
use db_link;
use projects;


@ARGV or print "No input parameters, proceeding with default.\n";

my @arg_pairs = split (/\-/, (join ' ', @ARGV));

my %args=();

foreach (@arg_pairs){

    $_ or next;
    my ($flag, $val)=split /\s+/;
    $args{$flag}=$val;
}

my $seq_in_file=$args{'i'};
my $qual_in_file=$args{'a'};
my $seq_out_file=$args{'o'};
my $qual_out_file=$args{'q'};
my $vector_file=$args{'f'};
my $project=$args{'p'};
my $cross_match=$args{'c'};
my $verbose=$args{'v'};
my $screening_output=$args{'v'};

$project or die  "Please specify a project (cgn, fgn, pgn) using the -p flag\n";

my ($db, $usr) = @{projects::get_db_info($project)};

$db or die "No known database for project $project";


#set defaults for io, script filename, etc
$seq_in_file ||="/tmp/seqs_fasta";
$qual_in_file ||="/tmp/seqs_fasta.qual";
$seq_out_file ||="/tmp/seqs_fasta.screen";
$qual_out_file ||="/tmp/seqs_fasta.screen.qual";
$vector_file ||= "./data_files/vector.seq";
$cross_match ||= "/usr/local/bin/cross_match";
$screening_output ||= "/tmp/screen.out";


my $minmatch=12;
my $minscore=20;


#main body of script
#####################


#get the sequences from the database

$verbose and print "Getting sequences\n";

# try to open the database
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";
my ($stm,$sth,$rv,$rc);

#get the raw sequences that haven't been processed yet
my %raw_seq=();
my ($raw_seq_id, $seq_data);
$stm = "select r.raw_seq_id, r.sequence_data from raw_sequence as r left join trimmed_sequence as t on r.seq_id=t.seq_id where t.seq_id is null";
$sth = $dbh->prepare($stm) 
    || die "Can't prepare statement: $DBI::errstr";
$rv = $sth->execute
    || die "Can't execute statement: $DBI::errstr";
$rc = $sth->bind_columns(\$raw_seq_id, \$seq_data);
while($sth->fetch){
    $raw_seq{$raw_seq_id}=$seq_data;
}

$verbose and print "Sequences retrieved, getting quality data.\n";

#get the raw quality values
my %raw_qual=();
my ($q_raw_seq_id, $qual_val);
$stm = "select q.raw_seq_id, q.quality_values from raw_sequence_quality as q left join trimmed_sequence_quality as t on q.seq_id=t.seq_id where t.seq_id is null";
$sth = $dbh->prepare($stm) 
    || die "Can't prepare statement: $DBI::errstr";
$rv = $sth->execute
    || die "Can't execute statement: $DBI::errstr";
$rc = $sth->bind_columns(\$q_raw_seq_id, \$qual_val);
while($sth->fetch){
    $raw_qual{$q_raw_seq_id}=$qual_val;
}


 db_link::disconnect_db($dbh);


foreach (keys %raw_seq){
    $raw_qual{$_} or print "Warning: No qual for $_\n";
}


$verbose and print "Quality values retrieved, outputing to file.\n";

#write it out to file
open SEQOUT, ">$seq_in_file";
open QUALOUT, ">$qual_in_file";

foreach (keys %raw_seq){
    print SEQOUT ">$_ \n$raw_seq{$_}\n";
    print QUALOUT ">$_ \n$raw_qual{$_}\n";
}

close SEQOUT;
close QUALOUT;


#do the vector screening

$verbose and print "Running cross_match for vector screening.\n";

my $syscmd="$cross_match $seq_in_file $vector_file -minmatch $minmatch -minscore $minscore -screen > $screening_output";
my $start_time=time;
system($syscmd);
$syscmd="cp $qual_in_file $qual_out_file";

system ($syscmd);

 runtime::runtime_print($start_time, "Vector screening");

