#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for the SGN project
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';

use lib '/data/shared/pgn_data_processing/scripts/perllib';

#local packages to use
use runtime;
use db_link;
use projects;


@ARGV or print "No input parameters, proceeding with default.\n";

my @arg_pairs = split (/\-/, (join ' ', @ARGV));

my %args=();

foreach (@arg_pairs){

    $_ or next;
    my ($flag, $val)=split /\s+/;
    $args{$flag}=$val;
}

my $seq_in_file=$args{'i'};
my $blast_result=$args{'b'};
my $contamination_target=$args{'c'};
my $project=$args{'p'};

$project or die "Please specify a project (cgn, fgn, pgn) using the -p flag\n";

my ($db, $usr) = @{projects::get_db_info($project)};
$db or die "No known database for project $project";

#set defaults for io, script filename, etc
$seq_in_file ||= "/tmp/seq_to_eval.fasta";
$blast_result ||= "/tmp/seq_vs_ecoli";
$contamination_target ||="/data/shared/pgn_data_processing/E.coli_K12/Ecoli_genome";


my $blast_script="./helper_scripts/batch_blast_generic.pl";
my $min_evalue = '1e-100';

#main body of script
#####################

my $start_time=time;

open FILEOUT, ">$seq_in_file";


# try to open the database
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";
my ($stm, $sth, $rv, $rc);

#read in the sequences that haven't been evaluated
my ($trim_id, $seq);
$stm = "select t.trimmed_seq_id, t.sequence_data from trimmed_sequence as t left join quality_evaluation as q on t.trimmed_seq_id=q.trimmed_seq_id where q.qual_criteria_id='1'";
$sth = $dbh->prepare($stm) 
    || die "Can't prepare statement: $DBI::errstr";
$rv = $sth->execute
    || die "Can't execute statement: $DBI::errstr";
$rc = $sth->bind_columns(\$trim_id, \$seq);

while ($sth->fetch){
    print FILEOUT ">$trim_id\n$seq\n";
}

close FILEOUT;


my $syscmd="$blast_script -i $seq_in_file -o $blast_result -t $contamination_target -p blastn -d f -e $min_evalue";

system($syscmd);


open FILEIN, "$blast_result.blast_clean_results";

my %matches=();
my $query;
while (<FILEIN>){
    /^\s*Query= ([0-9]+)/
	and $query=$1;
    /^\s*Sequences/
	and $matches{$query}=1;
}

close FILEIN;

foreach (keys %matches){

#update the quality evaluation
    $stm = "update quality_evaluation set qual_criteria_id='4' where trimmed_seq_id='$_'";
    $sth = $dbh->prepare($stm) 
	|| die "Can't prepare statement: $DBI::errstr";
    $rv = $sth->execute
	|| die "Can't execute statement: $DBI::errstr";

}



db_link::disconnect_db($dbh);



 runtime::runtime_print($start_time, "E.coli contamination check");


