#!/usr/bin/perl -w
use strict;

#set the location for custom packages used by the scripts
use lib '/soldb/website/perllib';
use runtime;
use db_link;
use projects;


# Parse the command line into %args, keyed by flag name without its leading
# dash(es), e.g. "-i results.txt -p cgn -t 3" gives
# (i => 'results.txt', p => 'cgn', t => '3').
@ARGV or print "No input parameters, proceeding with default.\n";

my %args = _parse_flag_args(@ARGV);

# Convert a list of command-line tokens into a flat (flag => value) list.
#
# Tokens are examined one at a time rather than being joined and re-split on
# '-', so a value may itself contain hyphens (file names, paths).
#
#   - a token of the form "-name" (or "--name") starts a flag called "name";
#   - the following token becomes its value unless it also looks like a flag,
#     in which case the value is undef;
#   - tokens that do not follow a flag are ignored.
#
# Limitation: a value that starts with '-' is read as a new flag.
sub _parse_flag_args {
    my @tokens = @_;
    my %value_for;

    while (@tokens) {
        my $token = shift @tokens;

        # Anything that is not "-name" at this point is a stray value.
        next unless $token =~ /^-+(\S+)$/;
        my $flag = $1;

        # Consume the next token as the value unless it is itself a flag.
        my $value;
        if (@tokens and $tokens[0] !~ /^-+\S/) {
            $value = shift @tokens;
        }

        $value_for{$flag} = $value;
    }

    return %value_for;
}

# Pull the options this script understands out of the parsed command line.
my $filein    = $args{'i'};    # BLAST report to parse
my $project   = $args{'p'};    # project code, used to look up the database
my $target_id = $args{'t'};    # stored as blast_target_id on every loaded hit


# All three options are required; fail early with a message that names the
# missing flag instead of failing later on an undefined value.
$project or die "Please specify a project (cgn, fgn, pgn) using the -p flag\n";
$target_id or die "Please specify the blast target database id using the -t flag\n";
(defined $filein and length $filein)
    or die "Please specify the blast results file using the -i flag\n";


# Resolve the project to its database name and user. The lookup result is
# checked before it is dereferenced so that a false return value leads to the
# "No known database" message below rather than a strict-refs error.
# NOTE(review): assumes get_db_info returns an array ref of (db, user) for a
# known project - confirm what it returns for an unknown one.
my $db_info = projects::get_db_info($project);
my ($db, $usr) = $db_info ? @{$db_info} : ();
$db or die "No known database for project $project";

# Parsed hits, keyed by query identifier: the numeric seq_id for singlets, or
# "<cluster>_<contig>" for contigs. Each entry holds two lists that are meant
# to line up by index:
#   summary => [ [score, evalue], ... ]  one per line of the hit table
#   detail  => [ description, ... ]      one per ">" hit header
# Quote characters in all stored strings are backslash-escaped; the load code
# further down relies on receiving them in that form.
my %annotated_ests=();


#parse the cleaned blast results
open(my $blast_fh, '<', $filein)
    or die "Couldn't open $filein for read: $!\n";

my $est     = '';    # identifier of the query being read ('' = none usable)
my $section = '';    # 'summary', 'detail', or '' when outside both
my $detail;          # description being accumulated; undef when none pending

# Store the pending description (if any) under the current query and clear it.
# Called whenever a description is complete: at its "Length =" line, at the
# next ">" header, at the next "Query=" line and at end of file. Storing it
# before $est changes keeps each description with the query it belongs to.
my $flush_detail = sub {
    if (defined $detail and $est ne '') {

        #cleanup spacing issues
        $detail =~ s/\s{3,}/  /g;

        #prepare for database load
        $detail =~ s/\'/\\\'/g;
        $detail =~ s/\"/\\\"/g;
        push @{$annotated_ests{$est}{'detail'}}, $detail;
    }
    $detail = undef;
};

while (my $line = <$blast_fh>) {

    chomp $line;

    #skip blank lines
    next if $line =~ /^\s*$/;

    #a new query starts: close off the previous one and set the clone name
    if ($line =~ /^Query=/) {

        $flush_detail->();

        # Start from a clean state so that hits of a query whose name cannot
        # be parsed are dropped instead of being added to the previous query.
        $est     = '';
        $section = '';

        #pull out "Singlet" name
        $line =~ s/Singlet\-//;

        #parse singlets
        if ($line =~ /^Query=\s+CGN-EST-([0-9]+)\s+/) {
            $est = $1;    #this stores the seq_id value
        }
        #parse contigs
        elsif ($line =~ /^Query=\s+Contig([0-9]+)\-Cluster\-([0-9]+)/) {
            $est = $2 . '_' . $1;    #this stores the contig identifier
        }
        #shouldn't reach this unless it couldn't match
        else {
            print "Error: couldn't extract identifier from $line\n";
        }
        next;
    }

    #the hit table starts after its "Sequences ..." heading
    if ($line =~ /^\s*Sequences\s/) {
        $section = 'summary';
        next;
    }

    #a ">" header ends the hit table (or the previous hit) and starts a hit
    if ($line =~ /^>(.*)$/) {
        my $header_text = $1;    # copy before the flush runs its own regexes
        $flush_detail->();
        $detail  = $header_text;
        $section = 'detail';
        next;
    }

    #parse the detail
    if ($section eq 'detail') {

        # "Length = N" marks the end of the description text.
        if ($line =~ /^\s*Length\s+=\s+[0-9]+/) {
            $flush_detail->();
            $section = '';
        }
        else {
            $detail .= $line;
        }
        next;
    }

    #parse the summary
    if ($section eq 'summary') {

        # Fixed-width row: 67 columns of description, 3 of padding, 3 of
        # score, 3 of padding, 5 of e-value. Only score and e-value are kept.
        my (undef, undef, $score, undef, $evalue) = unpack("A67a3A3a3A5", $line);

        #prepare for database load
        for my $field ($score, $evalue) {
            $field =~ s/\'/\\\'/g;
            $field =~ s/\"/\\\"/g;
        }

        push @{$annotated_ests{$est}{'summary'}}, [$score, $evalue]
            if $est ne '';
        next;
    }

}

# The last description in the file has no following header to trigger it.
$flush_detail->();

close $blast_fh;





#    foreach (keys %annotated_ests){
#	print "$_:\n";
#	my $i;
#	for ($i=0;$i<@{$annotated_ests{$_}{'summary'}};$i++){
#	    print "$i:\tScore=" . $annotated_ests{$_}{'summary'}[$i][0] . "\tEvalue=" . $annotated_ests{$_}{'summary'}[$i][1] . "\n";
#	    print "Detail: " . $annotated_ests{$_}{'detail'}[$i] . "\n";
#	}
#	print '='x80 . "\n"; 
#    }




# try to open the database
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";

# Every statement is prepared once with placeholders and re-executed with
# bound values, so no value is spliced into the SQL text.
#
# NOTE(review): $est_build is not declared anywhere in this script; presumably
# it is exported by one of the modules loaded at the top of the file. Confirm,
# otherwise this will not compile under "use strict".

#get the unigene id for contigs
my $contig_sth = $dbh->prepare(
      "select unigene_id from unigenes as u, unigene_assembly as a"
    . " where u.unigene_element_id=a.unigene_assembly_id"
    . " and a.contig=? and u.unigene_element_type_id='1'"
    . " and u.unigene_build_id=?"
) or die "Can't prepare statement: $DBI::errstr";

#or get the unigene_id for singlets
my $singlet_sth = $dbh->prepare(
      "select unigene_id from unigenes as u, unigene_singleton as s"
    . " where u.unigene_element_id=s.unigene_singleton_id"
    . " and s.seq_id=? and u.unigene_element_type_id='2'"
    . " and u.unigene_build_id=?"
) or die "Can't prepare statement: $DBI::errstr";

my $insert_sth = $dbh->prepare(
      "insert into unigene_blast_result"
    . " (unigene_id, blast_target_id, match_description, match_score, evalue)"
    . " values (?, ?, ?, ?, ?)"
) or die "Can't prepare statement: $DBI::errstr";

foreach my $est_key (keys %annotated_ests) {

    # Contig identifiers contain an underscore; singlet ids do not.
    my $is_contig  = ($est_key =~ /\_/) ? 1 : 0;
    my $lookup_sth = $is_contig ? $contig_sth : $singlet_sth;

    $lookup_sth->execute($est_key, $est_build)
        or die "Can't execute statement: $DBI::errstr";
    my @row = $lookup_sth->fetchrow_array;
    $lookup_sth->finish;    # only the first row is used

    unless (@row) {
        print "couldn't find " . ($is_contig ? 'contig' : 'singlet')
            . " id for $est_key\n";
        next;
    }
    my $est_id = $row[0];

    # Either list may be missing for a query; treat that as empty.
    my $summaries = $annotated_ests{$est_key}{'summary'} || [];
    my $details   = $annotated_ests{$est_key}{'detail'}  || [];

    # One row per summary line, paired by index with its description.
    for my $i (0 .. $#{$summaries}) {
        my ($score, $evalue) = @{ $summaries->[$i] };
        my $description = defined $details->[$i] ? $details->[$i] : '';

        # The parser stores these strings with quotes backslash-escaped for
        # SQL text. Bound values need no escaping, so undo it here to store
        # the original characters.
        my @fields = ($description, $score, $evalue);
        for my $field (@fields) {
            $field = '' unless defined $field;
            $field =~ s/\\(['"])/$1/g;
        }

        $insert_sth->execute($est_id, $target_id, @fields)
            or die "Can't execute statement: $DBI::errstr";
    }

}

#close the db link

db_link::disconnect_db($dbh);

