#!/usr/bin/perl -w
use strict;

#set the location for custom packages used by the scripts
use runtime;
use db_link;
use projects;


# Turn a raw argument list into a flag => value hash.
#
# Each token that starts with one or more dashes is a flag; the token that
# follows it (unless that token is itself a flag) becomes its value.  A flag
# with no value is stored as undef.  Unknown flags are accepted, and tokens
# that are not attached to any flag are ignored.
#
# The arguments are walked one token at a time rather than being joined and
# re-split on '-', so values that contain hyphens (for example a file name
# such as "my-blast-run.txt") are kept intact.
#
# Parameters: the argument list (normally @ARGV).
# Returns:    a flat flag => value list suitable for assigning to a hash.
sub parse_flag_pairs {
    my @pending = @_;
    my %parsed;

    while (@pending) {
        my $token = shift @pending;

        # only "-flag" / "--flag" tokens open a new pair
        next unless $token =~ /\A-+(\S.*)\z/s;

        # tolerate a single quoted argument such as "-i file"
        my ($flag, $val) = split /\s+/, $1, 2;

        # otherwise the value is the next token, unless that is a flag too
        if (!defined $val && @pending && $pending[0] !~ /\A-/) {
            $val = shift @pending;
        }

        $parsed{$flag} = $val;
    }

    return %parsed;
}

@ARGV or print "No input parameters, proceeding with default.\n";

my %args = parse_flag_pairs(@ARGV);


############################################################
#
#  NOTE:  
#  This assumes that the query identifier for the search
#  sequence was the unigene id, and only the unigene id.
#  So "Query= 10023" in the input file should mean that
#  this is the result of blasting with unigene 10023.
#
############################################################       


# Run settings taken from the parsed command-line flags:
#   -i  path of the cleaned blast report to read
#   -p  project name, handed to projects::get_db_info
#   -t  blast target database id written into every inserted row
my $filein = $args{'i'};
my $project = $args{'p'};
my $target_id = $args{'t'};


# fail early, before any database work, when a required flag is missing
(defined $filein && length $filein)
    or die "Please specify the blast results file using the -i flag\n";
$project or die "Please specify a project (cgn, fgn, pgn) using the -p flag\n";
$target_id or die "Please specify the blast target database id using the -t flag\n";


# The lookup result is dereferenced as an array ref holding the database
# name and the user.  Guard the dereference so that a false return value
# reaches the readable message below instead of a strict-refs failure.
# NOTE(review): what get_db_info returns for an unknown project is not
# visible from this script - confirm.
my $db_info = projects::get_db_info($project);
my ($db, $usr) = $db_info ? @{$db_info} : ();
$db or die "No known database for project $project";

# per-unigene scratch area filled while parsing:
#   {unigene}{summary} -> list of [score, evalue]
#   {unigene}{detail}  -> list of hit descriptions
my %annotated_unigenes=();



# try to open the database
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";

# statement text, statement handle and return values shared by the loader
my ($stm, $sth, $rv, $rc);


#parse the cleaned blast results
#
# The report is read line by line.  For every "Query= <unigene id>" record:
#   * the lines after a "Sequences ..." header are treated as the hit
#     summary table, from which the score and e-value columns are taken;
#   * each ">" line starts a hit description, which continues until the
#     "Length = N" line of that hit.
# Summary row i is paired with description i, and one row per pair is
# inserted into unigene_blast_result.  A unigene is written out when the
# next "Query=" line is seen and, for the last one, at end of file.

(defined $filein && length $filein)
    or die "Please specify the blast results file using the -i flag\n";

open my $blast_fh, '<', $filein
    or die "Couldn't open $filein for read: $!\n";

# One prepared statement with bind values is reused for every row, so the
# quoting of descriptions, scores and the target id is left to the driver.
$stm = 'insert into unigene_blast_result (unigene_id, blast_target_id, match_description, match_score, evalue) values (?, ?, ?, ?, ?)';
$sth = $dbh->prepare($stm)
    or die "Can't prepare statement: $DBI::errstr";

my $unigene='';
my $hit_summary='';
my $hit_details='';
my $detail='';

# File the description collected so far under the current unigene and
# start a fresh one.  Runs of three or more whitespace characters (left
# over from joining wrapped lines) are collapsed to two spaces.
my $store_detail = sub {
    if ($detail) {
        $detail =~ s/\s{3,}/  /g;
        push @{$annotated_unigenes{$unigene}{'detail'}}, $detail;
    }
    $detail = '';
};

# Insert every (summary, description) pair of the current unigene, then
# forget the unigene.  Nothing is inserted for a unigene without a summary
# table, and a summary row without a matching description is skipped.
my $load_unigene = sub {
    return unless length $unigene;

    $store_detail->();

    my $hits = delete $annotated_unigenes{$unigene};
    return unless $hits && $hits->{'summary'};

    my $summaries = $hits->{'summary'};
    my $details   = $hits->{'detail'} || [];

    for my $i (0 .. $#{$summaries}) {
        my $description = $details->[$i] or next;
        my ($score, $evalue) = @{$summaries->[$i]};

        $rv = $sth->execute($unigene, $target_id, $description, $score, $evalue)
            or die "Can't execute statement: $DBI::errstr";
    }
};

while (my $line = <$blast_fh>) {

    chomp $line;

    #skip blank lines
    next if $line =~ /^\s*$/;

    #a new query: load the previous unigene (if any) and switch to the new id
    if ($line =~ /^Query=\s+([0-9]+)\s*/) {
        my $new_id = $1;

        $load_unigene->();

        $unigene = $new_id;

        #section flags must not leak from one query into the next
        $hit_summary = 'false';
        $hit_details = 'false';
        next;
    }

    #check to see if we are in the summary or details section
    if ($line =~ /^\s*Sequences\s/) {
        $hit_summary = 'true';
        next;
    }

    if ($line =~ /^>/) {
        $hit_summary = 'false';
        $hit_details = 'true';
    }

    if ($line =~ /^\s*Length\s+=\s+[0-9]+/) {
        $hit_details = 'false';
    }

    #parse the summary
    if ($hit_summary eq 'true') {
        # fixed-width row: 67-char defline, 3-char gap, 3-char score,
        # 3-char gap, 5-char e-value
        # NOTE(review): column widths are taken as-is from the existing
        # template - confirm they match the cleaned report layout.
        my (undef, undef, $score, undef, $evalue) = unpack ("A67a3A3a3A5", $line);

        push @{$annotated_unigenes{$unigene}{'summary'}}, [$score, $evalue];
        next;
    }

    #parse the detail
    if ($hit_details eq 'true') {
        if ($line =~ /^>(.+)$/) {
            my $first_part = $1;

            #a new hit begins: file the previous description first
            $store_detail->();
            $detail = $first_part;
        }
        else {
            #continuation of a wrapped description
            $detail .= $line;
        }
        next;
    }

}

#the last unigene has no following "Query=" line to trigger its load
$load_unigene->();

close $blast_fh;



#close the db link

db_link::disconnect_db($dbh);

