#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for the SGN project
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';

use lib '/data/shared/pgn_data_processing/scripts/perllib';

#local packages to use
use runtime;
use db_link;
use projects;


@ARGV or print "No input parameters, proceeding with default.\n";

# Parse the command line as "-flag value" pairs, walking @ARGV token by token.
# The arguments are deliberately NOT joined and re-split on '-': doing so cut
# every value that contains a hyphen (e.g. "-i my-list.txt") into pieces.
# Recognised flags:
#   -i  file holding the list of unigene ids (one per line)
#   -p  project name, resolved to a database by projects::get_db_info
# Any other flag is stored in %args but never read; words that do not follow
# a flag are ignored.
my %args=();

my @tokens = @ARGV;    # work on a copy so @ARGV itself is left untouched
while (@tokens){

    my $token = shift @tokens;

    # only tokens that start with a dash introduce a flag
    $token =~ /^-+(\S.*)$/ or next;
    my $flag = $1;

    # the value is the next token, unless that token is itself a flag
    my $val;
    if (@tokens and $tokens[0] !~ /^-/){
        $val = shift @tokens;
    }
    $args{$flag}=$val;
}

my $in_file=$args{'i'};
my $project=$args{'p'};

$in_file or die "Please specify a list of unigenes to grab\n";
$project or die "Please specify a project (cgn, fgn, pgn) using the -p flag\n";

# Derive the output name only after $in_file is known to be set, so a missing
# -i flag no longer triggers an "uninitialized value" warning under -w.
my $out_file=$in_file . '.fasta';

# get_db_info is dereferenced as an array ref holding (database, user).
# Guard against a false return value so that an unrecognised project reaches
# the message below instead of dying on the dereference.
# NOTE(review): what get_db_info returns for an unknown project is not
# visible from this script -- confirm.
my $db_info = projects::get_db_info($project);
my ($db, $usr) = $db_info ? @{$db_info} : ();
$db or die "No known database for project $project";

#main body of script
#####################

# Timestamp taken at start-up. Nothing in the visible script reads it; kept in
# case code outside this view relies on it.
my $start_time=time;

# try to open the database
# NOTE(review): the handle is used below through prepare/execute and
# $DBI::errstr, so connect_db presumably returns a DBI database handle.
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";

# Three-argument open with lexical handles: the file names are taken literally
# (no mode characters are interpreted) and $! reports why an open failed.
open my $list_fh, '<', $in_file
    or die "Couldn't open $in_file for read: $!\n";
open my $fasta_fh, '>', $out_file
    or die "Couldn't open $out_file for write: $!\n";

# Both lookups are prepared once, outside the loop, with a placeholder for the
# unigene id. The id comes straight from the input file, so it must never be
# interpolated into the SQL text.
my $contig_sth = $dbh->prepare(
    "select sequence_data from unigenes as u left join unigene_assembly_sequence as s on (u.unigene_element_id=s.unigene_assembly_id) where u.unigene_id=? and u.unigene_element_type_id='1'"
) or die "Can't prepare statement: $DBI::errstr";

my $singlet_sth = $dbh->prepare(
    "select sequence_data from unigenes as u left join unigene_singleton as us on (u.unigene_element_id=us.unigene_singleton_id) left join trimmed_sequence as ts using (trimmed_seq_id) where u.unigene_id=? and u.unigene_element_type_id='2'"
) or die "Can't prepare statement: $DBI::errstr";

while (my $unigene_id = <$list_fh>){

    # strip the line ending (including a CR from DOS files) and padding,
    # then skip lines that carry no id at all
    $unigene_id =~ s/^\s+|\s+$//g;
    next if $unigene_id eq '';

    #try to get contig first, if not found get singlets
    my $seq = _fetch_first_sequence($contig_sth, $unigene_id);
    defined $seq
        or $seq = _fetch_first_sequence($singlet_sth, $unigene_id);

    # A NULL sequence_data column (the left join found no sequence row) is
    # treated the same as "no row at all".
    unless (defined $seq){
        print "No sequence found for unigene $unigene_id\n";
        $seq = '';
    }

    # As before, a record is written for every id, even when no sequence was
    # found (the sequence line is then empty).
    print {$fasta_fh} ">$unigene_id\n$seq\n";
}

close $list_fh;
# buffered write errors only surface when the handle is closed
close $fasta_fh or die "Couldn't close $out_file: $!\n";

db_link::disconnect_db($dbh);

# Execute a prepared single-column lookup for one unigene id.
#   $sth        - prepared statement handle with one placeholder
#   $unigene_id - value bound to that placeholder
# Returns the first column of the first row, or undef when the query yields
# no row or the column is NULL. Dies if the statement cannot be executed or
# the fetch reports an error.
sub _fetch_first_sequence {
    my ($sth, $unigene_id) = @_;

    $sth->execute($unigene_id)
        or die "Can't execute statement: $DBI::errstr";

    my ($sequence) = $sth->fetchrow_array;
    $sth->err and die "Can't fetch row: $DBI::errstr";

    # Only the first row is wanted; release the result set so the handle can
    # be executed again for the next id.
    $sth->finish;

    return $sequence;
}




