#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for the FGN project
my $script_maintainer='Dan Ilut <dci1@cornell.edu>';

use lib '/data/shared/pgn_data_processing/scripts/perllib';

#local packages to use
use runtime;
use db_link;
use projects;


@ARGV or print "No input parameters, proceeding with default.\n";

# _parse_flag_pairs(@argv)
#
# Turns a "-flag value" style argument list into a (flag => value) list.
# The arguments are joined with single spaces and cut at every '-' that sits
# at the start of the string or directly after a non-word character, so a
# dash inside a word (as in 2005-06-15) does not start a new flag.  Only the
# first whitespace-delimited word after a flag is kept as its value; a flag
# with nothing after it maps to undef.
#
# Returns the pairs as a flat list suitable for assigning to a hash.
sub _parse_flag_pairs {
    my @argv = @_;
    my %parsed = ();

    # The group is non-capturing on purpose: with a capturing group, split
    # also returns whatever the group matched, which used to feed stray
    # separator strings into the loop and create an entry under an undefined
    # flag name (plus a warning under -w).
    foreach my $piece (split /(?:^|\W)-/, join(' ', @argv)) {

        # split ' ' ignores leading whitespace, so the flag is always the
        # first real word of the piece.
        my ($flag, $val) = split ' ', $piece;

        # Empty or whitespace-only pieces carry no flag at all.
        next unless defined $flag && length $flag;

        $parsed{$flag} = $val;
    }

    return %parsed;
}

my %args = _parse_flag_pairs(@ARGV);

# Pick the three recognised options out of the parsed flag table:
#   -l  library name ('all' selects every library)
#   -o  output directory (optional)
#   -d  run date
my ($lib, $out_dir, $run_date) = @args{'l', 'o', 'd'};

# The library and the run date are mandatory.
unless ($lib) {
    die "No library specified, try '-l library_name (-l all to process all libraries)'\n";
}
unless ($run_date) {
    die "Please specify a date using the -d flag and YYYY-MM-DD format\n";
}

# No output directory given: fall back to /tmp/.
$out_dir = "/tmp/" unless $out_dir;

# Guarantee exactly the form "dir/" so file names can simply be appended.
unless ($out_dir =~ /\/$/) {
    $out_dir .= '/';
}

# Fixed settings for this script.
my $project               = 'fgn';
my $standard_delay_months = 3;
my $extended_delay_months = 6;
my $script_file           = __FILE__;
my $seq_out_file          = $out_dir . "${lib}_EST.txt";


# Look up the connection details registered for the project; the first
# element is the database, the second the user.
my ($db, $usr) = @{ projects::get_db_info($project) };
die "No known database for project $project" unless $db;



#main body of script
#####################
# Remember when the real work began.
my $start_time = time;

# _months_before_stamp($date, $months_back)
#
# Builds the 14-digit cutoff stamp (YYYYMMDD followed by 000000) for the day
# lying $months_back months before $date.
#
#   $date         run date as YYYY-MM-DD (one- or two-digit month and day
#                 are both accepted)
#   $months_back  number of whole months to step back
#
# The day of the month is capped at 28 so that the result is a valid
# calendar date whatever month the calculation lands in.
#
# Dies with a usage-style message when $date is missing or is not a
# plausible YYYY-MM-DD date; otherwise returns the stamp as a string.
sub _months_before_stamp {
    my ($date, $months_back) = @_;

    my ($year, $month, $day) =
        (defined $date ? $date : '') =~ /\A(\d{4})-(\d{1,2})-(\d{1,2})\z/;

    unless (defined $year
            && $month >= 1 && $month <= 12
            && $day   >= 1 && $day   <= 31) {
        die "Invalid date '" . (defined $date ? $date : '')
            . "': please use the YYYY-MM-DD format\n";
    }

    # Work on a running month count so that any delay, including one of
    # twelve months or more, crosses as many year boundaries as it needs to.
    my $month_count = $year * 12 + ($month - 1) - $months_back;
    $year  = int($month_count / 12);
    $month = $month_count % 12 + 1;

    $day = 28 if $day > 28;

    return sprintf('%04d%02d%02d000000', $year, $month, $day);
}

# Cutoff for ordinary sequences.
my $submit_up_to_date = _months_before_stamp($run_date, $standard_delay_months);

# Cutoff for sequences that were held back for cause.
my $delay_submit_up_to_date = _months_before_stamp($run_date, $extended_delay_months);

# print "normal: $submit_up_to_date\nextended: $delay_submit_up_to_date\n";

#create the lookup hash for library info.  This should really be in the database
my %genbank_est_info=();

# Values that are the same for every library unless a row below overrides them.
my %shared_est_fields = (
    'CONT_NAME'  => 'Claude dePamphilis or James Leebens-Mack',
    'SOURCE'     => 'Mueller Laboratory, Penn State University',
    'DBNAME'     => 'Plant Genome Network (http://pgn.cornell.edu)',
    'SEQ_PRIMER' => 'M13F',
    'DNA_TYPE'   => 'cDNA',
    'P_END'      => "5'",
    'COMMENT'    => 'The sequence provided is trimmed of vector and low quality regions.  Full sequence and original trace file are available from the Plant Genome Network website (http://pgn.cornell.edu)',
);

# One row per library:
#   [ lookup key, LIBRARY value, CITATION value, optional { FIELD => value } overrides ]
# Rows are applied from top to bottom, so when a key occurs twice the later
# row replaces the earlier one completely.
my @library_rows = (
    [ 'nad01', 'Nad01', 'Generation of ESTs from early flower buds of Nuphar advena' ],
    [ 'nad03', 'Nad03', 'Generation of ESTs from early flower buds of Nuphar advena' ],
    [ 'atr01', 'Atr01', 'Generation of ESTs from early male flower buds of Amborella trichopoda' ],
    [ 'atr02', 'Atr02', 'Generation of ESTs from early female flower buds of Amborella trichopoda' ],
    [ 'eca01', 'Eca01', 'Generation of ESTs from early flower buds of Eschscholzia californica' ],
    [ 'pam01', 'Pam01', 'Generation of ESTs from early flower buds of Persea americana' ],
    [ 'wmi01', 'Wmi01', 'Generation of ESTs from young male strobilus of Welwitschia mirabilis' ],
    [ 'wmi02', 'Wmi02', 'Generation of ESTs from young female strobilus of Welwitschia mirabilis' ],
    [ 'ltu01', 'Ltu01', 'Generation of ESTs from early flower buds of Liriodendron tulipifera' ],
    [ 'zfi01', 'Zfi01', 'Generation of ESTs from young male strobilus of Zamia fischeri' ],
    [ 'csa01', 'Csa01', 'Generation of ESTs from early male flower buds of Cucumis sativus' ],
    [ 'csa02', 'Csa02', 'Generation of ESTs from early female flower buds of Cucumis sativus' ],
    [ 'aam01', 'Aam01', 'Generation of ESTs from early flower buds of Acorus americanus' ],
    [ 'she01', 'She01', 'Generation of ESTs from early flower buds of Saruma henryi' ],
    [ 'eca03', 'eca03', 'Generation of ESTs from cell culture suspension of Eschscholzia californica' ],
    # NOTE(review): second definition of 'wmi02'.  Being later in the list it
    # is the one that takes effect, replacing the 'young female strobilus'
    # row above.  It looks like a copy of another entry whose key was never
    # changed - confirm which library this row was meant for.
    [ 'wmi02', 'wmi02', 'Generation of ESTs from early flower buds of Welwitschia mirabilis' ],
    [ 'aof01', 'aof01', 'Generation of ESTs from early male inflorescences of Asparagus officinalis' ],
    [ 'aof02', 'aof02', 'Generation of ESTs from early female inflorescences of Asparagus officinalis' ],
    [ 'ipa01', 'ipa01', 'Generation of ESTs from early flower buds of Illicium parviflorum',
      { 'SEQ_PRIMER' => "5' TriplEx2 amplifying primer" } ],
    [ 'vco01', 'vco01', 'Generation of ESTs from early flower buds with budscales of Vaccinium corymbosa' ],
    # blank version - for easy copying
    # [ '', '', '' ],
);

# Expand every row into its own complete nine-field record.
foreach my $library_row (@library_rows) {
    my ($lookup_key, $library_label, $citation, $overrides) = @{$library_row};

    $genbank_est_info{$lookup_key} = {
        %shared_est_fields,
        'CITATION' => $citation,
        'LIBRARY'  => $library_label,
        %{ $overrides || {} },
    };
}

# try to open the database
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";

# Statement scratch variables, kept at file scope for any later statement
# that reuses them.
my ($stm, $sth, $rv, $rc);

# _collect_unsubmitted($handle, $library, $held, $cutoff, $found)
#
# Selects sequences that have not been submitted to genbank
# (gb.submitted is null), match quality criteria id 6 and were created
# before $cutoff, and appends them to %$found.
#
#   $handle   open database handle
#   $library  library name, or 'all' for no library restriction
#   $held     false: only rows where delay_for_cause is null
#             true:  only rows where delay_for_cause is not null
#   $cutoff   creation-date cutoff stamp
#   $found    hash reference; for every row the pair
#             (sequence data, external id) is pushed onto $found->{seq_id}
#
# The cutoff and the library name are passed as bind values, never pasted
# into the SQL text, so quotes or other special characters in a command-line
# value cannot change the statement.
#
# Dies when the statement cannot be prepared or executed.
sub _collect_unsubmitted {
    my ($handle, $library, $held, $cutoff, $found) = @_;

    my $restrict_library = ($library ne 'all');

    my $sql = "select sg.seq_id, ts.sequence_data, o.external_id"
        . " from trimmed_sequence as ts, est_info as e, other_identifier as o,"
        . " quality_evaluation as q, genbank_submission as gb, sequence_group as sg";
    $sql .= ", est_library as l" if $restrict_library;

    $sql .= " where ts.seq_id=e.seq_id and ts.seq_id=sg.seq_id and ts.seq_id=gb.seq_id"
        . " and ts.seq_id=o.local_db_id and ts.trimmed_seq_id=q.trimmed_seq_id"
        . " and q.qual_criteria_id='6' and gb.submitted is null"
        . " and gb.delay_for_cause is " . ($held ? "not null" : "null")
        . " and sg.creation_date < ?";
    my @bind_values = ($cutoff);

    if ($restrict_library) {
        $sql .= " and e.est_library_id=l.est_library_id and l.library_name=?";
        push @bind_values, $library;
    }

    my $query = $handle->prepare($sql)
        or die "Can't prepare statement: $DBI::errstr";
    $query->execute(@bind_values)
        or die "Can't execute statement: $DBI::errstr";

    my ($id, $bases, $clone_name);
    $query->bind_columns(\$id, \$bases, \$clone_name);

    while ($query->fetch) {
        push @{ $found->{$id} }, ($bases, $clone_name);
    }

    return;
}

# seq_id => [ sequence data, external id, ... ]
my %sequences=();

#pull all sequences before the given date that weren't submitted to genbank and weren't held
_collect_unsubmitted($dbh, $lib, 0, $submit_up_to_date, \%sequences);

#pull all sequences before the given date that weren't submitted to genbank and were held
_collect_unsubmitted($dbh, $lib, 1, $delay_submit_up_to_date, \%sequences);



#write out the files for submission
# Three-argument open with a lexical handle: the file name is never parsed
# for a mode character, and the handle cannot clash with another bareword.
open(my $seq_out, '>', $seq_out_file) or die "Couldn't open $seq_out_file: $!";

print "Found " . scalar(keys %sequences) . " sequences.\n";

# Prepared once and executed per sequence.  The run date and the sequence id
# are bound as values instead of being pasted into the SQL text.
# date_submitted records the -d run date, not the day the script happens to
# be executed.
my $mark_submitted = $dbh->prepare(
    "update genbank_submission set submitted='1', date_submitted=? where seq_id=?"
) or die "Can't prepare statement: $DBI::errstr";

foreach my $id (keys %sequences) {

    # Element 0 is the sequence data, element 1 the external id, which is
    # split on '-' into library, plate and well.
    my ($bases, $clone_name) = @{ $sequences{$id} };
    my ($lib_name, $plate_name, $well_name) = split /-/, $clone_name;

    # The library info table is keyed by lowercase library name.
    $lib_name = lc $lib_name;

    my $info = $genbank_est_info{$lib_name};
    $info && $info->{'CITATION'}
        or die "no citation for lib $lib_name for $id ($clone_name)\n";

    # Well name minus its digits is the row; minus its letters (either
    # case) is the column.
    (my $row    = $well_name) =~ tr/0-9//d;
    (my $column = $well_name) =~ tr/a-zA-Z//d;

    my $plate_id = $lib_name . '-' . $plate_name;
    my $clone_id = $plate_id . '-' . $well_name;

    #print out the entry for the EST in proper genbank format
    # (the space after the colon on the CITATION, PUBLIC, COMMENT and
    # SEQUENCE lines is part of the existing output and is kept)
    print {$seq_out}
        "TYPE: EST\n",
        "STATUS: New\n",
        "CONT_NAME: $info->{'CONT_NAME'}\n",
        "CITATION: \n$info->{'CITATION'}\n",
        "LIBRARY: $info->{'LIBRARY'}\n",
        "EST#: $clone_id\n",
        "CLONE: $clone_id\n",
        "SOURCE: $info->{'SOURCE'}\n",
        "SOURCE_INHOST: $clone_id\n",
        "DBNAME: $info->{'DBNAME'}\n",
        "DBXREF: $clone_id\n",
        "PLATE: $plate_id\n",
        "ROW: $row\n",
        "COLUMN: $column\n",
        "SEQ_PRIMER: $info->{'SEQ_PRIMER'}\n",
        "P_END: $info->{'P_END'}\n",
        "DNA_TYPE: $info->{'DNA_TYPE'}\n",
        "PUBLIC: \n",
        "COMMENT: \n$info->{'COMMENT'}\n",
        "SEQUENCE: \n$bases\n",
        "||\n\n"
        or die "Couldn't write to $seq_out_file: $!";

    #mark them in the db as submitted
    $mark_submitted->execute($run_date, $id)
        or die "Can't execute statement: $DBI::errstr";
}

# Buffered write errors only show up here, so the close is checked too.
close($seq_out) or die "Couldn't close $seq_out_file: $!";




#close the database link
db_link::disconnect_db($dbh);

runtime::runtime_print($start_time, "genbank data pull");

