#!/usr/bin/perl -w
use strict;

#person in charge of this script
#this should be the person currently in charge of scripts for the SGN project
my $script_maintainer='Teri Solow <tms45@cornell.edu>';

#local packages to use
use runtime;
use db_link;
use projects;


# Command-line handling.
#   -i  file with trace names and their dates
#   -p  project (cgn, fgn, pgn)
#   -b  unigene build id
# Getopt::Long (core module) is used instead of joining @ARGV and splitting
# the result on '-': that approach cut off any value containing a hyphen
# (for example "-i my-file.txt" yielded the input file "my").
use Getopt::Long;

@ARGV or print "No input parameters, proceeding with default.\n";

# pass_through keeps unrecognised flags from being treated as errors; they
# are simply left in @ARGV. Recognised options are removed from @ARGV.
Getopt::Long::Configure('pass_through');

my %args=();
GetOptions(\%args, 'i=s', 'p=s', 'b=s');

my $in_file=$args{'i'};
my $project=$args{'p'};
my $build_id=$args{'b'};

$in_file or die "Please specify a file with trace names and their dates\n";
$build_id or die "Please specify a build id\n";
$project or die "Please specify a project (cgn, fgn, pgn) using the -p flag\n";

# Built only after $in_file has been validated, so no undef is concatenated.
# NOTE(review): nothing in this script writes to $out_file; the report is
# printed to STDOUT.
my $out_file=$in_file . '.tab';

# Guard the dereference: if get_db_info returns nothing for an unknown
# project, fall through to the explicit error below instead of dying on
# an undefined array reference.
my $db_info = projects::get_db_info($project);
my ($db, $usr) = $db_info ? @{$db_info} : ();
$db or die "No known database for project $project";


#main body of script
#####################


# Wells seen in the input file, keyed as
#   $sequencing_order{$sort_id}{$lib}{$plate}{$well}
# where $sort_id is year.month.day so a string sort of the keys follows the
# date. Every well starts with the value 0.
my %sequencing_order=();


# expected format for the file is:
# trace_name(all lowercase)**tab**date(mm-dd-yy)
# Three-argument open with a lexical handle: the file name is never
# interpreted as an open mode, and the OS error is reported on failure.
open my $trace_fh, '<', $in_file
    or die "Couldn't open $in_file for read: $!\n";

while (my $line = <$trace_fh>){
    chomp $line;

    # Blank lines carry no data; skip them silently.
    next unless $line =~ /\S/;

    my ($t_name, $t_date) = split /\t/, $line;

    my ($lib, $plate, $well) = split /\-/, (defined $t_name ? $t_name : '');
    my ($month, $day, $year) = split /\-/, (defined $t_date ? $t_date : '');

    # A line without all three name parts and all three date parts would
    # otherwise be stored under empty keys and surface as a bogus plate in
    # the report, so report it on STDERR and move on.
    unless (defined $well and defined $year){
	warn "Skipping malformed line $. of $in_file: $line\n";
	next;
    }

    my $sort_id=$year.$month.$day;

#    $plate =~ /\dcs\d/ or next;

    $sequencing_order{$sort_id}{$lib}{$plate}{$well}=0;
}

#print "Read chronological file, getting unigene membership info.\n";

close $trace_fh;


# try to open the database
my $dbh = db_link::connect_db($db, $usr) or die "couldn't open database link\n";


#get the build membership info
# %unigene_members maps a clone's external id to either the string 'singlet'
# or the unigene id of the contig it belongs to.
my %unigene_members=();

# Both queries return (unigene_id, external_id) for the requested build.
# The build id comes from the command line, so it is bound through a
# placeholder (?) rather than being pasted into the SQL text.
# The singlet query runs first, so a clone returned by both queries ends up
# with its contig's unigene id.
my @membership_queries = (
    {
	#get the singlets
	label => 'singlet',
	sql   => q{select u.unigene_id, o.external_id from unigenes as u left join unigene_singleton as us on (u.unigene_element_id=us.unigene_singleton_id) left join other_identifier as o on (us.seq_id=o.local_db_id) where u.unigene_build_id=? and unigene_element_type_id='2' and o.external_id_type='1'},
    },
    {
	#get the contigs
	label => undef,
	sql   => q{select u.unigene_id, o.external_id from unigenes as u left join unigene_assembly_component as ua on (u.unigene_element_id=ua.unigene_assembly_id) left join other_identifier as o on (ua.seq_id=o.local_db_id) where u.unigene_build_id=? and unigene_element_type_id='1' and o.external_id_type='1'},
    },
);

foreach my $query (@membership_queries){
    my $sth = $dbh->prepare($query->{sql})
	or die "Can't prepare statement: $DBI::errstr";
    $sth->execute($build_id)
	or die "Can't execute statement: $DBI::errstr";

    my ($unigene_id, $clone_id);
    $sth->bind_columns(\$unigene_id, \$clone_id);

    while ($sth->fetch){
	# Singlets are recorded under a fixed label, contig members under
	# the id of their unigene.
	$unigene_members{$clone_id} =
	    defined $query->{label} ? $query->{label} : $unigene_id;
    }
}


db_link::disconnect_db($dbh);


#print "Got unigene info for " . int(keys %unigene_members) . " traces\n";



# Walk every well in sorted order (date, library, plate, well) and replace
# its placeholder value in
#    $sequencing_order{$sort_id}{$lib}{$plate}{$well}
# with one of:
#    'failed'  - the clone has no entry in %unigene_members
#    'singlet' - the clone is a singlet
#    'first'   - first time this unigene is met in the walk
#    'repeat'  - the unigene was already met earlier in the walk

# Iterator variables live at file scope; other loops in this file may use
# them as well.
my ($date_sort, $lib_sort, $plate_sort, $well_sort);
my %unigenes_found=();

foreach $date_sort (sort keys %sequencing_order){
    my $libs_on_date = $sequencing_order{$date_sort};

    foreach $lib_sort (sort keys %{$libs_on_date}){
        my $plates_in_lib = $libs_on_date->{$lib_sort};

        foreach $plate_sort (sort keys %{$plates_in_lib}){
            my $wells_on_plate = $plates_in_lib->{$plate_sort};

            foreach $well_sort (sort keys %{$wells_on_plate}){

                # Clone ids are the library, plate and well joined by '-'.
                my $clone_name = join '-', $lib_sort, $plate_sort, $well_sort;
                my $member_of  = $unigene_members{$clone_name};

                my $status;
                if (not $member_of){
                    $status = 'failed';
                }
                elsif ($member_of eq 'singlet'){
                    $status = 'singlet';
                }
                elsif ($unigenes_found{$member_of}){
                    $status = 'repeat';
                }
                else{
                    $status = 'first';
                    # Remember the unigene so later members count as repeats.
                    $unigenes_found{$member_of}='true';
                }

                $wells_on_plate->{$well_sort} = $status;
            }
        }
    }
}


# Per-plate tallies, keyed by "library-plate":
#   %new_by_plate    - wells whose status is 'first' or 'singlet'
#   %failed_by_plate - wells whose status is 'failed'
# A plate only gets a key once it has at least one matching well.
my %new_by_plate=();
my %failed_by_plate=();

# Counting does not depend on visiting order, so the structure is walked
# directly instead of through sorted key lists.
foreach my $libs_on_date (values %sequencing_order){

    foreach my $lib_name (keys %{$libs_on_date}){
        my $plates_in_lib = $libs_on_date->{$lib_name};

        foreach my $plate_name (keys %{$plates_in_lib}){
            my $plate_key = join '-', $lib_name, $plate_name;

            foreach my $well_status (values %{ $plates_in_lib->{$plate_name} }){
                if ($well_status eq 'failed'){
                    $failed_by_plate{$plate_key}++;
                }
                elsif ($well_status eq 'first' or $well_status eq 'singlet'){
                    $new_by_plate{$plate_key}++;
                }
            }
        }
    }
}


# Print one tab-separated line per plate, in sorted date / library / plate
# order:
#   sort_id <tab> library-plate <tab> new genes <tab> good wells
# Good wells are the plate capacity minus the wells marked 'failed'.
# Plates with no good wells are left out of the report.

# Number of wells a plate is taken to have when deriving the good-well count.
my $wells_per_plate = 96;

foreach my $report_date (sort keys %sequencing_order){
    my $libs_on_date = $sequencing_order{$report_date};

    foreach my $lib_name (sort keys %{$libs_on_date}){

        foreach my $plate_name (sort keys %{ $libs_on_date->{$lib_name} }){
            my $plate = join '-', $lib_name, $plate_name;

            # Plates without any counted well have no key; treat as zero.
            my $new_genes    = $new_by_plate{$plate}    || 0;
            my $failed_wells = $failed_by_plate{$plate} || 0;
            my $good_wells   = $wells_per_plate - $failed_wells;

            $good_wells or next;

            print "$report_date\t$plate\t$new_genes\t$good_wells\n";
        }
    }
}

