#!/usr/bin/perl -I /home/beth/pgn_processing/scripts/perllib
use strict;
$|=1;
use db_link;
my $old_dbh = db_link::connect_db('pgn-20050810', 'koni') or die "couldn't open database link\n";
my $pgn_dbh = db_link::connect_db('pgn', 'koni') or die "couldn't open database link\n";
my $fgn_dbh = db_link::connect_db('fgn', 'koni') or die "couldn't open database link\n";

my $seqs_list = '/data/shared/pgn_data_processing/misc_data/build_57_seq_ids';
open (my $fh, $seqs_list) or die "can't open $seqs_list: $!\n";
my @old_seqs = <$fh>;
close $fh;

my $seqlist = join ', ', @old_seqs;

my $new_seqs = $fgn_dbh->selectcol_arrayref("select seq_id from est_info where seq_id not in ($seqlist) and est_library_id=32");

my $to_delete = join ', ', @$new_seqs;

#$fgn_dbh->do("delete from est_info where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from adaptor_location where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from polya_trim where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from quality_trim where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from raw_sequence where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from raw_sequence_quality where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from sequence_group where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from tracefile_location where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from trimmed_sequence where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from trimmed_sequence_quality where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from vector_pieces where seq_id in ($to_delete)");
#$fgn_dbh->do("delete from other_identifier where local_db_id in ($to_delete)");
#$fgn_dbh->do("delete from quality_evaluation where seq_id in ($to_delete)");

##$fgn_dbh->do("delete from quality_evaluation using quality_evaluation, est_info where est_info.seq_id=quality_evaluation.seq_id and est_library_id=32 and quality_evaluation.seq_id in ($to_delete)");

#$fgn_dbh->do("delete from est_info where est_library_id=32 and seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from sequence_group using sequence_group, est_info where sequence_group.seq_id not in ($seqlist) and est_library_id=32 and est_info.seq_id=sequence_group.seq_id");
#$fgn_dbh->do("delete from raw_sequence using raw_sequence, est_info where raw_sequence.seq_id not in ($seqlist) and est_library_id=32 and est_info.seq_id=raw_sequence.seq_id");
#$fgn_dbh->do("delete from trimmed_sequence using trimmed_sequence, est_info where trimmed_sequence.seq_id not in ($seqlist) and est_library_id=32 and est_info.seq_id=trimmed_sequence.seq_id");
#$fgn_dbh->do("delete from adaptor_location using adaptor_location, est_info where adaptor_location.seq_id not in ($seqlist) and est_library_id=32 and est_info.seq_id=adaptor_location.seq_id");
#$fgn_dbh->do("delete from polya_trim using polya_trim, est_info where est_info.seq_id=polya_trim.seq_id and est_library_id=32 and polya_trim.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from quality_trim using quality_trim, est_info where est_info.seq_id=quality_trim.seq_id and est_library_id=32 and quality_trim.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from raw_sequence_quality using raw_sequence_quality, est_info where est_library_id=32 and est_info.seq_id=raw_sequence_quality.seq_id and raw_sequence_quality.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from tracefile_location using tracefile_location, est_info where est_library_id=32 and tracefile_location.seq_id=est_info.seq_id and tracefile_location.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from trimmed_sequence_quality using trimmed_sequence_quality, est_info where est_library_id=32 and est_info.seq_id=trimmed_sequence_quality.seq_id and trimmed_sequence_quality.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from vector_pieces using vector_pieces, est_info where est_library_id=32 and vector_pieces.seq_id=est_info.seq_id and vector_pieces.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from quality_evaluation using quality_evaluation, est_info where est_info.seq_id=quality_evaluation.seq_id and est_library_id=32 and quality_evaluation.seq_id not in ($seqlist)");
#$fgn_dbh->do("delete from other_identifier using other_identifier, est_info where est_library_id=32 and est_info.seq_id=other_identifier.local_db_id and local_db_id not in ($seqlist)");

#my $trimmed_seq_list = $fgn_dbh->selectcol_arrayref("select trimmed_seq_id from trimmed_sequence where seq_id in($seqlist)");

#my $trimlist = join ', ', @$trimmed_seq_list;
#$fgn_dbh->do("delete from trimmed_sequence where trimmed_seq_id not in ($trimlist)");


my $get_real_qual_id = $fgn_dbh->prepare("select min(qual_eval_id) from quality_evaluation where seq_id=?");

my $delete_wrong_qual_ids = $fgn_dbh->prepare("delete from quality_evaluation WHERE seq_id=? AND qual_eval_id != ?");

foreach my $seq_id (@old_seqs){
    $get_real_qual_id->execute($seq_id);
    my ($qual_id) = $get_real_qual_id->fetchrow_array();

    if ($qual_id){
	print "deleting $qual_id\n";
    } else {
	#die "no qual_id for seq $seq_id\n";
	next;
    }

    $delete_wrong_qual_ids->execute($seq_id, $qual_id);

}





