//August 2005, by Noah Whitman
//this program groups markers according to LOD thresholds
//then uses the RECORD algorithm to find a good marker ordering
//The code is provided under the GNU public license (http://www.gnu.org/copyleft/gpl.html#TOC1)
//usage: record filename 




//TO DO, BUGS:
//fix the efficiency of the satellite placement
//BUG:SMOOTH weights aren't normalized for markers near the edges of their groups
//ack, ordering is slow with SMOOTH, perhaps use fast reverses...
//Rmk: SMOOTH does not interfere in some pernicious way with RECORD because RECORD is run normally on the last SMOOTH iteration

#include <stdio.h>
#include <fstream.h>
#include <string>
#include <vector>
#include <list>
#include <cstdlib>
#include <algorithm>
#include <math>
#include <stdlib.h>


//---------------------------------- forward declarations ----------------------------------
//Functions used before their definition. Where a definition is not shown next to a
//declaration, the description is inferred from the call sites only.

//used by record() to draw the markers of a group one at a time while shuffling them
//NOTE(review): presumably removes and returns a random element of list_ - confirm against the definition
int poprand(list <int> &list_);
//conversion between the character of a .loc file and the integer marker code (0..5)
int marker2int(char m);
char int2marker(int a);
//score of placing the two marker rows loc1 and loc2 next to each other; the ordering code minimises sums of these
int scorepair(vector<vector <int> > &markers_, int loc1, int loc2);
//three-argument helper on an order vector, called by window_improvements() as reverse(ordr,first,last)
void reverse(vector<int> &v, int a, int b);
//parses one marker line of a .loc file; returns 0 when the marker is accepted, non-zero when it is rejected
int read_loc(string &locstr_, string &locname, float mv_thresh, float chisq_thresh_11, float chisq_thresh_121, float chisq_thresh_13, vector<int> &markers_);
//score of a complete ordering (vector and list flavours)
int totalscore(vector<vector <int> > &markers_, vector <int> &order_);
int totalscore(vector<vector <int> > &markers_, list <int> &order_);
//parses one line of the penalty matrix file into matrow; main() adds the return value to its row counter
int read_mat(vector <int> &matrow, string &line);
//ordering of groups used by sort(); main() and satellites() expect empty groups to sort to the end
bool operator<(const vector <int> &a, const vector <int> &b);
//LOD score between two marker rows; r is an output (written to the output file as column "r")
float LOD( vector<vector <int> > &markers_, int loc1, int loc2, float &r);
float LOD2(vector<vector <int> > &markers_, int loc1, int loc2, float &r);
//orders the markers listed in ordr with the RECORD algorithm
void record(vector <vector <int> > &dmat, vector<int> &ordr);
//merges the groups beyond the first `chromosomes` into their best matching group
void satellites(vector<vector <int> > &markers_, vector<vector <int> > &grouping_, float &lod_thresh, int chromosomes);
//greedy one-by-one placement of the markers of ordr; returns the score accumulated while placing
int initial_placement(vector <vector <int> > &dmat, vector<int> &ordr);
//moves markers with too many singleton data points from ordr to singleton_markers; returns how many were moved
int remove_singletons(vector <vector <int> > &dmat, vector<int> &ordr, list<int> &singleton_markers);
//re-inserts the markers of singleton_markers into ordr
void hammer_markers(vector <vector <int> > &dmat, vector<int> &ordr, list<int> &singleton_markers);
//improves ordr by reversing windows of increasing size whenever that lowers the score
void window_improvements(vector <vector <int> > &dmat, vector<int> &ordr, int score);

//---------------------------------- file-scope configuration ----------------------------------

//every write to map_log in this file is guarded by use_log
static bool use_log=true;
static bool verbose_log = false; //false=not verbose, true=verbose (extra singleton diagnostics in remove_singletons)

//the log stream; main() opens it as "fast_mapping_log.txt" next to the input file
fstream map_log=fstream();


//const int recordCycles=15;
//const float d_increment=.02;


//the default threshold percentage of difference between y and yhat
//for a datapoint to be considered a singleton
//(remove_singletons counts a data point as a singleton when yhat > default_d)
const float default_d=0.95;


//set delta to zero to turn off SMOOTH
//delta is the vertical search length for SMOOTH
//use 1 for nearest neighbor ( with 0.5 <default_d <1.0 )
//with delta==0 the neighbour loop in remove_singletons adds nothing to yhat, so no marker is ever removed
//main() clamps delta to weights_length
int delta=0;


//the percentage of datapoints above which a marker
//is considered a singleton marker
//(compared against singletons/datapoints, i.e. it is a fraction: .05 means 5%)
const float singleton_threshold=.05;


//only groups with more markers than this go through the singleton removal / hammering cycle in main()
const unsigned int singleton_group_threshold=100;
//SMOOTH table, indexed in remove_singletons as [marker code of the neighbour][marker code of the marker itself];
//a non-zero entry lets that neighbour contribute its weight to yhat
const float smooth_matrix[6][6]={{ 0, 0, 0, 0, 0, 0},
								 { 0, 0, 1, 1, 0, 1},
								 { 0, 1, 0, 0, 1, 1},
								 { 0, 1, 0, 0, 0, 0},
								 { 0, 0, 1, 0, 0, 0},
								 { 0, 1, 1, 0, 0, 0}};
//6x6 table that main() overwrites with the contents of the matrix file named in mapping_parameters.txt
//NOTE(review): presumably consumed by scorepair(), whose definition is not shown here - confirm
int penalty_matrix[6][6]={{ 0, 0, 0, 0, 0, 0},
						  { 0, 0, 0, 0, 0, 0},
						  { 0, 0, 0, 0, 0, 0},
						  { 0, 0, 0, 0, 0, 0},
						  { 0, 0, 0, 0, 0, 0},
						  { 0, 0, 0, 0, 0, 0}};
//SMOOTH weights by distance: weights[d-1] is used for a neighbour d positions away.
//main() rescales the first delta entries so that twice their sum is 1.
const int weights_length=15;
float weights[weights_length]={0.998,0.981,0.934,0.857,0.758,0.647,0.537,0.433,0.342,0.265,0.202,0.151,0.112,0.082,0.059};


int main(int argc, char *argv[])
{
	if(argc <= 1) {
		cout<<"FastMapping only takes one argument, it's loc file!"<<endl;
		 exit(0);
	}
	cout<<"Running fast mapping...";
	

	//================= program-wide variables =========================================================	
	vector <string> locnames; // the loci names
	vector <string> bad_segregation_locnames; // the loci names
	vector <string> excessive_blank_locnames; // the loci names
	vector <string> comments; //stores lines beginning with ';'
	vector <vector <int> > markers; //a matrix of the markers
	vector <vector <int> > markers_transpose; //a transposed matrix of markers, used only when ordering individuals
	vector <vector <int> > grouping; //a matrix of grouped marker id's
	vector<int> old_plant_order;	//order of individuals
	vector<int> plant_order;	//order of individuals
	vector <vector <int> > bad_segregation_markers; //a matrix of bad segregation markers
	vector <vector <int> > excessive_blank_markers; //a matrix segregation markers
	vector< list<int> >singletons(grouping.size()); // a vector of lists of singletons for each group
		
	string outputfile,matrix_file; //filenames
	string filestr, poptstr; //first and second lines of .loc file
	int nloc, nind; //number of loci, number of markers per loci
	int newscore=0;  //running score of current ordering
	string oldscore=""; //old score possibly read in from loc file	

	//inputs read in from parameter file, set to default values here
	float LOD_threshold_1=16;
	float LOD_threshold_2=4;
	int chromosomes=12;
	float mv_threshold = .4;
	float chisq_thresh_11=13;
	float chisq_thresh_121=13;
	float chisq_thresh_13=13; //normal chi-square threshold w/ 2 degrees of freedom giving p-value .001 
	bool order_plants = false;

	char str[2000]; //buffer for reading in lines
	//================= read in arguments and data files =========================================================
	
	//read in file giving paramaters for rejecting bad markers
	string directory =  string(argv[1]).substr(0,string(argv[1]).find_last_of('\\',string(argv[1]).size()-1));
	if(directory == "") directory = string(argv[1]).substr( 0,string(argv[1]).find_last_of('/',string(argv[2]).size()-1) );
	if(directory.find_first_of("\\")==string::npos && directory.find_first_of("/")==string::npos) directory="";
	if(directory!="") directory.append("\\");

	if(use_log) map_log.open((directory + "fast_mapping_log.txt").c_str(),ios::out); //open the log file for writing
	
	//normalize the weights matrix
	if(delta>weights_length) delta=weights_length;
	float weightSum=0;
	for(int i=0;i<delta;i++){
		weightSum+=2.0*weights[i];
	}
	for(int i=0;i<delta;i++){
		weights[i]=weights[i]/weightSum;
		if(use_log) map_log<<"Weights["<<i<<"]="<<weights[i]<<endl;
	}

	//read in from mapping_parameters.txt
	string dum;
	fstream f_op((directory + "mapping_parameters.txt").c_str(),ios::in);
	if(!(f_op.is_open())) {
			if(use_log) map_log<<"bad parameters file!"<<directory + "mapping_parameters.txt"<<"\n"; exit(0);
	}
	if(f_op.is_open()) {
		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		matrix_file = dum;

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) chromosomes = atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) LOD_threshold_1 = atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) LOD_threshold_2 = atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) mv_threshold = atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) chisq_thresh_11=atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) chisq_thresh_121=atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if(atof(dum.c_str()) > 0) chisq_thresh_13=atof(dum.c_str());

		f_op.getline(str,1000);
		dum = string(str);
		dum = dum.substr(dum.find_first_of('=')+1,dum.size()-1);
		if( atof(dum.c_str()) == 1) order_plants = true;

		f_op.close();
		if(use_log) map_log<<"mv_theshold= "<<mv_threshold<<"\n";
		if(use_log) map_log<<"1:1 chisquare threshold= "<<chisq_thresh_11<<"\n";
		if(use_log) map_log<<"1:2:1 chisquare threshold= "<<chisq_thresh_121<<"\n";
		if(use_log) map_log<<"1:3 chisquare threshold= "<<chisq_thresh_13<<"\n\n";
	}

	//read in matrix file
	vector <vector <int> > smat(6);
	for(unsigned int i=0;i<6;i++){
		smat.at(i).resize(6);
		for(unsigned int j=0;j<6;j++){
			smat[i][j]=0;
		}	
	}

	while (matrix_file.at(0)==' '){
		matrix_file=matrix_file.substr(1,matrix_file.size()-1);
	}
	if(matrix_file.at(0)=='\"') matrix_file=matrix_file.substr(1,matrix_file.size()-2);

	matrix_file=directory + matrix_file;
	fstream file_opa(matrix_file.c_str(),ios::in);
	if(!(file_opa.is_open())){
		if(use_log) map_log<<"bad matrix file "<<matrix_file<<" !\n";  exit(0);
	}
	int row=0;
	while(!file_opa.eof() && row < 6 ) 
	{		
		file_opa.getline(str,1000);
		row+=read_mat(smat[row],string(str));
	}  
	file_opa.close();

	
	for(unsigned int i=0;i<6;i++){
		for(unsigned int j=0;j<6;j++) {
			penalty_matrix[i][j]=smat[i][j];
			if(use_log) map_log<<smat[i][j];
		}
		if(use_log) map_log<<endl;
	}	

	if(use_log) map_log<<endl<<endl;
	if(use_log) map_log<<"chromosomes = "<<chromosomes<<endl;
	if(use_log) map_log<<"LOD for core grouping = "<<LOD_threshold_1<<endl;
	if(use_log) map_log<<"minimum LOD for grouping = "<<LOD_threshold_2<<endl;
	if(use_log) map_log<<"missing value screening ratio = "<<mv_threshold<<endl;



	////////////////////////READ IN LOC FILE////////////////////////////////////////////////////////
	//read in number of loci and indicies
	fstream file_op(argv[1],ios::in);
	if(!(file_op.is_open())) {
			if(use_log) map_log<<"bad loc file!\n"; exit(0);
	}
	file_op.getline(str,1000);
	filestr=string(str);
	file_op.getline(str,1000);
	poptstr=string(str);
	file_op.getline(str,1000);
	string nlocstr(str);
	nloc = atoi((nlocstr.substr( nlocstr.find_last_of('=',nlocstr.size()-1) + 1)).c_str());
	file_op.getline(str,1000);	
	string nindstr(str);
	nind = unsigned( atoi( (nindstr.substr(nindstr.find_last_of('=',nindstr.size()-1) + 1)).c_str() ) );
	outputfile = string(argv[1]).substr(0,string(argv[1]).find_last_of('.',string(argv[1]).size()-1));
	if(use_log) map_log<<"\nRUNNING FASTMAPPING ON "<<outputfile<<endl;
	outputfile.append("_map.loc");


	//read in loc data
	vector <int> dummy;
	string locname;
	int marker_quality; //0=acceptable, 1==bad segregation, 2==too many blanks
	bool individuals_preordered=false; //0 if no inidividual order is present in loc file	
	while(!file_op.eof()) 
    {
		file_op.getline(str,1000);
		if(string(str).size() > 0 && str[0]!='\t' && str[0]!=';') {

			marker_quality=read_loc(string(str), locname, mv_threshold, chisq_thresh_11,chisq_thresh_121,chisq_thresh_13,dummy);

			if(dummy.size()!=unsigned(nind)) {
				if(use_log) map_log<<"Terminating Program at marker "<<locname<<", number of plants "<<dummy.size()<<" different than stated value "<<nind<<endl;
				exit (0);
			}			
			if(marker_quality==0){
				locnames.push_back(locname);
				markers.push_back(dummy);
			}
			else if(marker_quality==1) {
				bad_segregation_locnames.push_back(locname);
				bad_segregation_markers.push_back(dummy);
			}
			else{
				excessive_blank_locnames.push_back(locname);
				excessive_blank_markers.push_back(dummy);
			}
		}
		else{
			string line_str=string(str);
			if(line_str.substr(0,8)==";indOrdr") {
				old_plant_order.clear();
				if(use_log) map_log<<"loading old inidividual order...\n";
				line_str=line_str.substr(line_str.find_first_of('\t'));
				while(line_str[0]=='\t') line_str=line_str.substr(1);
				string temp;
				unsigned int pos=0;
				while(pos < line_str.size() && line_str.at(pos)!='\t' && line_str.at(pos)!=';'){
					temp=line_str.substr(pos,line_str.find_first_of('\t',pos)-pos);
					old_plant_order.push_back(atoi(temp.c_str() ));
					pos=line_str.find_first_of('\t',pos);
					if(pos==string::npos) break;
					else pos=pos+1;
				}
				if(old_plant_order.size()==(unsigned int)nind) {
					individuals_preordered=true;
					if(use_log) map_log<<"loaded individual order successfull\n";
				}
			}
			if(line_str.substr(0,10)==";newscore="){
				oldscore = ";oldscore="+line_str.substr(line_str.find_first_of('=')+1,line_str.size()-1);
			}
			line_str.append("\n");
			comments.push_back(line_str);
		}
		dummy.clear();
    }   
	file_op.close();

	if(markers.size() + bad_segregation_markers.size() + excessive_blank_markers.size() !=unsigned(nloc)){
		if(use_log) map_log<<"Warning, stated loci="<<nloc<<" loci counted="<<markers.size() + bad_segregation_markers.size() + excessive_blank_markers.size()<<endl;
		exit(0);
	}
	nloc=markers.size();


	//Create the initial order of plants
	plant_order.clear();
	plant_order.resize(nind);
	for(int i=0;i<nind;i++){
		plant_order[i]=i;
	}


	//=================================================== grouping =======================================================
	cout<<"\ngrouping markers...";
	if(use_log) map_log<<"\nGROUPING MARKERS...\n";
	
	int group,previous_group=-1;
	bool unplaced;
	float r, L;

	for(unsigned int loc=0;loc<unsigned(nloc);loc++){
			group=0;
			unplaced = true;
			for(unsigned int j=0;j<grouping.size();j++) {
				for(unsigned int k=0;k<grouping.at(j).size();k++) {
					L=LOD(markers,loc,grouping[j][k],r);
					if(L > LOD_threshold_1 && unplaced) {  //place in group
						grouping.at(j).insert(grouping.at(j).end(),loc);
						group=j;
						unplaced=false;
						if(group!=previous_group) {
							if(use_log) map_log<<endl<<"Placing markers in group ("<<group+1<<"):";
							previous_group=group;
						}
						if(use_log) map_log<<loc<<",";
						break;
					}
					if(L > LOD_threshold_1 && !unplaced) {  //merge groups
						grouping.at(group).insert(grouping.at(group).end(),grouping.at(j).begin(),grouping.at(j).end());
						grouping.at(j).erase(grouping.at(j).begin(),grouping.at(j).end());
						if(use_log) map_log<<endl<<"Loci "<<loc<<" matches groups ("<<group+1<<") and ("<<j+1<<") , combining!";
						previous_group=-1;
						break;
					}
					
				}
			}
			if(unplaced) {
				grouping.resize(grouping.size()+1);
				grouping.at(grouping.size()-1).insert(grouping.at(grouping.size()-1).end(),loc);
				if(use_log) map_log<<endl<<"Creating new group ("<<grouping.size()<<"), Placing markers:"<<loc<<",";
				previous_group=grouping.size();
			}
	}
	if(use_log) map_log<<endl;
	
	//place satellites with progressively smaller threshold
	float LOD_thresholdt_1 = ((LOD_threshold_1-LOD_threshold_2)*2.0/3.0) + LOD_threshold_2;
	float LOD_thresholdt_2 = ((LOD_threshold_1-LOD_threshold_2)/3.0) + LOD_threshold_2;
	satellites(markers, grouping, LOD_thresholdt_1, chromosomes);
	satellites(markers, grouping, LOD_thresholdt_2, chromosomes);
	satellites(markers, grouping, LOD_threshold_2, chromosomes);

	//remove empty groups which will all be at the end of the vector
	sort(grouping.begin(),grouping.end());
	for(vector <vector <int> >::iterator  itr = grouping.begin(); itr != grouping.end(); itr++ ){
		if((*itr).size() == 0){ 
			grouping.erase(itr,grouping.end());
			break;
		}
	}

	
	for(unsigned int j=0;j<grouping.size();j++) {
		if(use_log) map_log<<"group ("<<j+1<<"):";
		for(unsigned int k=0;k<grouping.at(j).size();k++) {
			if(use_log) map_log<<grouping[j][k]+1<<" ";
		}
		if(use_log) map_log<<endl<<endl;
	}

	//=================================================== implement RECORD algorithm =======================================================
	cout<<"\nordering markers...";
	if(use_log) map_log<<"\nORDERING MARKERS...\n\n";

	//set the random seed, always the same right now
	int seed = 6;
    srand(seed);

	//run RECORD on each group
	singletons.resize(grouping.size());
	for(unsigned int j=0;j<grouping.size();j++) {
		if(grouping.at(j).size() > 1) {
			
			if(use_log) map_log<<"-------------------------------------------------------------\n";
			if(use_log) map_log<<"Running RECORD on group "<<j+1<<endl;
			
			if(grouping.at(j).size() > singleton_group_threshold) {
				int markers_removed=1;
				while(markers_removed!=0){
					record(markers,grouping.at(j));
					markers_removed = remove_singletons(markers, grouping.at(j), singletons.at(j));
					if(markers_removed>0) if(use_log) map_log<<"Removed Singletons!, ReRunning RECORD\n"<<endl;
				}
				//now copy back the previous order and hammer in the removed markers 
				int score=totalscore(markers,grouping.at(j));
				if(use_log) map_log<<"\nMain loop done!!!!, score="<<score<<"\nHammering in removed markers..."<<endl;

				hammer_markers(markers, grouping.at(j), singletons.at(j));
				score=totalscore(markers,grouping.at(j));
				if(use_log) map_log<<"\nRECORD done, score="<<score<<endl;
			}
			else {
				record(markers,grouping.at(j));
				if(use_log) map_log<<"-------------------------------------------------------------\n";
			}
		}
	}
	if(use_log) map_log<<"-------------------------------------------------------------\n";
	
	//now flip marker matrix and perform RECORD on the transpose
	if(order_plants) {
		markers_transpose.resize(nind);	
		for(int i=0;i<nind;i++)
			markers_transpose.at(i).resize(nloc);
		for(int i=0;i<nind;i++){
			for(int j=0;j<nloc;j++)
				markers_transpose[i][j]=markers[j][i];
		}
		if(use_log) map_log<<"Running record on the order of individuals"<<endl;
		record( markers_transpose, plant_order);
	}
	
	for(unsigned int j=0;j<grouping.size();j++) {
		newscore +=totalscore( markers, grouping.at(j));
	}
	
	//=================================================== write loc file of new order========================================================
	
	fstream fwr(outputfile.c_str(),ios::out);
	fwr<<filestr<<endl<<poptstr<<endl<<nlocstr<<endl<<nindstr<<endl;
	for(unsigned int i=0;i<3;i++){
		if(i<comments.size()){
			fwr<<comments[i];	
		}
		else fwr<<'\n';
	}
	
	
	fwr<<";grouping threshold LOD="<<LOD_threshold_1<<endl;
	fwr<<oldscore<<endl;
	fwr<<";newscore="<<newscore<<"\n;indOrdr\t";
	
	for(int j=0; j<nind;j++){
		if(individuals_preordered) fwr<<old_plant_order[plant_order[j]]<<"\t";
		else fwr<<plant_order[j]<<"\t";
	}

	fwr<<";\tr\tLOD\tdist";
	fwr<<endl;
    
	for(unsigned i=0;i<grouping.size();i++){
		for(unsigned j=0;j<grouping.at(i).size();j++){
			fwr<<locnames[grouping[i][j]];
			for(int k=0; k<nind; k++){
				fwr<<'\t'<<int2marker(markers[grouping[i][j]][plant_order[k]]);
			}
			if(j<grouping.at(i).size() - 1){
				L = LOD2(markers,grouping[i][j],grouping[i][j+1],r);
				fwr<<"\t;\t"<<r<<'\t'<<L;
				if(r<0.5) fwr<<'\t'<<100.0/4.0*log10((1+2*r)/(1-2*r));		//Kosambi mapping function
			}
			fwr<<endl;
		}
		if(use_log) map_log<<"singletons in group "<<i+1<<":";

		for(list<int>::iterator smitr = singletons[i].begin(); smitr != singletons[i].end(); smitr++ ){
			if(use_log) map_log<<locnames[*smitr]<<", ";
		}
		if(use_log) map_log<<"\n";
		
	}
	
	if(bad_segregation_markers.size() > 0) fwr<<";bad segregation ratio markers\n";
	for(unsigned i=0; i<bad_segregation_markers.size();i++){
		fwr<<bad_segregation_locnames[i]<<'\t';
		for(unsigned j=0;j<bad_segregation_markers.at(i).size();j++)
			fwr<<int2marker(bad_segregation_markers[i][plant_order[j]])<<'\t';
		fwr<<endl;
	}
	if(excessive_blank_markers.size() > 0) fwr<<"\n;excessive missing value markers\n";
	for(unsigned i=0; i<excessive_blank_markers.size();i++){
		fwr<<excessive_blank_locnames[i]<<'\t';
		for(unsigned j=0;j<excessive_blank_markers.at(i).size();j++)
			fwr<<int2marker(excessive_blank_markers[i][plant_order[j]])<<'\t';
		fwr<<endl;
	}
		
	fwr.close();
	if(use_log) map_log<<"\nDONE\n";
	cout<<"\nDONE";
	if(use_log) map_log.close();
	return 0;
} 
//=============================================== END OF MAIN======================================================================================
//=============================================== END OF MAIN======================================================================================
//=============================================== END OF MAIN======================================================================================
//=============================================== END OF MAIN======================================================================================


//============================================== record ============================================================================================
//implements the RECORD algorithm to sort markers
//input: dmat is the data matrix (one row per marker), ordr lists the rows to be ordered
//output: ordr holds the best ordering found
//The markers are first shuffled, then greedy placement followed by window improvement
//is repeated for as long as a pass lowers the total score. The ordering of the last pass
//that did lower the score is the one returned (the randomized ordering if no pass did).
//Groups of fewer than two markers are left untouched.
void record(vector <vector <int> > &dmat, vector<int> &ordr) {
	const int ndata=ordr.size();
	if(ndata <= 1) return;

	/////////randomize markers////////////////
	if(use_log) map_log<<"Running RECORD!, groupsize="<<ordr.size()<<". Randomizing order...\n";
	list <int> inital_order(ordr.begin(),ordr.end());
	list <int> to_place;
	for(int i=0;i<ndata;i++) to_place.push_back(poprand(inital_order));
	//the shuffled markers are taken from the back, i.e. in reverse drawing order
	for(int i=0;i<ndata;i++) {
		ordr[i]=to_place.back();
		to_place.pop_back();
	}

	int score =totalscore(dmat,ordr);
	if(use_log) map_log<<"randomized score="<<score<<endl;

	//best ordering so far. It starts out as the randomized ordering so that the copy-back
	//after the loop is valid even if the loop body never runs (negative starting score).
	vector <int> oldordr(ordr);

	int windowruns=1, oldscore=score+1; //score+1 forces a first pass

	//The loop repeating the RECORD algorithm until no further improvement
	while(score<oldscore && score >= 0){
		if(use_log) map_log<<"\nRECORD run "<<windowruns<<endl;
		windowruns++;

		//copy the old order so we can get back to it
		oldscore=score;
		oldordr=ordr;

		//STEP ONE: do greedy first placement
		score = initial_placement( dmat, ordr);
		if(use_log) map_log<<"inital window placement score = "<<score<<endl;

		//STEP TWO: do 'window' improvement
		window_improvements(dmat, ordr, score);
		score = totalscore(dmat,ordr);
		if(use_log) map_log<<"score = "<<score<<endl;
	}

	//the last pass did not lower the score, so fall back to the ordering before it
	ordr=oldordr;
	score=totalscore(dmat,ordr);
	if(use_log) map_log<<"\nRECORD Done, using score "<<score<<endl<<endl;
}

//change ordr by placing markers one by one in best position
//============================================== inital placement ================================================================
//input: dmat is the data matrix, ordr lists the markers to place; they are taken from the back of ordr
//output: ordr holds the markers in the order they ended up in
//returns: the score of the first pair plus the score increase recorded for every later placement;
//         0 when ordr has fewer than two markers (ordr is then left unchanged)
//Each marker is put where it costs least: in front of the first marker, between two
//adjacent markers, or behind the last marker.
int initial_placement(vector <vector <int> > &dmat, vector<int> &ordr) {
	const int ndata= ordr.size();
	//there is no pair to score and nothing to rearrange
	if(ndata < 2) return 0;

	list <int> to_place(ordr.begin(),ordr.end());
	list <int> placed;

	//place first two loci
	placed.push_back(to_place.back());
	to_place.pop_back();
	placed.push_back(to_place.back());
	to_place.pop_back();

	int score=scorepair(dmat,placed.front(),placed.back());

	while(!to_place.empty()){
		const int loc_ind=to_place.back();
		to_place.pop_back();

		//candidate: in front of the first placed marker
		list<int>::iterator best_pos=placed.begin();
		int best_dscore=scorepair(dmat,loc_ind,*best_pos);

		//candidates: between every adjacent pair (*left,*right); the cost is the two new
		//adjacencies minus the adjacency that gets broken up
		list<int>::iterator left=placed.begin();
		list<int>::iterator right=left;
		for(++right; right!=placed.end(); ++left, ++right){
			const int dscore=scorepair(dmat,*left,loc_ind) + scorepair(dmat,*right,loc_ind)
				- scorepair(dmat,*left,*right);
			if(dscore<best_dscore) {
				best_dscore=dscore;
				best_pos=right; //list insert places the new marker in front of best_pos
			}
		}

		//candidate: behind the last placed marker, which is what left refers to after the loop
		const int end_dscore=scorepair(dmat,*left,loc_ind);
		if(end_dscore<best_dscore) {
			score+=end_dscore;
			placed.push_back(loc_ind);
		}
		else {
			placed.insert(best_pos,loc_ind);
			score+=best_dscore;
		}
	}

	//copy list to order vector
	ordr.assign(placed.begin(),placed.end());
	return score;
}

//change ordr by running windows of increasing size along ordr and flipping if this improves the score
//============================================== window_improvements ====================================================================
//input: dmat is the data matrix, ordr the current ordering, score the score of that ordering
//output: ordr is modified in place; the running score is only used to decide when to stop and for the log
//One pass tries every window [j,j+i-1] of size i=2..ndata-2 and reverses it when that lowers
//the score. Only the adjacencies at the two ends of the window enter the difference.
//Passes are repeated until one of them no longer lowers the score.
//Orderings of fewer than four markers have no such window; one pass without reverses is logged.
void window_improvements(vector <vector <int> > &dmat, vector<int> &ordr, int score) {
	//all bounds are signed so that short or empty orderings give empty loops instead of wrapping around
	const int ndata = ordr.size();
	int oldscore=score+1; //score+1 forces a first pass
	int passes=0;
	while(score<oldscore && score >= 0){
		oldscore=score;
		passes++;
		if(use_log) map_log<<"pass "<<passes;
		int reverses=0;

		//iterate over window size i
		for(int i=2;i<ndata-1;i++){
			//dscore is the difference between the reversed score and the current score

			//window [0,i-1] at the start of the ordering: only its right hand adjacency changes
			int dscore=scorepair(dmat,ordr[0],ordr[i]) - scorepair(dmat,ordr[i-1],ordr[i]);
			if(dscore < 0 ) {
				reverse(ordr,0,i-1);
				score+=dscore;
				reverses++;
			}

			//windows [j,j+i-1] with a neighbour on both sides, i.e. j=1..ndata-i-1
			for(int j=1;j<ndata-i;j++){
				dscore = scorepair(dmat,ordr[j-1],ordr[j+i-1])+scorepair(dmat,ordr[j],ordr[j+i])
					- (scorepair(dmat,ordr[j-1],ordr[j])+scorepair(dmat,ordr[j+i-1],ordr[j+i]));
				if(dscore < 0) {
					reverse(ordr,j,j+i-1);
					score+=dscore;
					reverses++;
				}
			}

			//window [ndata-i,ndata-1] at the end of the ordering: only its left hand adjacency changes
			dscore=scorepair(dmat,ordr[ndata-i-1],ordr[ndata-1]) - scorepair(dmat,ordr[ndata-i-1],ordr[ndata-i]);
			if(dscore < 0 ) {
				reverse(ordr,ndata-i,ndata-1);
				score+=dscore;
				reverses++;
			}
		}
		//score=totalscore(dmat,ordr);
		if(use_log) map_log<<", "<<reverses<<" reverses made, score is "<<score<<endl;
	}
}




//singleton markers are removed from ordr, and added to singleton_markers
//============================================== remove_singletons ====================================================================
int remove_singletons(vector <vector <int> > &dmat, vector<int> &ordr, list<int> &singleton_markers) {
	int nloc = dmat[ordr[0]].size();
	int singletons;
	//float d=1.0-d_increment*c;  //we're not doing a fixed number of cycles anymore
	float yhat;
	list<int> remove;
	remove.clear();
	if(verbose_log) if(use_log) map_log<<"Printing singletons, format: (order:order index:individual:yhat)\n";
	for(unsigned int i=0;i<ordr.size();i++){ //iterate on all rows in group
		singletons=0;
		for(int k=0;k<nloc;k++){  //iterate across all markers
			yhat=0;
			for(int m=-delta;m<=delta;m++){  //iterate on delta width
				if(m!=0 && i+m>0 && i+m < ordr.size())
						yhat+=weights[abs(m)-1]*smooth_matrix[dmat[ordr[i+m]][k]][dmat[ordr[i]][k]];				
			}
			if(yhat>default_d){  //use fixed d threshold instead of lowering one
				singletons++;
				if(verbose_log) if(use_log) map_log<<"("<<ordr[i]<<":"<<i<<":"<<k<<":"<<yhat<<"),";
			}			
		}
		if(((float)singletons)/((float)nloc) > singleton_threshold){
			singleton_markers.push_back(ordr[i]);	
			remove.push_back(i);	

			if(use_log) map_log<<"removing marker="<<ordr[i]<<" at "<<i<<" for "<<singletons<<" singletons with score "<<((float)singletons)/((float)nloc)<<endl;
			
			if(verbose_log) {
				//all this prints out the marker line and those surrounding it
				if(use_log) map_log<<"k % 10\t";
				for(int k=0;k<nloc;k++) if(use_log) map_log<<k % 10<<" ";
				if(use_log) map_log<<endl;
				if(use_log) map_log<<i-1<<"\t";
				if(i>0){				
					for(int k=0;k<nloc;k++){
						if(use_log) map_log<<int2marker(dmat[ordr[i-1]][k])<<" ";
					}
					if(use_log) map_log<<endl;
				}
				if(use_log) map_log<<i<<"\t";
				for(int k=0;k<nloc;k++){				
					if(use_log) map_log<<int2marker(dmat[ordr[i]][k])<<" ";
				}
				if(use_log) map_log<<endl;
				if(use_log) map_log<<i+1<<"\t";
				if(i<ordr.size()){
					for(int k=0;k<nloc;k++){
						if(use_log) map_log<<int2marker(dmat[ordr[i+1]][k])<<" ";
					}
					if(use_log) map_log<<endl;
				}
			}
		}
	}
	//remove.sort();
	list<int>::iterator itr = remove.begin();
	vector<int>::iterator oitr;
	int removed=0;  //compensates the index in ordr for the entries removed
	for( itr = remove.begin(); itr != remove.end(); itr++ ){
		oitr=ordr.begin();
		for(int j=0;j<*itr-removed;j++){ oitr++; }
		//if(use_log) map_log<<"erasing marker "<<*oitr<<" at "<<*itr-removed<<endl;
		ordr.erase(oitr);
		removed++;
	}
	return remove.size();

}


//markers in singleton markers are added back into ordr, only ordr is changed
//============================================== Hammer markers ====================================================================
//input: dmat is the data matrix, ordr the current ordering, singleton_markers the markers to put back
//output: ordr contains every marker of singleton_markers; singleton_markers itself is left as it is
//The markers are inserted one after the other. Each goes to the position with the lowest
//score against its would-be neighbours: scorepair with the first marker for the front,
//scorepair with the last marker for the end, the sum of both neighbour scores in between.
//NOTE(review): unlike initial_placement, the in-between score does not subtract the score of
//the adjacency that gets broken up, which favours the two ends - confirm that this is intended.
void hammer_markers(vector <vector <int> > &dmat, vector<int> &ordr, list<int> &singleton_markers) {
	for(list<int>::iterator smitr = singleton_markers.begin(); smitr != singleton_markers.end(); smitr++ ){
		const int marker=*smitr;
		unsigned int bestpos=0;

		//an empty ordering (every marker of the group was removed) has no neighbour to
		//score against; the marker simply becomes the first entry
		if(!ordr.empty()){
			int bestscore=scorepair(dmat,ordr[0],marker);
			for(unsigned int i=1;i<ordr.size();i++){
				const int currentscore=scorepair(dmat,ordr[i-1],marker)+scorepair(dmat,ordr[i],marker);
				if(currentscore<bestscore){
					bestscore=currentscore;
					bestpos=i;
				}
			}
			//also try putting it at end
			if(scorepair(dmat,ordr[ordr.size()-1],marker)<bestscore) bestpos=ordr.size();
		}

		ordr.insert(ordr.begin()+bestpos,marker);
		if(use_log) map_log<<"replaced singleton marker="<<marker<<" at "<<bestpos<<endl;
	}
	if(use_log) map_log<<"Replaced "<<singleton_markers.size()<<" markers in group of size "<<ordr.size()<<endl;
}


//============================================== satellites ================================================================
//tries to merge every group beyond the first `chromosomes` groups ("satellites") into another group
//input: markers_ is marker data, grouping_ is the current grouping, lod_thresh is the LOD a pair of
//       markers has to exceed for a merge, chromosomes is the number of leading groups that are never moved
//output: grouping_ is sorted, stripped of empty groups and updated; merged satellites are left
//        behind as empty groups. lod_thresh is only read.
//A satellite is appended to the group containing the marker with the highest LOD against any
//of the satellite's markers. When there are no more than `chromosomes` groups nothing is merged.
//THIS ALGORITHM IS INEFFICIENT, IT"S n^4 INSTEAD OF n^3. THIS IS BAD, IT SHOULD BE FIXED!
void satellites(vector<vector <int> > &markers_, vector<vector <int> > &grouping_, float &lod_thresh, int chromosomes){
	if(use_log) map_log<<"Placing satellites with threshold "<<lod_thresh<<endl;

	//relies on the file's ordering of groups to bring the core groups to the front and the empty groups to the end
	sort(grouping_.begin(),grouping_.end());

	//remove empty groups: everything from the first empty group on
	for(vector <vector <int> >::iterator itr=grouping_.begin(); itr!=grouping_.end(); itr++){
		if((*itr).size() == 0){
			grouping_.erase(itr,grouping_.end());
			break;
		}
	}

	//indices instead of iterators: stepping an iterator `chromosomes` positions forward is
	//undefined when there are fewer groups than that
	const int ngroups=grouping_.size();
	if(chromosomes<0) chromosomes=0;

	for(int s=chromosomes;s<ngroups;s++){ //s skips the core groups
		vector<int> &satellite=grouping_[s];
		float best_lod=0, L, r;
		int best_group=-1; //-1: no group exceeds the threshold

		for(int g=0;g<ngroups;g++){
			if(g==s) continue;
			const vector<int> &candidate=grouping_[g];
			for(unsigned k=0;k<candidate.size();k++) {
				for(unsigned j=0;j<satellite.size();j++) {
					L=LOD(markers_,satellite.at(j),candidate.at(k),r);
					if(L > lod_thresh && L > best_lod) {
						best_lod=L;
						best_group=g;
					}
				}
			}
		}

		if(best_group>=0){
			grouping_[best_group].insert(grouping_[best_group].end(),satellite.begin(),satellite.end());
			satellite.clear();
		}
	}
}

//============================================== read_loc ====================================================================
//reads in a line of a .loc file and fills in &locname and &markers
//layout as parsed below: locus name, one or more tabs, then one genotype character at every second position
//until a tab, a ';' or the end of the line is reached
//input: mv_thresh is the largest accepted fraction of missing values, chisq_thresh_11 / _121 / _13 are the
//       chi-square limits for 1:1, 1:2:1 and 1:3 segregation
//output: locname is set, the genotype codes (see marker2int) are appended to markers_ (also when the locus is rejected)
//returns 0 if the locus is accepted, 1 if it fails its segregation test, 2 if it has too many missing values
int read_loc(string &locstr_, string &locname, float mv_thresh, float chisq_thresh_11, float chisq_thresh_121, float chisq_thresh_13, vector<int> &markers_) {
	float blanks=0;
	float As=0, Bs=0, Hs=0, Cs=0, Ds=0;
	unsigned int loc_name_end=locstr_.find_first_of('\t');
	//skip over a run of tabs; loc_name_end ends up on the last tab of the run
	//NOTE(review): locname therefore keeps the earlier tabs of a multi-tab separator - confirm this is wanted
	while(loc_name_end<locstr_.size() && locstr_[loc_name_end+1]=='\t'){
		loc_name_end++;
	}
	locname=locstr_.substr(0,loc_name_end);
	unsigned int i = loc_name_end+1;
	while(i < locstr_.size() && locstr_.at(i)!='\t' && locstr_.at(i)!=';'){
		const int code=marker2int(locstr_.at(i));
		markers_.push_back(code);
		switch(code){
		case 0: blanks++; break;
		case 1: As++; break;
		case 2: Bs++; break;
		case 3: Cs++; break;
		case 4: Ds++; break;
		case 5: Hs++; break;
		}
		i+=2; //genotype characters are one separator character apart
	}
	//segregation tests: each term is (observed-expected)^2/expected, and the statistic minus 1 (two-class tests)
	//or minus 2 (1:2:1 test) is compared with the matching threshold
	//N is the full size of markers_, so it counts missing values (and anything markers_ held on entry)
	float N=float(markers_.size());
	float chi_square;
	if(Cs==0 && Ds==0 && Hs==0) { //1:1 dominant recessive
		//expected count is N/2 for both classes (the first term used to mix N*3/2 into the deviation)
		chi_square=(As-N/2.0)*(As-N/2.0)*2.0/N + (Bs-N/2.0)*(Bs-N/2.0)*2.0/N ;
		if(chi_square - 1 > chisq_thresh_11) {
			if(use_log) map_log<<"rejecting loci "<<locname<<", bad AB  segregation, chi square="<<chi_square<<endl;
			return 1;
		}
	}
	else if(Cs<N/10.0 && Ds<N/10.0){ //1:2:1 dominant, codominant, recessive
		chi_square=(As-N/4.0)*(As-N/4.0)*4.0/N + (Bs-N/4.0)*(Bs-N/4.0)*4.0/N +(Hs-N/2.0)*(Hs-N/2.0)*2.0/N;
		if(chi_square-2.0 > chisq_thresh_121) {
			if(use_log) map_log<<"rejecting loci "<<locname<<", bad 1:2:1  segregation, chi square="<<chi_square<<endl;
			return 1;
		}
	}
	else if(Cs>N*3.0/10.0){ //1:3 
		chi_square=(As-N/4.0)*(As-N/4.0)*4.0/N + (Cs-N*3.0/4.0)*(Cs-N*3.0/4.0)*4.0/(3.0*N);
		if(chi_square - 1 > chisq_thresh_13) {
			if(use_log) map_log<<"rejecting loci "<<locname<<", bad AC  segregation, chi square="<<chi_square<<endl;
			return 1;
		}
	}
	else if(Ds>N/10.0){ //1:3
		chi_square=(Bs-N/4.0)*(Bs-N/4.0)*4.0/N + (Ds-N*3.0/4.0)*(Ds-N*3.0/4.0)*4.0/(3.0*N);
		//this is a 1:3 test like the A:C one above, so it is judged against the 1:3 threshold
		if(chi_square - 1 > chisq_thresh_13) {
			if(use_log) map_log<<"rejecting loci "<<locname<<", bad BD  segregation, chi square="<<chi_square<<endl;
			return 1;
		}
	}
	if( blanks/N > mv_thresh) {
		if(use_log) map_log<<"rejecting loci "<<locname<<", too many missing values, "<<blanks<<" missing values"<<endl;
		return 2; //reject if too many blanks
	}
	//if(use_log) map_log<<"loci "<<locname<<" okay"<<endl;
	return 0;
}

//============================================== read_mat ================================================================
//reads in a line of a matrix file and fills in a row of the matrix
//input: line holds up to 6 tab separated integers; matrow must already have room for 6 entries (it is indexed, not grown)
//returns 0 for an empty line or a line starting with ';' (matrow is left untouched), 1 otherwise
//an entry that converts to -1 ends the program through exit(0)
//NOTE(review): atoi gives 0, not -1, for text that is not a number, so this check only catches a literal -1
int read_mat(vector <int> &matrow, string &line){
	//line.at(0) would throw on an empty line, so treat it like a comment line
	if(line.empty() || line[0]==';') return 0;
	int col=0;
	string::size_type strpos=0;
	string::size_type tab=line.find_first_of('\t',strpos);
	//every field that is followed by a tab
	while(tab != string::npos && col < 6) {
		matrow[col]=atoi(line.substr(strpos,tab-strpos).c_str());
		if(matrow[col]==-1){
			if(use_log) map_log<<"matrix entries should be integers! ";
			exit(0); //terminates whether or not logging is enabled
		}
		col++;
		strpos=tab+1;
		tab=line.find_first_of('\t',strpos);
	}
	//the sixth field has no trailing tab: it runs to the end of the line
	if( tab == string::npos && col==5 && (strpos < line.size()) ) {
		matrow[col]=atoi((line.substr(strpos, line.size()-strpos)).c_str());
		if(matrow[col]==-1){
			if(use_log) map_log<<"matrix entries should be integers! ";
			exit(0);
		}
	}
	return 1;
}


//============================================== marker2int ================================================================
//maps a genotype character to its integer code: 'A'->1, 'B'->2, 'C'->3, 'D'->4, 'H'->5, anything else->0
int marker2int(char m){
	static const char letters[5]={'A','B','C','D','H'};
	for(int k=0;k<5;k++){
		if(m==letters[k]) return k+1;
	}
	return 0;
}

//============================================== int2marker ================================================================
//maps an integer code back to its genotype character: 1->'A', 2->'B', 3->'C', 4->'D', 5->'H', anything else->'-'
char int2marker(int a){
	static const char letters[6]={'-','A','B','C','D','H'};
	if(a<1 || a>5) return '-';
	return letters[a];
}

//============================================== scorepair ================================================================
//sums penalty_matrix[g1][g2] over the positions of markers_[loc1], where g1 and g2 are the genotype codes of
//loc1 and loc2 at that position
//returns 0 when either index lies outside markers_
//markers_[loc2] is read at every position of markers_[loc1], so it must be at least as long
int scorepair(vector<vector <int> > &markers_, int loc1, int loc2) {
	const bool loc1_ok = loc1>=0 && unsigned(loc1)<markers_.size();
	const bool loc2_ok = loc2>=0 && unsigned(loc2)<markers_.size();
	if(!loc1_ok || !loc2_ok)
		return 0;

	const vector<int> &first=markers_[loc1];
	const vector<int> &second=markers_[loc2];
	int total=0;
	for(unsigned int plant=0;plant<first.size();plant++){
		total+=penalty_matrix[first[plant]][second[plant]];
	}
	return total;
}

//============================================== reverse ================================================================
//reverses the segment v[a..b] (both ends included) in place
//needs a<b with both indices inside v; anything else (a==b included) only writes a log line and leaves v alone
void reverse(vector <int> &v, int a, int b) {
	const bool usable = a<b && a>=0 && b>=0 && unsigned(a)<v.size() && unsigned(b)<v.size();
	if(!usable) {
		if(use_log) map_log<<"Bad indicies ("<<a<<","<<b<<") in reverse function!\n";
		return;
	}
	//swap the ends and walk inwards until the two positions meet
	int lo=a;
	int hi=b;
	while(lo<hi) {
		const int held=v[lo];
		v[lo]=v[hi];
		v[hi]=held;
		lo++;
		hi--;
	}
}

//============================================== poprand ================================================================
//removes one entry of list_ at a position drawn with rand() and returns its value
//list_ must not be empty: the chosen entry is dereferenced without a check
int poprand(list <int> &list_) {
	//scale rand() into a position 0..size-1
	const int steps=int(list_.size() * rand()/(RAND_MAX+1.0));
	list<int>::iterator chosen=list_.begin();
	for(int left=steps; left>0; left--)
		++chosen;
	const int popped=*chosen;
	list_.erase(chosen);
	return popped;
}

//============================================== totalscore ================================================================
//calculate total score of &markers in order &order (vector version)
//the total is the sum of scorepair() over every pair of neighbouring entries of order_
//returns 0 for an order with fewer than two entries
int totalscore(vector<vector <int> > &markers_, vector <int> &order_) {
	int totalscore_ = 0;
	//i+1<size instead of i<size-1: size()-1 wraps around to a huge unsigned value when order_ is empty
	for(unsigned int i=0;i+1<order_.size();i++){
		//if(use_log) map_log<<(order_[i])+1<<",";
		totalscore_+=scorepair(markers_,order_[i],order_[i+1]);
	}
	return totalscore_;
}

//============================================== totalscore ================================================================
//calculate total score of &markers in order &order (list version)
//the total is the sum of scorepair() over every pair of neighbouring entries of order_
//returns 0 for an order with fewer than two entries
int totalscore(vector<vector <int> > &markers_, list <int> &order_) {
	int totalscore_ = 0;
	list<int>::iterator first_=order_.begin();
	if(first_==order_.end())
		return totalscore_;

	//second_ always runs one entry ahead of first_ and is tested against end() before it is dereferenced
	list<int>::iterator second_=first_;
	++second_;
	while(second_!=order_.end()){
		totalscore_+=scorepair(markers_,*first_,*second_);
		//if(use_log) map_log<<scorepair(markers_,*first_,*second_)<<",";
		++first_;
		++second_;
	}

	return totalscore_;
}

//============================================== LOD ================================================================
//return the LOD of two markers
//input: markers_ is marker data (genotype codes as produced by marker2int), loc1 and loc2 are the two loci
//output: r receives the recombination estimate (Y+Z/2)/N, capped at 0.5
//returns 9999 when r is exactly 0, otherwise the log10 likelihood expression below
//C and D calls are split before counting: 1/3 of each C goes to B and 2/3 to H, 1/3 of each D goes to A and 2/3 to H
//pairs with a missing value (code 0) are tallied but never enter X, Y, Z or N
float LOD(vector<vector <int> > &markers_, int loc1, int loc2, float &r) {
	const int plants=markers_.at(loc1).size();

	//counts[g1][g2] = number of plants with genotype g1 at loc1 and g2 at loc2
	float counts[6][6]={{0}};
	for(int p=0;p<plants;p++){
		counts[markers_[loc1][p]][markers_[loc2][p]]++;
	}

	//move 1/3 of C's into B's, 2/3 into H's, then the same for D's into A's and H's
	//each pass empties the rows of the split class first and its columns second
	const int split_class[2]={3,4};
	const int target_class[2]={2,1};
	for(int pass=0;pass<2;pass++){
		const int from=split_class[pass];
		const int to=target_class[pass];
		for(unsigned int j=0;j<6;j++){
			counts[to][j]+=counts[from][j]/3;
			counts[5][j]+=counts[from][j]*2/3;
			counts[from][j]=0;
		}
		for(unsigned int i=0;i<6;i++){
			counts[i][to]+=counts[i][from]/3;
			counts[i][5]+=counts[i][from]*2/3;
			counts[i][from]=0;
		}
	}

	//calculate R
	//X: same genotype at both loci, Y: opposite homozygotes, Z: homozygote with heterozygote, W: both heterozygous
	float X=counts[1][1]+counts[2][2]+counts[5][5];
	float Y=counts[1][2]+counts[2][1];
	float Z=counts[1][5]+counts[5][1]+counts[2][5]+counts[5][2];
	float W=counts[5][5];
	float N=X+Y+Z;
	r = (Y+Z/2)/N;
	if(r>0.5) r=0.5;
	//r_c is only written to the log, it does not influence the returned value
	float Rc=1+r*((2*r*r-3*r+1)/(2*r*r-r*r+1));
	float r_c= (Y+Z/2)/(N-W*Rc);

	float q = 1-r;
	float lod_value;
	if(r==0){
		lod_value=9999;
	}
	else {
		lod_value=W*log10(q*q+r*r)+(X-W)*log10(q*q)+Z*log10(q*r)+Y*log10(r*r)-(N-W)*log10(1.0/4.0)-W*log10(1.0/2.0);
		//lod_value=W*log10(2.0*(q*q+r*r))+(X-W)*log10(4.0*q*q)+Y*log10(4.0*r*r)+Z*log10(4.0*q*r);
	}
	if(verbose_log && use_log) {
		map_log<<"loci "<<loc1 + 1<<" and "<<loc2 + 1<<" have LOD="<<lod_value<<" r="<<r<<" r_c="<<r_c<<" Y="<<Y<<" Z="<<Z<<" N="<<N<<endl;
	}

	return lod_value;
}



//============================================== LOD2 ================================================================
//return the LOD of two markers, counting C and D calls directly instead of splitting them the way LOD does
//input: markers_ is marker data (genotype codes as produced by marker2int), loc1 and loc2 are the two loci
//output: r receives the recombination estimate (Y+Z/2)/N, capped at 0.5
//returns 9999 when r<=0, 0.00001 when r>=0.5, otherwise the log10 likelihood expression below
//pairs with a missing value (code 0) are tallied but never enter X, Y, Z or N
float LOD2(vector<vector <int> > &markers_, int loc1, int loc2, float &r) {
	const int plants=markers_.at(loc1).size();
	int cd_count=0; //plants with a C or D call at either locus, only reported in the log

	//counts[g1][g2] = number of plants with genotype g1 at loc1 and g2 at loc2
	float counts[6][6]={{0}};
	for(int p=0;p<plants;p++){
		const int g1=markers_[loc1][p];
		const int g2=markers_[loc2][p];
		counts[g1][g2]++;
		if(g1==3 || g1==4 || g2==3 || g2==4) cd_count++;
	}


	//calculate R
	//X, Y, Z and W are fixed sets of genotype combinations; W is the part of X that involves an H, or a C with a D
	float X=counts[1][1]+counts[1][4]+counts[4][1]+counts[4][4]
		+counts[2][2]+counts[2][3]+counts[3][2]+counts[3][3]
		+counts[5][5]+counts[5][4]+counts[4][5]+counts[5][3]+counts[3][5]+counts[4][3]+counts[3][4];
	float Y=counts[1][2]+counts[2][1];
	float Z=counts[1][5]+counts[5][1]+counts[1][3]+counts[3][1]
		+counts[2][5]+counts[5][2]+counts[2][4]+counts[4][2];
	float W=counts[5][5]+counts[5][4]+counts[4][5]+counts[5][3]+counts[3][5]+counts[4][3]+counts[3][4];
	float N=X+Y+Z;
	r = (Y+Z/2)/N;
	if(r>0.5) r=0.5;
	//r_c is only written to the log, it does not influence the returned value
	float Rc=1+r*((2*r*r-3*r+1)/(2*r*r-r*r+1));
	float r_c= (Y+Z/2)/(N-W*Rc);

	float q = 1-r;
	float lod_value;
	if(r<=0){
		lod_value=9999;
	}
	else if(r>=0.5){
		lod_value=0.00001;
	}
	else {
		lod_value=W*log10(q*q+r*r)+(X-W)*log10(q*q)+Z*log10(q*r)+Y*log10(r*r)-(N-W)*log10(1.0/4.0)-W*log10(1.0/2.0);
	}
	if(verbose_log && use_log) {
		map_log<<"loci "<<loc1 + 1<<" and "<<loc2 + 1<<" have LOD="<<lod_value<<" r="<<r<<" r_c="<<r_c<<" X="<<X<<" Y="<<Y<<" Z="<<Z<<" W="<<W<<" N="<<N<<" cd_count="<<cd_count<<endl;
	}

	return lod_value;
}

//orders vectors of int by size, larger first: a<b is true when a holds more entries than b
//(the contents are not compared)
bool operator<(const vector <int> &a, const vector <int> &b) {
    const vector<int>::size_type left_count = a.size();
    const vector<int>::size_type right_count = b.size();
    return right_count < left_count;
}
