#!/usr/bin/perl -w
use strict;

# Extract per-locus gene feature cells from an HTML report and write a
# plain-text summary to /tmp/locus_features.txt.
#
# Usage: <this script> <input_file>
#   input_file - the "blast clean results" file (per the original author's
#                note); scanned line by line for the markers below.
#
# Recognised input lines:
#   <h1>Locus: NAME</h1>                    starts a new locus
#   <!-- gene feature -->                   turns feature collection on
#   <!-- genetic_marker_locus_vector -->    turns feature collection off
#   <td class="sm">TEXT</td>                a feature cell (only collected
#                                           while collection is on)
#
# Dies with a message if the argument is missing or a file cannot be
# opened/closed, instead of silently producing an empty report.
my $filein = shift;
defined $filein
    or die "Usage: $0 <blast_clean_results_file>\n";

my $fileout = "/tmp/locus_features.txt";

# Cells are consumed in groups of this size when printing; only the first
# two cells of each group are written out.
# NOTE(review): presumably the other two cells per group are columns that are
# not of interest -- confirm against the layout of the input report.
my $cells_per_group = 4;

open my $in_fh, '<', $filein
    or die "Cannot open '$filein' for reading: $!\n";

# Locus name => array ref of feature cell texts, in file order.
my %locus_features;

# Array ref for the locus currently being read; undef until the first locus
# header is seen, so that cells appearing before any header are discarded.
my $current_features;

# True while we are between a "gene feature" marker and the following
# "genetic_marker_locus_vector" marker.
my $in_feature_space = 0;

while (my $line = <$in_fh>) {
    if ($line =~ m{<h1>Locus:\s+(\S+)</h1>}) {
        # Register the locus immediately and collect straight into its
        # array, so the last locus in the file is not lost at end of input.
        # A repeated locus name starts over and replaces the earlier entry.
        $current_features = $locus_features{$1} = [];
        next;
    }

    if ($line =~ /<!--\s*gene\s*feature\s*-->/) {
        $in_feature_space = 1;
        next;
    }

    if ($line =~ /<!--\s*genetic_marker_locus_vector\s*-->/) {
        $in_feature_space = 0;
        next;
    }

    if ($in_feature_space
        and $line =~ m{<td class="sm">\s*([^<]*)\s*</td>})
    {
        # The capture is greedy, so any whitespace before </td> stays part
        # of the captured text; this is kept as-is to preserve the output.
        my $feature_detail = $1;

        # Skip placeholder cells.
        next if $feature_detail eq '&nbsp;';

        push @{$current_features}, $feature_detail
            if defined $current_features;
        next;
    }
}

close $in_fh;

open my $out_fh, '>', $fileout
    or die "Cannot open '$fileout' for writing: $!\n";

for my $locus (sort keys %locus_features) {
    my @features = @{ $locus_features{$locus} };

    print {$out_fh} "\n" . '=' x 80 . "\n";
    print {$out_fh} "$locus\n" . '-' x 80 . "\n";

    for (my $i = 0; $i < @features; $i += $cells_per_group) {
        my $first = $features[$i];

        # The final group may hold a single cell; print an empty second
        # column rather than warning about an undefined value.
        my $second = defined $features[ $i + 1 ] ? $features[ $i + 1 ] : '';

        print {$out_fh} $first . "\t" . $second . "\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface at close.
close $out_fh
    or die "Error closing '$fileout': $!\n";
