#!/usr/local/bin/perl

###############################################################################
# Program     : CompareIsolates 
# $Id: CompareIsolates 2022-06-01  
#
# Description : Prints summary of a given protein given selection
#               atlas build and protein name.
#
# SBEAMS is Copyright (C) 2000-2021 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation.  It is provided
# WITHOUT ANY WARRANTY.  See the full description of GPL terms in the
# LICENSE file distributed with this software.
#
###############################################################################


###############################################################################
# Set up all needed modules and objects
###############################################################################
use strict;
use Getopt::Long;
use FindBin;
use Data::Dumper;
use List::MoreUtils qw(uniq);
$|++;

use lib "$FindBin::Bin/../../lib/perl";
use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username
             $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE
             $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME
             @MENU_OPTIONS);
use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::Connection::TabMenu;
$sbeams = new SBEAMS::Connection;
$sbeams->enable_sql_cache();
my $htmlmode;

use SBEAMS::BioLink;
use SBEAMS::BioLink::Tables;
my $biolink = SBEAMS::BioLink->new();
$biolink->setSBEAMS($sbeams);

use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;

$sbeamsMOD = new SBEAMS::PeptideAtlas;
$sbeamsMOD->setSBEAMS($sbeams);
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);

use SBEAMS::PeptideAtlas::KeySearch;
my $keySearch = new SBEAMS::PeptideAtlas::KeySearch;
$keySearch->setSBEAMS($sbeams);

use SBEAMS::PeptideAtlas::BestPeptideSelector;
my $bestPeptideSelector = new SBEAMS::PeptideAtlas::BestPeptideSelector;
$bestPeptideSelector->setSBEAMS($sbeams);
$bestPeptideSelector->setAtlas($sbeamsMOD);

# Global sequence coverage array, will be populated post-graphic
my @coverage;
my $max_intensity = 1;
my %coverage=();

# Swiss Prot annotations
my @sp_rows;
my $sp_rationale;

my $current_page = { organism => '', atlas_build_id => '' };
my $parameters;
my $swiss; # fetch info from swissprot/nextprot if applicable


use constant MIN_OBS_LENGTH => 6;
use constant MAX_OBS_LENGTH => 40;

###############################################################################
# Set program name and usage banner for command like use
###############################################################################
$PROG_NAME = $FindBin::Script;
$USAGE = <<EOU;
Usage: $PROG_NAME [OPTIONS] key=value key=value ...
Options:
  --verbose n         Set verbosity level.  default is 0
  --quiet             Set flag to print nothing at all except errors
  --debug n           Set debug flag

 e.g.:  $PROG_NAME [OPTIONS] [keyword=value],...

EOU

#### Process options
unless (GetOptions(\%OPTIONS,"verbose:s","quiet","debug:s")) {
  print "$USAGE";
  exit;
}

$VERBOSE = $OPTIONS{"verbose"} || 0;
$QUIET = $OPTIONS{"quiet"} || 0;
$DEBUG = $OPTIONS{"debug"} || 0;

if ($DEBUG) {
  print "Options settings:\n";
  print "  VERBOSE = $VERBOSE\n";
  print "  QUIET = $QUIET\n";
  print "  DEBUG = $DEBUG\n";
}


###############################################################################
# Set Global Variables and execute main()
###############################################################################
main();
#$sbeams->profile_sql( list=>1);
exit(0);


###############################################################################
# Main Program:
#
# Call $sbeams->Authenticate() and exit if it fails or continue if it works.
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
      permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin',
      'PeptideAtlas_readonly', 'PeptideAtlas_exec'],
      #connect_read_only=>1,
      allow_anonymous_access=>1,
  ));


  #### Read in the default input parameters
  my %parameters;
  $parameters = \%parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
      q=>$q,
      parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);

  #### Process generic "state" parameters before we start
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);

  # Cache current output mode
  $htmlmode = ( $sbeams->output_mode() eq 'html' ) ? 1 : 0;

  #### Decide what action to take based on information so far
  if ($parameters{action} eq "???") {
      # Some action
  } else {
      handle_request(ref_parameters=>\%parameters);
  }


} # end main

###############################################################################
# Handle Request
###############################################################################
sub handle_request {

  my %args = @_;

  my $spacer = $sbeams->getGifSpacer( 900 );

  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
      || die "ref_parameters not passed";

  my %parameters = %{$ref_parameters};

  #### Get the HTML to display the tabs
  my $tabMenu = $sbeamsMOD->getTabMenu(
    parameters_ref => \%parameters,
    program_name => $PROG_NAME,
  );
  my $contact_id = $sbeams->getCurrent_contact_id();
#  die $contact_id;

  my $protein_list_sql = '';
  if ( $parameters{atlas_build_id} && $parameters{protein_name} ) {
    $protein_list_sql = qq~
    SELECT DISTINCT AB.atlas_build_id
    FROM $TBAT_ATLAS_BUILD AB
    JOIN $TBAT_PROTEIN_LIST_BUILD PLB
      ON AB.atlas_build_id = PLB.atlas_build_id
    JOIN $TBAT_PROTEIN_LIST PL
      ON PL.protein_list_id = PLB.protein_list_id
    JOIN $TBAT_PROTEIN_LIST_PROTEIN PLP
      ON PL.protein_list_id = PLP.protein_list_id
    JOIN $TBAT_BIOSEQUENCE B ON ( B.biosequence_set_id = AB.biosequence_set_ID
                            AND B.biosequence_name = PLP.protein_name )
    WHERE PL.contributor_contact_id = $contact_id
      AND AB.atlas_build_id = $parameters{atlas_build_id}
      AND PLP.protein_name = '$parameters{protein_name}'
    ~;
  }

  #### Get the current atlas_build_id based on parameters or session
  my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID(
    allow_protein_list_login => 1,
    protein_list_sql => $protein_list_sql,
    parameters_ref => \%parameters,
    display_page_header_footer => 1,
    PROG_NAME => $PROG_NAME,
      );

  if (defined($atlas_build_id) && $atlas_build_id < 0) {
    return;
  }
  $parameters{atlas_build_id} = $atlas_build_id;
  $current_page->{atlas_build_id} = $atlas_build_id;


  $parameters->{motif} = $sbeamsMOD->getBuildMotif( build_id => $atlas_build_id ) || '';

  #### If a new protein_name was supplied, store it
  my $protein_name = $parameters{"protein_name"};
  my $cached_protein_name = $sbeams->getSessionAttribute(
      key => 'PeptideAtlas_protein_name',
    );
  if ($protein_name) {
    $sbeams->setSessionAttribute(
      key => 'PeptideAtlas_protein_name',
      value => "$protein_name",
    );
    if ($protein_name ne $cached_protein_name){
			$parameters{exp_cat_full} = '';
			$parameters{exp_cat} = '';
    }
  #### Else see if we had one stored
  } else {
    if ($cached_protein_name){
      $protein_name = $cached_protein_name;
      $parameters{'apply_action'} = 'GO';
    }
  }

  #### Get the search keyword
  if (! $parameters->{caching}){
    my $html_cache_name ='';
		if ($ENV{REQUEST_URI} =~ /CompareIsolates$/){
       $protein_name =~ s/^\s+|\s+$//;
       $html_cache_name = lc("apply_action=QUERY\&atlas_build_id=$atlas_build_id".
                    "\&protein_name=$protein_name");
		}else{
			if($ENV{"REQUEST_URI"}=~ /\?/) {
          my ($path , $query_string) = split(/\?/,  $ENV{"REQUEST_URI"});
          my %parameter_hash =();
					my @TempArray=split("&", $query_string);
					foreach my $item (@TempArray) {
						my ($Key, $Value)=split("=", $item);
            $Value =~ s/^\s+|\s+$//;
						$parameter_hash{$Key}=$Value;
					}
          @TempArray = ();
          foreach my $param(sort {$a cmp $b} keys %parameter_hash){
            push @TempArray, "$param=$parameter_hash{$param}";
          }
          $html_cache_name= lc(join('&', @TempArray));
			}
		}
		my $html_cache_loc  = '';
    if ($PHYSICAL_BASE_DIR !~ /dev\w+\/sbeams/){
      $html_cache_loc = "/net/dblocal/www/html/sbeamscommon/htmlcache/CompareIsolates";
    }else{
      $html_cache_loc = "$PHYSICAL_BASE_DIR/htmlcache/CompareIsolates";
    }
    #$log->debug("html_cache_name  $ENV{REQUEST_URI}  $html_cache_loc/$html_cache_name");
    if ( -e "$html_cache_loc/$html_cache_name"){
      if (open (IN,"<$html_cache_loc/$html_cache_name")){
				print "Content-type: text/html\n\n";
				print $_ while (<IN>);
				close IN;
				exit;
      }
    }
  }

  my $project_id = $sbeamsMOD->getProjectID( atlas_build_id => $parameters{atlas_build_id});
  $sbeamsMOD->display_page_header(project_id => $project_id, init_tooltip => 1);
  print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html');

  $parameters{use_nextprot} =  ( $parameters->{use_nextprot} ne '' && $parameters->{use_nextprot} == 0  ) ? 0 : 1; 
  $swiss = $sbeamsMOD->get_uniprot_annotation( show_all_snps => 1,
					           accession => $protein_name,
                     build_id  => $parameters{atlas_build_id}, 
					        use_nextprot => $parameters{use_nextprot} );

  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);

  #### Read in the standard form values
  my $apply_action  = $parameters{'action'} || $parameters{'apply_action'};
  my $TABLE_NAME = $parameters{'QUERY_NAME'};

  #### Set some specific settings for this program
  my $CATEGORY="Get Protein";
  my $PROGRAM_FILE_NAME = $PROG_NAME;
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";
  my $help_url = "$CGI_BASE_DIR/help_popup.cgi";

  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams('q' => $q);
  my $n_params_found = 0;

  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(
      resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      resultset_params_ref=>\%rs_params,
    );
    $n_params_found = 99;
  }

  #### Get a list of accessible project_ids
  my @accessible_project_ids = $sbeams->getAccessibleProjects();
  my $accessible_project_ids = join( ",", @accessible_project_ids ) || '0';

  #### Get a hash of available atlas builds
  my $sql = qq~
      SELECT atlas_build_id,atlas_build_name
      FROM $TBAT_ATLAS_BUILD
      WHERE project_id IN ( $accessible_project_ids )
      AND record_status!='D'
      ORDER BY atlas_build_name
  ~;
  my %atlas_build_names = $sbeams->selectTwoColumnHash($sql);

  my %name2build = reverse( %atlas_build_names );
  my @ordered_atlas_build_ids;
  for my $name ( sort ( keys( %name2build ) ) ) {
    push @ordered_atlas_build_ids, $name2build{$name};
  }

  unless ( grep /^$atlas_build_id$/, @ordered_atlas_build_ids ) {
    my ($build_name) = $sbeams->selectOneColumn( "SELECT atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE atlas_build_id = $atlas_build_id" );
    push  @ordered_atlas_build_ids, $atlas_build_id;
    $atlas_build_names{$atlas_build_id} = $build_name;
  }
#  die "ID is $atlas_build_id, and name is $atlas_build_names{$atlas_build_id}";

  #### If the output_mode is HTML, then display the form
  if ($sbeams->output_mode() eq 'html') {

      print qq~
        <script LANGUAGE="Javascript">
          function switchAtlasBuild() {

            var sel = document.getElementById("exp_cat");
            var options = [];
            for (var i=0; i<sel.length; i++) {
              options.push(sel[i].value);
            }
            var redundancy_constraint = document.getElementById("redundancy_constraint"); 
            document.SearchForm.exp_cat_full.value = options.toString();
            document.SearchForm.apply_action.value = "GO";
            document.SearchForm.redundancy_constraint.value = redundancy_constraint.value;
            document.SearchForm.submit();
          }
          document.title = "PeptideAtlas Protein: $protein_name";
        </script>
      ~;

      print "<P>";

      print "<nobr>";

      print $q->start_form(-method=>"POST",
                           -action=>"$base_url",
			   -name=>"SearchForm",
                          );
      print "PeptideAtlas Build: ";

      print $q->popup_menu(-name => "atlas_build_id",
                           -values => [ @ordered_atlas_build_ids ],
                           -labels => \%atlas_build_names,
                           -default => $atlas_build_id,
                           -onChange => 'switchAtlasBuild()',
			  );


      print $q->br; 
      print "Protein Name: ";
      print $q->textfield( -name => "protein_name", -size=>50, -default=>$protein_name );
      print $q->br;
      print $q->br; 
      print "\n<div style='display: flex;align-items: center;'>Experiment Categories: \n";
      print '<img src="/sbeams/images/greyqmark.gif" title="show peptides observed in the selected sample categories">';
      print "\n";
      print $q->scrolling_list(-id=>'exp_cat',
                               -name=>'exp_cat',
                               -values=>[],
                               -size=>4, 
                               -style =>'margin-left:5px;',
                               -multiple=>'true');
      print qq~
            <label style="margin-left:20px;">
            No Redundancy:<img src='/sbeams/images/greyqmark.gif' title='remove redundant sequence entries'>
      ~;
      if ($parameters{redundancy_constraint} =~ /on/){
        print "<input type=checkbox name='redundancy_constraint' checked='yes'></input></label>\n" ;
      }else{
        print "<input type=checkbox name='redundancy_constraint'></input></label>\n" ;
      }
      print "\n";
      print $q->hidden( "apply_action", '');
      print "\n";
      print $q->hidden( "exp_cat_full", '');
      print "\n";
      print $q->submit(-name => "action",
                       -value => 'QUERY',
                       -onClick => 'switchAtlasBuild()',
											 -style => 'margin-left:50px;',
                       -label => 'GO');
      print "\n</div>\n";
      print $q->end_form;
      print "\n";
      print "</nobr>";
      print "</P>";
      $current_page->{organism} = $sbeamsMOD->getCurrentAtlasOrganism( parameters_ref => { atlas_build_id => $atlas_build_id } );
      my $gaggle = $sbeams->getGaggleMicroformat(
                                          organism => $current_page->{organism},
                                              data => [$protein_name],
                                            object => 'namelist',
                                              name => 'Protein name',
                                              type => 'direct' );
      print "$gaggle\n";
  }

  #########################################################################
  #### Process all the constraints

  #### If atlas_build_id was not selected, stop here
  unless ($atlas_build_id) {
    if ($sbeams->output_mode() eq 'html') {
      print "Please select an atlas build, type in a protein name ".
        "(e.g. ENSP00000290230 for human Ensembl protein), and ".
				"click GO";
    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'INSUFFICIENT CONSTRAINTS',
          message => 'You must provide an atlas_build_id',
      );
    }
    return;
  }


  #### If biosequence_name was not selected, stop here
  unless ($protein_name) {
    if ($sbeams->output_mode() eq 'html') {
      print "Please type in a protein name ".
        "(e.g. ENSP00000290230 for human Ensembl protein), and ".
				"click GO";
    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'INSUFFICIENT CONSTRAINTS',
          message => 'You must provide a protein_name',
      );
    }
    return;
  }


  my $sql = qq~
    SELECT IS_TRYPSIN_BUILD
    FROM $TBAT_ATLAS_BUILD
    WHERE ATLAS_BUILD_ID = $atlas_build_id
  ~ ;

  my @row = $sbeams->selectOneColumn($sql);
  my $is_trypsin_build = 'Y';
  if(@row){
    if ($row[0] eq 'N'){
      $is_trypsin_build = 'N';
    }
  }
  if($is_trypsin_build eq 'N'){
    print qq~
          <div style="border:5px double; color:white; background-color: CC0000; width: 600px;"  >
          WARNING: This build contains many samples that are not digested with trypsin
          and therefore the usual trypsin-centric display elements are turned off.
          </div>
    ~;
  }

  #### Build ATLAS_BUILD constraint
  my $atlas_build_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"AB.atlas_build_id",
      constraint_type=>"int_list",
      constraint_name=>"Atlas Build",
      constraint_value=>$atlas_build_id );
  return if ($atlas_build_clause eq '-1');


  #### Build PROTEIN_NAME constraint
  my $biosequence_name_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BS.biosequence_name",
      constraint_type=>"plain_text",
      constraint_name=>"Protein Name",
      constraint_value=>$protein_name );
  return if ($biosequence_name_clause eq '-1');


  #### Define the SQL statement to find the biosequence
  $sql = qq~
       SELECT BS.biosequence_id, BS.biosequence_name
         FROM $TBAT_ATLAS_BUILD AB
        INNER JOIN $TBAT_BIOSEQUENCE_SET BSS
              ON ( AB.biosequence_set_id = BSS.biosequence_set_id )
        INNER JOIN $TBAT_BIOSEQUENCE BS
              ON ( BSS.biosequence_set_id = BS.biosequence_set_id )
       WHERE 1 = 1
          $atlas_build_clause
          $biosequence_name_clause
  ~;
  my %biosequences = $sbeams->selectTwoColumnHash($sql);
  my $n_biosequences = scalar(keys(%biosequences));

  #### If the protein was not found, report the problem
  if ($n_biosequences == 0) {
    if ($sbeams->output_mode() eq 'html') {
      print qq~
        This protein identifier was not found in
        atlas build '$atlas_build_names{$atlas_build_id}'.
				<br>
				Please check selections and try again.
				<p>
				<form method="post" action="Search">
				Or, search for the identifer across all major PeptideAtlas builds:
				&nbsp;&nbsp;<input type="text" name="search_key" value="$protein_name" size="20" maxlength="60"/>
				<input type="submit" name="apply_action" value="GO" /><BR>
			~;
			## looking for similar ids
			my $sql = qq~
				SELECT SKE.RESOURCE_NAME, SKE.RESOURCE_TYPE, BS.BIOSEQUENCE_ID
				FROM $TBAT_SEARCH_KEY_ENTITY SKE
				JOIN $TBAT_BIOSEQUENCE BS ON (BS.BIOSEQUENCE_NAME = SKE.SEARCH_KEY_NAME)
				JOIN $TBAT_ATLAS_BUILD AB ON (AB.BIOSEQUENCE_SET_ID = BS.BIOSEQUENCE_SET_ID
				AND AB.ATLAS_BUILD_ID = $atlas_build_id) 
				WHERE SKE.RESOURCE_NAME in (
					(
						 SELECT SKE2.PROTEIN_ALIAS_MASTER
						 FROM $TBAT_SEARCH_KEY_ENTITY SKE2
						 WHERE SKE2.RESOURCE_NAME like '$protein_name%'
					 )
					UNION
					( SELECT SKE4.RESOURCE_NAME
						FROM $TBAT_SEARCH_KEY_ENTITY SKE4
						WHERE  SKE4.PROTEIN_ALIAS_MASTER like '$protein_name%' 
					)
					UNION (
						 SELECT SKE4.RESOURCE_NAME
						 FROM $TBAT_SEARCH_KEY_ENTITY SKE4
						 WHERE SKE4.search_key_name like '$protein_name%'
					)
				)
			~;
			my @rows = $sbeams->selectSeveralColumns ($sql);
			if (@rows){
				my %related_names = ();
				my %ids;
				foreach my $row(@rows){
					my ($name, $type, $biosequence_id) = @$row;
					$related_names{$name} = $type;
					$ids{$biosequence_id} =1;
				}
				my $biosequence_ids = join (",", keys %ids);
				$sql = qq~
					 SELECT DISTINCT B1.BIOSEQUENCE_NAME, B2.BIOSEQUENCE_NAME 
					 FROM $TBAT_BIOSEQUENCE_RELATIONSHIP BR 
					 LEFT JOIN $TBAT_BIOSEQUENCE B1 ON (B1.BIOSEQUENCE_ID = BR.RELATED_BIOSEQUENCE_ID) 
					 LEFT JOIN $TBAT_BIOSEQUENCE B2 ON (B2.BIOSEQUENCE_ID = BR.REFERENCE_BIOSEQUENCE_ID)
					 WHERE BR.atlas_build_id = $atlas_build_id
					 AND (BR.related_biosequence_id in ($biosequence_ids) 
						OR BR.reference_biosequence_id in ($biosequence_ids))
				 AND RELATIONSHIP_TYPE_ID = 2
				~;
				@rows = $sbeams->selectSeveralColumns ($sql);
				my %biosequence_identifcal = ();
				foreach my $row(@rows){
					my ($name1, $name2) = @$row;
					$biosequence_identifcal{$name1}{$name2} =1;
					foreach my $p (keys %{$biosequence_identifcal{$name2}}){
						$biosequence_identifcal{$name1}{$p} =1;
					}
					$biosequence_identifcal{$name2}{$name1} = 1;
					foreach my $p (keys %{$biosequence_identifcal{$name1}}){
						$biosequence_identifcal{$name2}{$p} =1;
					}

				}
				print  "<br><h4>Here are some related protein names in the same build. Proteins in the same row have identical sequences.<br><br>" if (keys %related_names); 
				foreach my $name(sort {$a cmp $b} keys %related_names){
					print "<a href='" . "$CGI_BASE_DIR/PeptideAtlas/GetProtein?atlas_build_id=$atlas_build_id\&protein_name=$name\&action=QUERY" .
							"'>$name</a>";
					if (defined $biosequence_identifcal{$name}){
						foreach my $name3(sort {$a cmp $b} keys %{$biosequence_identifcal{$name}}){
							next if ($name3 eq $name);
							print ", <a href='" .
						"$CGI_BASE_DIR/PeptideAtlas/GetProtein?atlas_build_id=$atlas_build_id\&protein_name=$name3\&action=QUERY" .
						"'>$name3</a>";
						}
					}
					print "<br><br>";
				}
        print "</h4>";

      }else{
				$sbeams->reportException(
					state => 'ERROR',
					type => 'RECORD NOT FOUND',
					message => "The protein entered '$protein_name' was not found ".
					"in the atlas build '$atlas_build_names{$atlas_build_id}'.",
						);
      }
    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'RECORD NOT FOUND',
          message => "The protein entered '$protein_name' was not found ".
            "in the atlas build '$atlas_build_names{$atlas_build_id}'.",
      );
    }
    return;
  }


  #### If more than one protein was found, list them
  if ($n_biosequences > 1) {
    if ($sbeams->output_mode() eq 'html') {
      print qq~
        Several ($n_biosequences) entries were returned from your request
        for '$protein_name' in the
        atlas build '$atlas_build_names{$atlas_build_id}'. Please select
        the appropriate one:<BR><BR>
      ~;
      while (my ($key,$value) = each (%biosequences)) {
				print "<b>$value</b><BR><BR>";
      }

    } else {
      $sbeams->reportException(
          state => 'ERROR',
          type => 'TOO MANY RECORDS',
          message => qq~More than one ($n_biosequences) was returned from
             your request
             for '$protein_name' in the
             atlas build '$atlas_build_names{$atlas_build_id}'.~,
      );
    }
    return;
  }

  #### Extract the one biosequence_id
  my ($biosequence_id) = keys(%biosequences);

  ## get proteins in the same cluster of this protein
  #protein_name
  my @proteins_in_cluster;
  my $proteins_in_cluster_str;
  my %bioseq_strain;
  my @bioseq_ids =();
  my %pfam_domain =();
  my $tree_plot =  '';
  my $newick_data = '';
  my $cluster_id;
  my $build_path='';

  if ( ($parameters->{restore} || $parameters->{bioseq_id}) && $apply_action !~ /QUERY/ ) {
    my $list_names = '';
    if ($parameters->{bioseq_id}) {
      if ($parameters->{restore}){
        $list_names = $sbeamsMOD->getBioseqNamesFromBioseqenceIDs (biosequence_ids => $parameters->{orig_bioseq_id});
      }else{
        $list_names = $sbeamsMOD->getBioseqNamesFromBioseqenceIDs (biosequence_ids => $parameters->{bioseq_id});
      }
    }
    
    $list_names =~ s/\s+//g;
    @proteins_in_cluster = uniq split(",", $list_names);
  }
	## get cluster info
	$build_path = $sbeamsMOD->getAtlasBuildDirectory( atlas_build_id => $atlas_build_id ); 
	my $cluster_id_file = "$build_path/Orthogroups.txt";
	open (IN, "<$cluster_id_file") or $sbeams->reportException(
					state => 'ERROR',
					type => 'FILE not found',
					message => "can't find file $build_path/Orthogroups.txt"
			);
	 while (my $line = <IN>){
		 if ($line =~ /$protein_name/){
				chomp $line;
				$line =~ /^(\S+):\s+(.*)$/;
				$cluster_id = $1;
				foreach my $p(split(/\s+/, $2)){
					push @proteins_in_cluster, $p if ($apply_action =~ /QUERY/);
				}
		 }
	 }
	 if (@proteins_in_cluster < 2){
			$sbeams->reportException(
					state => 'ERROR',
					type => 'Too Few Sequence',
					message => "Needs more than one sequence in cluster.", 
			);
			return;
	 }

	 my $gene_tree_file = "$build_path/OrthoFinder/Resolved_Gene_Trees//$cluster_id"."_tree.txt";
	 if (open (IN,"<$gene_tree_file")){
		$newick_data = <IN>;
		 close IN;
		 $tree_plot = $sbeamsMOD->draw_tree($newick_data); 
	 }

	 my $hmmscan_out_file ="$build_path/pfam.out";
	 ## <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> 
	 #<hmm length> <bit score> <E-value> <significance> <clan>
	 #WP_014540656.1     26    466     24    466 PF01425.21  Amidase           Family     3   451   451    478.2  2.6e-143   1 No_clan
	 if (open (IN,"<$hmmscan_out_file")){
		 my %proteins_in_cluster = map {$_ => 1} @proteins_in_cluster;
		 my @colors = qw(#2dcf00 #ff5353 #5b5bff #ebd61d #ba21e0 #ff9c42 #ff7dff #b9264f #baba21 #c48484 #1f88a7 #cafeb8 #4a9586 #ceb86c #0e180f #2dcfff);
		 my $color_counter = 0;
		 my %used_color = ();
		 #my %pfam_result = ();
		 while (my $line = <IN>){
			 next if ($line =~ /^#/);
			 chomp $line;
			 my ($name,$aliS,$aliE,$eS,$eE,$hmm_acc,$hmm_name,$type,$hS,$hE,$len,$bit,$e,$sig,$clan) = split(/\s+/, $line);
			 next if (not defined $proteins_in_cluster{$name});
			 my %data =();
			 $data{type} = "pfama";
			 $data{text} = $hmm_name;
			 if (defined $used_color{$hmm_name}){
				 $data{colour} = $used_color{$hmm_name};
			 }else{
				 $data{colour} = $colors[$color_counter];
				 $used_color{$hmm_name} = $colors[$color_counter];
				 $color_counter++;
				 $color_counter = 0 if ($color_counter > $#colors);
			 }
			 $data{display} = 'true';
			 $data{start} = $eS;
			 $data{end} = $eE;
			 if ($hS > 1 ){
				 $data{startStyle} = 'jagged';
			 }else{
				 $data{startStyle} = 'curved';
			 } 
			 if ($hE < $len){
				 $data{endStyle} = 'jagged';
			 }else{
				 $data{endStyle} = 'curved';
			 }
			 if ($type =~/^(repeat|modif)/i){
				 $data{endStyle} = 'straight' if ($data{endStyle} ne 'jagged');
				 $data{startStyle} = 'straight' if ($data{startStyle} ne 'jagged');
			 } 
			 $data{metadata}{'scoreName'} = 'e-value';
			 $data{metadata}{'score'}= $e;
			 $data{metadata}{'bitscore'} =$bit; 
			 $data{metadata}{'accession'} = $hmm_acc;
			 $data{metadata}{'type'} = $type;
			 $data{metadata}{'database'} = 'pfam';
			 $data{metadata}{'aliEnd'} = $aliE;
			 $data{metadata}{'aliStart'} = $aliS;
			 $data{metadata}{'start'} = $eS;
			 $data{metadata}{'end'} = $eE;
			 $data{metadata}{'hmmstart'} = $hS;
			 $data{metadata}{'hmmend'} = $hE;
			 $data{metadata}{'hmmlen'} = $len; 
			 $data{metadata}{'description'} = $hmm_name;  
			 push @{$pfam_domain{$name}{regions}}, \%data;

		 }
	 } 


	 $proteins_in_cluster_str = join("','", @proteins_in_cluster);
	 $sql = qq~
			SELECT biosequence_ID,biosequence_name, BS.biosequence_desc, biosequence_seq
			FROM $TBAT_ATLAS_BUILD AB
			JOIN $TBAT_BIOSEQUENCE_SET BSS ON AB.biosequence_set_id = BSS.biosequence_set_id
			JOIN $TBAT_BIOSEQUENCE BS ON BSS.biosequence_set_id = BS.biosequence_set_id
			WHERE AB.atlas_build_id IN ( $atlas_build_id )
			AND BS.biosequence_name in ('$proteins_in_cluster_str')
		~; 
	 my @results = $sbeams->selectSeveralColumns($sql);
	 my %tmp =();
   my %unique_sequences =();
   if ($parameters{redundancy_constraint} =~ /on/){
		 ## get strain info
		 foreach my $row(@results){
			 my $sequence = $row->[3];
			 $sequence =~ s/\*+$//;
			 if ($row->[2] =~ /strain\/isolate=(\S+)/){
					$bioseq_strain{$row->[0]} = $1;
			 }else{
					$bioseq_strain{$row->[0]} = ''; 
			 }
			 $unique_sequences{$sequence}{scalar(split(",", $bioseq_strain{$row->[0]}))}{$row->[1]}=$row->[0];
			 $tmp{$row->[0]} = $row->[1] if ($row->[1] eq $protein_name);
		 }
		 
		 LOOP:foreach my $seq (keys %unique_sequences){
			 foreach my $n (sort {$b <=> $a} keys %{$unique_sequences{$seq}}){
				 my @ids = sort {$b cmp $a} keys %{$unique_sequences{$seq}{$n}};
				 $tmp{$unique_sequences{$seq}{$n}{$ids[0]}} = $ids[0];
				 next LOOP;
			 }
		 }
		 @proteins_in_cluster = ();
		 ## sort protein names
		 foreach my $id (sort {$bioseq_strain{$a} cmp $bioseq_strain{$b}} keys %bioseq_strain){
			 push @proteins_in_cluster, $tmp{$id} if (defined $tmp{$id} || $tmp{$id} eq $protein_name);
			 push @bioseq_ids, $id if (defined $tmp{$id} || $tmp{$id} eq $protein_name);
		 }
   }else{
		 foreach my $row(@results){
			 $tmp{$row->[0]} = $row->[1];
			 my $sequence = $row->[3];
			 $sequence =~ s/\*+$//;
			 $unique_sequences{$sequence} =1;
			 if ($row->[2] =~ /strain\/isolate=(\S+)/){
					$bioseq_strain{$row->[0]} = $1;
			 }else{
					$bioseq_strain{$row->[0]} = '';
			 }
		 }
		 @proteins_in_cluster = ();

		 ## sort protein names
		 foreach my $id (sort {$bioseq_strain{$a} cmp $bioseq_strain{$b}} keys %bioseq_strain){
			 push @proteins_in_cluster, $tmp{$id};
			 push @bioseq_ids, $id;
		 }
   }

   %tmp = ();
   $proteins_in_cluster_str = join("','", @proteins_in_cluster);

   #print "cluster_id=$cluster_id proteins=".join(",", @proteins_in_cluster) ."<BR>";
   close IN;

  #### Return some information about these biosequences
  $sql = qq~
	 SELECT BS.biosequence_id, BS.biosequence_name,BS.biosequence_gene_name,
					BS.biosequence_accession, BS.biosequence_desc,
					BS.biosequence_seq,BSS.set_name,
					DBX.accessor,DBX.accessor_suffix,
					BAG.annotated_gene_id,
					BPS.transmembrane_class,
					BPS.transmembrane_topology,
					BPS.has_signal_peptide,
					BPS.has_signal_peptide_probability,
					BPS.signal_peptide_length,
					BPS.signal_peptide_is_cleaved,
					BPS.published_ng_per_ml,
					BPS.pub_ng_ml_source,
					PID.probability, 
					PID.confidence,
					PID.n_observations,
					PPL.level_name,
					BSRT.relationship_name,
					BS_REP.biosequence_name as "group_rep",
					BS_REF.biosequence_name as "reference_biosequence_name",
					BS_REF_REP.biosequence_name as "ref_group_rep",
					BS_REF_REF_REP.biosequence_name as "ref_ref_group_rep",
					BPS.tm_sigp_source,
          LEN(CONVERT(varchar(MAX), BS.biosequence_seq)) as "length"
		 FROM $TBAT_ATLAS_BUILD AB 
     JOIN $TBAT_BIOSEQUENCE_SET BSS ON (AB.biosequence_set_id = BSS.biosequence_set_id AND AB.atlas_build_id = $atlas_build_id)
		 INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id )
		 LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE_ANNOTATED_GENE BAG ON ( BAG.biosequence_id = BS.biosequence_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE_PROPERTY_SET BPS ON ( BS.biosequence_id = BPS.biosequence_id )
		 LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID ON ( PID.biosequence_id = BS.biosequence_id AND PID.atlas_build_id = $atlas_build_id)
		 LEFT JOIN $TBAT_BIOSEQUENCE BS_REP ON ( BS_REP.biosequence_id = PID.represented_by_biosequence_id )
		 LEFT JOIN $TBAT_PROTEIN_PRESENCE_LEVEL PPL ON ( PPL.protein_presence_level_id = PID.presence_level_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP BSR ON ( BSR.related_biosequence_id = BS.biosequence_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP_TYPE BSRT ON ( BSRT.biosequence_relationship_type_id = BSR.relationship_type_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE BS_REF ON ( BS_REF.biosequence_id = BSR.reference_biosequence_id )
		 LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID_REF  ON ( PID_REF.biosequence_id = BS_REF.biosequence_id 
					AND  PID_REF.atlas_build_id = $atlas_build_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REP ON ( BS_REF_REP.biosequence_id = PID_REF.represented_by_biosequence_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP BSR_REF ON ( BSR_REF.related_biosequence_id = BS_REF.biosequence_id )
		 LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REF ON ( BS_REF_REF.biosequence_id = BSR_REF.reference_biosequence_id )
		 LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID_REF_REF ON ( PID_REF_REF.biosequence_id = BS_REF_REF.biosequence_id )
		      AND ( PID_REF_REF.atlas_build_id = $atlas_build_id ) 
     LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REF_REP ON ( BS_REF_REF_REP.biosequence_id = PID_REF_REF.represented_by_biosequence_id )
		WHERE BS.biosequence_name in ('$proteins_in_cluster_str')
  ~;
  my @rows = $sbeams->selectHashArray($sql);
  my $n_rows = scalar @rows;

  ## order result set
  my %result =();
  foreach my $biosequence(@rows){
    $result{$biosequence->{biosequence_name}} = $biosequence;
  }
  my @rows =();
  foreach my $biosequence_name (@proteins_in_cluster){
    push @rows, $result{$biosequence_name};
  }  
  my $biosequences = \@rows;

	#############################################################################
	#### Print out a summary of the proteins
	#############################################################################
  my %biosequence_id_info = ();
	my $htmlcontent = '';
	my $tr = 'class="hoverable"';
	if ($sbeams->output_mode() eq 'html') {
		$htmlcontent = qq~
			<table style="margin-left:15px;">
		~;
  }

  foreach my $biosequence (@rows){
		#### Build the full identification status
		my $biosequence_name = $biosequence->{biosequence_name};
 		my $presence_level = $biosequence->{level_name};
		my $redundancy_rel = $biosequence->{relationship_name};
		my $identification_status = '';
		my %prepositions = ( subsumed=>"by", "ntt-subsumed"=>"by", identical=>"to", indistinguishable=>"from" );
    next if (defined $biosequence_id_info{$biosequence_name});

		if (defined $presence_level) {
			$identification_status = $presence_level;
		} elsif (defined $redundancy_rel) {
			$identification_status = $redundancy_rel;
		}

		if ($prepositions{$identification_status}) {
			$identification_status .= " $prepositions{$identification_status} $biosequence->{reference_biosequence_name}";
		}
    $biosequence_id_info{$biosequence_name}{'biosequence_id'} = $biosequence->{biosequence_id}; 
		#### Display a large title, status, and status description
    $biosequence_id_info{$biosequence_name}{'Identification Status'} = $identification_status;
		if ($sbeams->output_mode() eq 'html') {
			$htmlcontent .= $sbeamsMOD->encodeSectionItem(
				key=>"$biosequence_name",
				tr_info => $tr,
				value=>$biosequence->{biosequence_desc}
			);
    }
    $biosequence_id_info{$biosequence_name}{biosequence_gene_name} = $biosequence->{biosequence_gene_name};
    $biosequence_id_info{$biosequence_name}{'Sequence Length'} = $biosequence->{length};

		my $group_rep = '';
		# if this bioseq has a presence_level ...
		if (defined $biosequence->{group_rep}) {
			$biosequence_id_info{$biosequence_name}{'Protein Group'} = $biosequence->{group_rep};
		# if this bioseq is ident/indist from a bioseq with a presence level ...
		} elsif ( defined $biosequence->{ref_group_rep}) {
			$biosequence_id_info{$biosequence_name}{'Protein Group'} =  $biosequence->{ref_group_rep};
		# if this bioseq is ident/indist from an indist bioseq ...
		} elsif ( defined $biosequence->{ref_ref_group_rep}) {
			$biosequence_id_info{$biosequence_name}{'Protein Group'} = $biosequence->{ref_ref_group_rep};
		}
    $biosequence_id_info{$biosequence_name}{'Protein Group'} = "<a href=\"$CGI_BASE_DIR/PeptideAtlas/GetProteins?".
                                                               'protein_group_representative_constraint='. 
                                                               $biosequence_id_info{$biosequence_name}{'Protein Group'} . 
                                                               '&apply_action=QUERY">' . 
                                                               $biosequence_id_info{$biosequence_name}{'Protein Group'} .
                                                               '</a>';
		if ( defined $biosequence->{estimated_ng_per_ml}) {
			my $conc_string = sprintf("%0.1f ng/ml (%s uncertainty)",
																	 $biosequence->{estimated_ng_per_ml},
																	 $biosequence->{abundance_uncertainty});
			$biosequence_id_info{$biosequence_name}{'Estimated concentration'} = $conc_string;
		}
		if ( defined $biosequence->{published_ng_per_ml}) {
			my $conc_string = sprintf("%0.1f ng/ml (%s)",
																 $biosequence->{published_ng_per_ml},
																 $biosequence->{pub_ng_ml_source});
			 $biosequence_id_info{$biosequence_name}{'Published concentration'} = $conc_string;
		}

		# Check to see if there is ortholog information
#		my $orthologs = get_ortholog_information( $biosequence );
#		if ( $orthologs ) {
#			$biosequence_id_info{$biosequence_name}{'Ortholog Group'} = $orthologs;
#		}else{
#			$biosequence_id_info{$biosequence_name}{'Ortholog Group'} =  '';
#		}
  } 


  #############################################################################
  #### Continue with main query

  #### Define the desired columns in the query
  #### [friendly name used in url_cols,SQL,displayed column title]
  my $exp_cat_str = "'". join("','", split(",", $parameters{exp_cat})) ."'"; 
  my $group_by_clause = '';
  my $sample_category_clause = '';
  my @labels = ( 'Accession', 'Start', 'Pre AA','Sequence', 'Fol AA', 'ESS', 'NET', 'NMC' ,'Best Prob', 'Best Adj Prob', 'N Obs', 'EOS', 'SSRT', 'N Prot Map', 'N Gen Loc', 'N Experiments', 'Subpep of', 'Isolate Map' );

  my @column_array = (
    ["peptide_accession","P.peptide_accession","Accession"],
    ["start_in_biosequence", "PM.start_in_biosequence", "<SPAN TITLE='Start postion in the protein'>Start</SPAN>"],
    ["preceding_residue","PI.preceding_residue","<SPAN TITLE='Preceding (towards the N terminus) amino acid'>Pre AA</SPAN>"],
    ["peptide_sequence","P.peptide_sequence","<SPAN TITLE='Amino Acid sequence of this peptide'>Sequence</SPAN>"],
    ["following_residue","PI.following_residue","<SPAN TITLE='Following (towards the C terminus) amino acid'>Fol AA</SPAN>"],
    ["suitability_score","0.0001","<SPAN TITLE='Score derived from peptide probability, EOS, and sequence characteristics such as missed cleavage(MC) or semi-tryptic(NTT); these are annotated in red.'>ESS</SPAN>"],
    ["highest_n_enzymatic_termini","''","<SPAN TITLE='Highest # of enzymatic termini'>NET</SPAN>"],
    ["lowest_n_missed_cleavages","''","<SPAN TITLE='Lowest # of missed cleavage'>NMC</SPAN>"],
    ["best_probability","STR(PI.best_probability,7,3)","<SPAN TITLE='Highest PeptideProphet probability for this observed sequence'>Best Prob</SPAN>"],
    ["best_adjusted_probability","STR(PI.best_adjusted_probability,7,3)","Best Adj Prob"],
  );
  my $group_by_clause .= "GROUP BY P.peptide_accession, PM.start_in_biosequence, PI.preceding_residue,P.peptide_sequence,PI.following_residue,PI.best_probability,PI.best_adjusted_probability,";
    
	push @column_array, (["n_observations","PI.n_observations","<SPAN TITLE='Total number of observations in all modified forms and charge states'>N Obs</SPAN>"]);
	$group_by_clause .= "PI.n_observations,"; 
  push @column_array, (
    ["empirical_proteotypic_score","STR(PI.empirical_proteotypic_score,7,2)","<SPAN TITLE='Empirical Observability Score'>EOS</SPAN>"],
    ["SSRCalc_relative_hydrophobicity","STR(P.SSRCalc_relative_hydrophobicity,7,2)","<SPAN TITLE='SSRCalc Relative Hydrophobicity score'>SSRT</SPAN>"],
    ["n_protein_mappings","PI.n_protein_mappings","<SPAN TITLE='Number of proteins in the reference database to which this peptide maps'>N Prot Map</SPAN>"],
    ["n_genome_locations","PI.n_genome_locations","<SPAN TITLE='Number of discrete genome locations which encode this amino acid sequence'>N Gen Loc</SPAN>"],
    ["sample_ids","PISB.atlas_search_batch_id","N Experiments"],
    ["is_subpeptide_of","PI.is_subpeptide_of",'Subpep of'],
    ["peptide_instance_id","PI.peptide_instance_id","peptide_instance_id"],
    ["isolate_map","''","Isolate Map"],
    ["biosequence_id","PM.matched_biosequence_id", "biosequence_id"],
    
  );
  $group_by_clause .= "PI.empirical_proteotypic_score,P.SSRCalc_relative_hydrophobicity,PI.n_protein_mappings,PI.n_genome_locations,PI.is_subpeptide_of,PI.peptide_instance_id, PISB.atlas_search_batch_id, PI.is_subpeptide_of, PM.matched_biosequence_id";
  if ($parameters{exp_cat}){
    $sample_category_clause = "AND SC.name in ($exp_cat_str)";
  } 
    

  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );

  my $peptide_instance_clause='';

 
  my $biosequence_id_str = join(",", @bioseq_ids);
  #### Define a query to return peptides for this protein
  $sql = qq~
     SELECT DISTINCT $columns_clause
     FROM $TBAT_PEPTIDE_INSTANCE PI
     JOIN $TBAT_PEPTIDE_INSTANCE_SEARCH_BATCH PISB ON (PISB.peptide_instance_id = PI.peptide_instance_id)
     JOIN $TBAT_ATLAS_BUILD_SEARCH_BATCH ASB ON (ASB.ATLAS_SEARCH_BATCH_ID = PISB.ATLAS_SEARCH_BATCH_ID)
     JOIN $TBAT_SAMPLE S ON (S.SAMPLE_ID = ASB.SAMPLE_ID)
     JOIN $TBAT_SAMPLE_CATEGORY SC ON (SC.id = S.sample_category_id)
     INNER JOIN $TBAT_PEPTIDE P ON ( PI.peptide_id = P.peptide_id )
     LEFT JOIN $TBAT_PEPTIDE_MAPPING PM ON ( PI.peptide_instance_id = PM.peptide_instance_id )
     INNER JOIN $TBAT_ATLAS_BUILD AB ON ( PI.atlas_build_id = AB.atlas_build_id )
     LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id )
     LEFT JOIN $TB_ORGANISM O ON ( BSS.organism_id = O.organism_id )
     LEFT JOIN $TBAT_BIOSEQUENCE BS ON ( PM.matched_biosequence_id = BS.biosequence_id )
     LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id )
     LEFT JOIN $TBAT_PEPTIDE_INSTANCE_ANNOTATION PIA ON  PIA.peptide_instance_id = PI.peptide_instance_id
                                                  AND  PIA.peptide_instance_id = PI.peptide_instance_id
     LEFT JOIN $TBAT_SPECTRUM_ANNOTATION_LEVEL  SAL ON  
            SAL.spectrum_annotation_level_id = PIA.spectrum_annotation_level_id
	          AND PIA.record_status != 'D'
    WHERE 1 = 1
          AND AB.atlas_build_id IN ( $atlas_build_id )
					AND BS.biosequence_id in ($biosequence_id_str)
          $sample_category_clause
	  $group_by_clause
    ORDER BY P.peptide_accession
  ~;

  #### Define the hypertext links for columns that need them
  %url_cols = (
    'Accession' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=%V&action=QUERY",
  );

  %hidden_cols = (
    'best_adjusted_probability' => 1,
    'peptide_instance_id' => 1,
    'biosequence_id' => 1
  );

  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) {

    #### Show the SQL that will be or was executed
    $sbeams->display_sql(sql=>$sql) if ($show_sql);

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /(QUERY|GO)/i) {

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(
        sql_query=>$sql,
        resultset_ref=>$resultset_ref,
      );

        #### Post process the resultset
        postProcessResultset(
          rs_params_ref=>\%rs_params,
          resultset_ref=>$resultset_ref,
          query_parameters_ref=>\%parameters,
          atlas_build_id => $atlas_build_id,
          column_titles_ref=>\@column_titles,
          bioseq_strain => \%bioseq_strain,
        );

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(
        resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        resultset_params_ref=>\%rs_params,
        query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME",
      );
    }

    #### Get protein structure information
 
    my ($obs, $peptides, $pep_nobs, $seq2instance, $pep_info);
    my %peptide_list   =();
    foreach my $biosequence(@$biosequences){
			( $obs, $peptides, $pep_nobs, $seq2instance, $pep_info ) = getPeptideCount( biosequence => $biosequence,
											 resultset_ref => $resultset_ref );
			my $cnt = scalar(uniq @$peptides );
      $peptide_list{$biosequence} = $peptides;
      $biosequence_id_info{$biosequence->{biosequence_name}}{'Distinct Peptides'} = $cnt;
      $biosequence_id_info{$biosequence->{biosequence_name}}{'Total Observations'} = $obs;
       
    }
		### biosequence_id_info
		my @data =();
    my @headings = ( 'Sequence Length',
                     'biosequence_gene_name',
										 'Identification Status',
										 'Protein Group',
										 #'Ortholog Group',
										 'Distinct Peptides',
										 'Total Observations'
										);
		 
		foreach my $biosequence_name(@proteins_in_cluster){
      my @record;
      push @record, $bioseq_strain{$biosequence_id_info{$biosequence_name}{'biosequence_id'}};
 
      push @record, "<a href=\"$CGI_BASE_DIR/PeptideAtlas/GetProtein?atlas_build_id=$atlas_build_id&protein_name=$biosequence_name" .
									 "&apply_action=QUERY\">$biosequence_name</a>";

			foreach my $col (@headings){
        push @record , $biosequence_id_info{$biosequence_name}{$col} || '';
			}
      push @data , \@record;
		}
    if ($htmlmode){
        @headings = ('Strain/Isolate', 'Protein Name', @headings);
				$htmlcontent .= "</table>\n";
				print scalar $sbeams->make_toggle_section( neutraltext => "$protein_name Ortholog Group",
								 sticky => 1,
									 name => 'getprotein_overview_div',
								barlink => 1,
								visible => 1,
								content => $htmlcontent );
			 my $html = $sbeamsMOD->create_table (data => \@data,
                          column_names=> \@headings,
                          table_name => "Protein Identification Summary",
                          table_id => "protein_info",
                          align => [qw(left center center center center center center)],
                          sortable => 1);
       print $html;
    }

   ### isolate_alignment  
	 my $isolate_alignment = $sbeamsMOD->get_alignment_display(atlas_build_id => $atlas_build_id,
																								 bioseq_id => join(",", @bioseq_ids),
                                                 protein_list => $parameters->{protein_list} || '',
                                                 orig_bioseq_id => $parameters->{orig_bioseq_id} || '',
																								 bioseq_strain => \%bioseq_strain,
                                                 sample_category_contraint => $exp_cat_str, 
                                                 );
   if ($htmlmode){
     print scalar $sbeams->make_toggle_section( neutraltext => "Peptide Mapping and Coverage",
                 sticky => 1,
                   name => 'gene_tree_div',
                barlink => 1,
                visible => 1,
                content => $isolate_alignment );
   }

   if (scalar keys %unique_sequences > 1){
		 if ($htmlmode){
				print scalar $sbeams->make_toggle_section( neutraltext => "Gene Tree",
									 sticky => 1,
										 name => 'gene_tree_div',
									barlink => 1,
									visible => 1,
									content => $tree_plot );


		 }
		 if (%pfam_domain){
			 my $domain_plot = '';
			 if($htmlmode){
         my @ps = ();
         foreach my $p(@proteins_in_cluster){
           if (defined $pfam_domain{$p}){
              push @ps, $p;
           }
         }
				 foreach my $biosequence_name(keys %pfam_domain){
					 my $length = $biosequence_id_info{$biosequence_name}{'Sequence Length'};
					 $pfam_domain{$biosequence_name}{length} = $length;
				}
				 my $domain_plot = $sbeamsMOD->get_pfam_domain_display(data => \%pfam_domain,
																															 list => \@ps); 

				 @headings = ('protein name', 'alignment start','alignment end','envelope start','envelope end',
											'hmm acc','hmm name','domain type','hmm start','hmm end','hmm length', 'bit score','e value');
		 
				 @data = ();
				 foreach my $prot (@proteins_in_cluster){
					 if (defined $pfam_domain{$prot}){
						 foreach my $regions(@{$pfam_domain{$prot}{regions}}){
							 my @record = ($prot); 
							 foreach my $col (qw(aliStart aliEnd start end accession description type hmmstart hmmend hmmlen bitscore score)){
								 push @record, $regions->{metadata}{$col};
							 }
							 push @data, \@record;
						 }
					 }
				 }	
				 print  $sbeamsMOD->create_table (data => \@data,
													column_names=> \@headings,
													table_name => "Pfam Domain",
													table_id => "pfam_domain",
													align => [qw(left center center center center center center)],
													sortable => 1,
													plot_html => $domain_plot);
			 }
		 }
   }

    #############################################################################
    #### Observed peptides
    #############################################################################
    my $obs_help = $sbeamsMOD->get_table_help(column_titles_ref=>\@labels);


    #### Since we don't display the controls, make the default page size
    #### huge, otherwise the user may not see all peptides
    $rs_params{page_size} = 500;

    #### get correct flanking AA info
    $sql = qq~
       SELECT DISTINCT
           P.peptide_accession AS "peptide_accession",
           PM.start_in_biosequence AS "start",
           COALESCE(PM.peptide_preceding_residue,PI.preceding_residue) As "preceding_residue",
           P.peptide_sequence AS "peptide_sequence",
           COALESCE(PM.peptide_following_residue,PI.following_residue) AS "following_residue",
           PM.highest_n_enzymatic_termini,
           PM.lowest_n_missed_cleavages
			FROM $TBAT_PEPTIDE_INSTANCE PI
			INNER JOIN $TBAT_PEPTIDE P
						ON ( PI.PEPTIDE_ID = P.PEPTIDE_ID )
			 LEFT JOIN $TBAT_PEPTIDE_MAPPING PM
						ON ( PI.PEPTIDE_INSTANCE_ID = PM.PEPTIDE_INSTANCE_ID )
			INNER JOIN $TBAT_ATLAS_BUILD AB
						ON ( PI.ATLAS_BUILD_ID = AB.ATLAS_BUILD_ID )
			 LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS
						ON ( AB.BIOSEQUENCE_SET_ID = BSS.BIOSEQUENCE_SET_ID )
			 LEFT JOIN $TBAT_BIOSEQUENCE BS
						ON ( PM.MATCHED_BIOSEQUENCE_ID = BS.BIOSEQUENCE_ID )
			WHERE 1 = 1
						AND AB.atlas_build_id IN ( $atlas_build_id )
						AND BS.biosequence_id in ($biosequence_id_str)
			ORDER BY P.PEPTIDE_ACCESSION
    ~;
    my @rows = $sbeams->selectSeveralColumns($sql);
    my %peptide_info = ();
    foreach my $row(@rows){
      my ($peptide_accession,$start,$preceding_residue,$peptide_sequence,$following_residue,
          $highest_n_enzymatic_termini,$lowest_n_missed_cleavages) = @$row;
      push @{$peptide_info{$peptide_accession}{preceding_residue}}, $preceding_residue;
      push @{$peptide_info{$peptide_accession}{following_residue}}, $following_residue;
      $peptide_info{$peptide_accession}{highest_n_enzymatic_termini} = $highest_n_enzymatic_termini;
      $peptide_info{$peptide_accession}{lowest_n_missed_cleavages} = $lowest_n_missed_cleavages;
      push @{$peptide_info{$peptide_accession}{start}},  $start;
    }

    my $peptide_counts = scalar keys %peptide_info;
    print scalar $sbeams->make_toggle_section( neutraltext => "Distinct Observed Peptides ($peptide_counts)",
					       sticky => 1,
					       name => 'getprotein_observedlist_div',
					       barlink => 1,
					       opendiv => 1,
					       visible => 1,
					       content => $obs_help
		) if $htmlmode;

    #### Calculate the best peptides to use
    my $best_peptide_information = $bestPeptideSelector->getBestPeptides(
      atlas_build_id => $atlas_build_id,
      biosequence_id => $biosequence_id,
      query_parameters_ref=>\%parameters,
      resultset_ref => $resultset_ref,
      column_titles_ref=>\@column_titles,
      is_trypsin_build => $is_trypsin_build,
      peptide_info => \%peptide_info,
      no_escape => 1
    );
#    $log->debug( Dumper( $best_peptide_information ) );
#    $log->debug( Dumper( $resultset_ref ) );

    my %instance_ids;
    my $max_peptides = 40;
    my $pep_cnt;
    for my $row ( sort { $b->[9] <=> $a->[9] } @{$resultset_ref->{data_ref}} ) {
      $pep_cnt++;
      last if $pep_cnt >= $max_peptides;
      $instance_ids{$row->[$resultset_ref->{column_hash_ref}->{peptide_instance_id}]}++;
    }
    my @pa_accessions;
    my @sample_list;

    my $seq_idx = $resultset_ref->{column_hash_ref}->{peptide_sequence};
    my $pre_idx = $resultset_ref->{column_hash_ref}->{preceding_residue};
    my $fol_idx = $resultset_ref->{column_hash_ref}->{following_residue};
    my $ntt_idx = $resultset_ref->{column_hash_ref}->{highest_n_enzymatic_termini};
    my $mc_idx = $resultset_ref->{column_hash_ref}->{lowest_n_missed_cleavages};
    my $ngl_idx = $resultset_ref->{column_hash_ref}->{n_genome_locations};
    my $ess_idx = $resultset_ref->{column_hash_ref}->{suitability_score};

    for my $row ( @{$resultset_ref->{data_ref}} ) {
      push @pa_accessions, $row->[0];
			$row->[$pre_idx]  =  join(",", uniq (split(",", $row->[$pre_idx])));
			$row->[$fol_idx]  = join(",", uniq (split(",", $row->[$fol_idx]))); 
      my $ntt = 0;
      if ( defined $row->[$ntt_idx] && $row->[$ntt_idx] ne '') {
				$ntt = $row->[$ntt_idx];
						} else {
				if ( $row->[$pre_idx] =~ /[-RK]/ ) {
					$ntt++;
				}
				if ( $row->[$fol_idx] =~ /[RK]$/ || $row->[$fol_idx] eq '-' ) {
					$ntt++;
				}
      }
      $row->[$ntt_idx] = $ntt if !defined $row->[$ntt_idx];

      my $mc = 0;
      if ( defined $row->[$mc_idx] && $row->[$mc_idx] ne '') {
				$mc = $row->[$mc_idx];
		  } elsif ( $row->[$seq_idx] =~ /[RK][^P]/ ) {
				$mc++;
		  }
						$row->[$mc_idx] = $mc if !defined $row->[$mc_idx];
						my $mgl = 0;
						if ( $row->[$ngl_idx] > 1 ) {
				$mgl++;
      }

      push @sample_list, [ $row->[0], $row->[$resultset_ref->{column_hash_ref}{'sample_ids'}] ];

      for my $idx ($resultset_ref->{column_hash_ref}{'sample_ids'},
		   $resultset_ref->{column_hash_ref}{'is_subpeptide_of'} ) {
				$row->[$idx] =~ s/\s//g;
				my @samples = split( /,/, $row->[$idx] );
				if ( $idx == $resultset_ref->{column_hash_ref}{'sample_ids'} ) {
					my $cnt = scalar( @samples );
					$row->[$idx] = ( $htmlmode ) ? "<A HREF='$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=$row->[0]&action=QUERY#samples' TITLE='$row->[$idx]'> $cnt </A>" : $cnt;
				} else {
					$row->[$idx] = scalar( @samples );
				}
      }

      $row->[0] =  ( $htmlmode ) ? "<A HREF='$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=$row->[0]&action=QUERY'>$row->[0] </A>" :$row->[0];

      my @errs;
			push @errs, 'mc' if $mc;
			push @errs, 'et'.$ntt if $ntt < 2;
			push @errs, 'mgl' if $mgl;
			my $annot = '';
			if ( @errs) {
			  $annot = '[' . join( ',', @errs ) . ']';
      }

      if ($sbeams->output_mode() eq 'html') {
				$annot = "<FONT COLOR=RED><SUP>$annot</SUP></FONT>";
      }
      $row->[$ess_idx] = sprintf( "%0.2f", $row->[$ess_idx] ) . " $annot";
      my $cnt = 0;
    }

    # Must... continue... hacking!!!
    $resultset_ref->{types_list_ref}->[4] = 'varchar';
#    $resultset_ref->{precision_list_ref}->[4] = 36;

    my @row_color_list = ("#f3f1e4","#d3d1c4");

    my $distinct_obs_peptide_display = $sbeamsMOD->get_individual_spectra_display(
      column_titles_ref=>\@column_titles,
      colnameidx_ref => \%colnameidx,
      resultset_ref=> $resultset_ref,
      hidden_cols_ref=>\%hidden_cols );

    print "$distinct_obs_peptide_display" if $htmlmode;

    print "</DIV>" if $htmlmode;  ## close section!


    #############################################################################
    #### Display synthetic peptide info
    #############################################################################
    unless ( $sbeams->isGuestUser() ) {
      #### Display the best peptide information
      my $dirty_peptide_display = $bestPeptideSelector->get_dirty_peptide_display(
				atlas_build_id => $current_page->{atlas_build_id},
				base_url=>$base_url,
				biosequence_id => $biosequence_id,
				tr_info => '',
      );
      if ( $htmlmode && $dirty_peptide_display ) {
				print scalar $sbeams->make_toggle_section( neutraltext => 'Synthetic peptide info',
						   sticky => 1,
						   name => 'getprotein_dirty_peptide_div',
						   #ToDo?# tooltip => 'Show/Hide Section',
						   barlink => 1,
						   visible => 1,
						   content => $dirty_peptide_display);
      }
    }


    #############################################################################
    #### Display the peptide map and samples list
    #############################################################################
    my $sample_ids = getSampleList( sample_list => \@sample_list );
    if ($htmlmode) {
      my $sampleMap = $sbeamsMOD->getSampleMapDisplay( sample_ids => $sample_ids,
						     instance_ids => \%instance_ids,
						     atlas_build_id => $atlas_build_id,
                 sample_category_contraint => $exp_cat_str,
						     no_header => 1,
                 sample_category => 1,
						     header_text => "Per-sample source expression for $max_peptides most highly observed peptides",
						     second_header => "(Peptides listed by accession, * denotes single genome mapping)"
						   );

      print scalar $sbeams->make_toggle_section( neutraltext => 'Experiment Peptide Map',
						 sticky => 1,
						 name => 'getprotein_samplemap_div',
						 #ToDo?# tooltip => 'Show/Hide Section',
						 barlink => 1,
						 visible => 1,
						 content => "<A NAME='samplemap'></A><TABLE>$sampleMap</TABLE>" );

      my $sampleDisplay = $sbeamsMOD->getProteinSampleDisplay( sample_ids => $sample_ids,
						      no_header => 1,
						      rows_to_show => 25,
						      max_rows => 500,
                  sample_category => 1,
						    );
      print '<script type="text/javascript" src="https://cdn.plot.ly/plotly-latest.min.js"></script>';
      print scalar $sbeams->make_toggle_section( neutraltext => 'Observed in Experiments',
						 sticky => 1,
						 name => 'getprotein_samplelist_div',
						 #ToDo?# tooltip => 'Show/Hide Section',
						 barlink => 1,
						 visible => 1,
						 content => "<TABLE>$sampleDisplay</TABLE>" );

      print "<script type='text/javascript'>\n";
      if ($parameters{exp_cat_full}){
         print "var options=['". join("','",split(",", $parameters{exp_cat_full})) ."'];\n";
      }else{
         print 'var options=data[0].labels' .";\n";
      }
      print qq~
           var selected_val = [$exp_cat_str];
					 var sel = document.getElementById("exp_cat");
					 for (var i=0; i<options.length; i++) {
					     var opt = document.createElement('option');
							 opt.text = opt.value = options[i] ;
               if (selected_val.includes(options[i])){
                  opt.selected=true;
               } 
							 sel.add(opt); 
						}
        </script>
      ~;
    }

    #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "<H4>Select parameters above and press QUERY</H4>\n";
    } else {
      print "You need to supply some parameters to constrain the query\n";
    }
  }

  $sbeamsMOD->display_page_footer();
} # end handle_request


sub get_ortholog_information {
  my $biosequence = shift;
  return unless $biosequence->{biosequence_name};
  my $sql =<<"  END";
  SELECT ortholog_group
  FROM $TBBL_ORTHOLOG
  WHERE entry_accession = '$biosequence->{biosequence_name}'
  END
  my $link = '';
  my $sth = $sbeams->get_statement_handle( $sql );
  while ( my @row = $sth->fetchrow_array() ) {
    $link .= qq~<A HREF="viewOrthologs?group_id=$row[0];entry_accession=$biosequence->{biosequence_name}" TITLE="View orthologs/homolog information"><IMG BORDER=0 SRC="$HTML_BASE_DIR/images/OrthoMCL.jpg">&nbsp; $row[0]</A>\n~;
  }
  return $link if $link;


  $sql =<<"  END";
  SELECT ortholog_group
  FROM $TBBL_ORTHOLOG O
  JOIN $TBAT_SEARCH_KEY SK ON SK.search_key_name = O.entry_accession
  JOIN $TBAT_BIOSEQUENCE B ON B.biosequence_name = SK.resource_name
  WHERE B.biosequence_name = '$biosequence->{biosequence_name}'
  END
  my $sth = $sbeams->get_statement_handle( $sql );
  while ( my @row = $sth->fetchrow_array() ) {
    $link .= qq~<A HREF="viewOrthologs?group_id=$row[0];entry_accession=$biosequence->{biosequence_name}" TITLE="View orthologs/homolog information"><IMG BORDER=0 SRC="$HTML_BASE_DIR/images/OrthoMCL.jpg">&nbsp; $row[0]</A>\n~;
  }
  return $link;


}

sub getPeptideCount {
  my %args = @_;
  my $SUB_NAME = $sbeams->get_subname();

  #### Decode the argument list
  my $resultset_ref = $args{'resultset_ref'}
   || die "ERROR[$SUB_NAME]: resultset_ref not passed";
  my $biosequence = $args{'biosequence'}
   || die "ERROR[$SUB_NAME]: biosequence not passed";
  my $line_length = $args{'line_length'} || 70;
  my $word_length = $args{'word_length'} || 10;
  my $enzyme = $args{'enzyme'} || '';
  my $total_observations = $args{'total_observations'};
  my $biosequence_id = $biosequence->{biosequence_id};

  # removed DSC 2008-03 - let caller decide if it should be displayed
  #### Don't display unless HTML
#  return unless ($sbeams->output_mode() eq 'html');

  #### Get the hash of indices of the columns
  my %col = %{$resultset_ref->{column_hash_ref}};

  #### Loop over all the peptides
  my $data_ref = $resultset_ref->{data_ref};
  my @peptides = ();
  my %peptides;
  my %seq2instance;
  my %map_info;
  my %peptide_mapping;
  foreach my $row (@{$data_ref}) {
    my @biosequence_ids =  split(",", $row->[$col{biosequence_id}]);
    my %biosequence_ids = map {$_ => 1} @biosequence_ids;

    $peptides{$row->[$col{peptide_sequence}]} = $row->[$col{n_observations}];
    $seq2instance{$row->[$col{peptide_sequence}]} = $row->[$col{peptide_instance_id}];


    $map_info{$row->[$col{peptide_sequence}]} = { acc => $row->[$col{peptide_accession}],
                                                  nobs => $row->[$col{n_observations}],
                                                  inst => $row->[$col{peptide_instance_id}],
                                                  n_gen => $row->[$col{n_genome_locations}] };


    next if(not defined $biosequence_ids{$biosequence_id});
    push(@peptides,$row->[$col{peptide_sequence}]);
    if (not defined $peptide_mapping{$row->[$col{biosequence_id}]}{$row->[$col{peptide_sequence}]} ){
      $total_observations += $row->[$col{n_observations}];
    }
    $peptide_mapping{$row->[$col{biosequence_id}]}{$row->[$col{peptide_sequence}]} =1;
   
  }
  return( $total_observations, \@peptides, \%peptides, \%seq2instance, \%map_info );
#  return( $total_observations, \@peptides );
}


sub getSampleList {
  my %args = @_;

  #### Decode the argument list
  return [] unless $args{sample_list};

#  each row has an accession, sample_string pair.

  my %observed_samples;
  foreach my $row (@{$args{sample_list}}) {
    my $observed_sample_list = $row->[1];
    my @all = split(/[,;]/,$observed_sample_list);
    foreach my $element ( @all ) {
      $observed_samples{$element}++;
    }
  }
  my @keys = sort { "\L$a" cmp "\L$b" } ( keys( %observed_samples ) );
  return \@keys;
}

###############################################################################
## postProcessResultset 
##
## 
################################################################################
sub postProcessResultset {
  my %args = @_;

  #### Process the arguments list
  my $resultset_ref = $args{'resultset_ref'};
  my $rs_params_ref = $args{'rs_params_ref'};
  my $query_parameters_ref = $args{'query_parameters_ref'};
  my $column_titles_ref = $args{'column_titles_ref'};
  my $atlas_build_id = $args{'atlas_build_id'}; 
  my $bioseq_strain = $args{'bioseq_strain'};

  my %rs_params = %{$rs_params_ref};
  my %parameters = %{$query_parameters_ref};
  my $n_rows = scalar(@{$resultset_ref->{data_ref}});
  my $cols = $resultset_ref->{column_hash_ref};

  my @data =();
  my %peptide_samples =();
  my %peptide_orgs = ();
  my %peptide_ids =(); 
  my $sql = qq~
    SELECT ASB.atlas_search_batch_id, S.SAMPLE_ID
     FROM $TBAT_SAMPLE S
     JOIN $TBAT_ATLAS_SEARCH_BATCH ASB ON (S.SAMPLE_ID = ASB.SAMPLE_ID)
     JOIN $TBAT_ATLAS_BUILD_SEARCH_BATCH ABSB ON (ABSB.atlas_search_batch_id = ASB.atlas_search_batch_id)
     WHERE ABSB.ATLAS_BUILD_ID = $atlas_build_id
  ~;
  my %asb2sample_id=$sbeams->selectTwoColumnHash($sql);
 
  #$resultset_ref->{data_ref}->[$i]->[$cols->{sample_ids}] = join(",", keys %{$pi_samples->{$peptide_accession}});
  for (my $i=0; $i<$n_rows; $i++) {
    my $peptide_accession = $resultset_ref->{data_ref}->[$i]->[$cols->{peptide_accession}];
    my $asb_id = $resultset_ref->{data_ref}->[$i]->[$cols->{sample_ids}];
    my $biosequence_id = $resultset_ref->{data_ref}->[$i]->[$cols->{'biosequence_id'}];
    $peptide_samples{$peptide_accession}{$asb2sample_id{$asb_id}} =1;
    $peptide_orgs{$peptide_accession}{$bioseq_strain->{$biosequence_id}} =1;
    $peptide_ids{$peptide_accession}{$biosequence_id} =1;
  }

  my %processed = ();
  for (my $i=0; $i<$n_rows; $i++) {
    my $peptide_accession = $resultset_ref->{data_ref}->[$i]->[$cols->{peptide_accession}];
    $resultset_ref->{data_ref}->[$i]->[$cols->{sample_ids}] = join(",", sort {$a <=> $b} keys %{$peptide_samples{$peptide_accession}});
    $resultset_ref->{data_ref}->[$i]->[$cols->{isolate_map}] = join(",", sort {$a cmp $b} keys %{$peptide_orgs{$peptide_accession}});
    $resultset_ref->{data_ref}->[$i]->[$cols->{biosequence_id}]= join(",", sort {$a cmp $b} keys %{$peptide_ids{$peptide_accession}});
    my $values = join(",", @{$resultset_ref->{data_ref}->[$i]});
    if (! $processed{$values}){
      #print join(",", @{$resultset_ref->{data_ref}->[$i]})."<BR>";;
      push @data, $resultset_ref->{data_ref}->[$i];
      $processed{$values} = 1;
    }
  }
  $resultset_ref->{data_ref} = \@data;
}