#!/usr/local/bin/perl ############################################################################### # Program : CompareIsolates # $Id: CompareIsolates 2022-06-01 # # Description : Prints summary of a given protein given selection # atlas build and protein name. # # SBEAMS is Copyright (C) 2000-2021 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use Data::Dumper; use List::MoreUtils qw(uniq); $|++; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; $sbeams = new SBEAMS::Connection; $sbeams->enable_sql_cache(); my $htmlmode; use SBEAMS::BioLink; use SBEAMS::BioLink::Tables; my $biolink = SBEAMS::BioLink->new(); $biolink->setSBEAMS($sbeams); use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); use SBEAMS::PeptideAtlas::KeySearch; my $keySearch = new SBEAMS::PeptideAtlas::KeySearch; $keySearch->setSBEAMS($sbeams); use SBEAMS::PeptideAtlas::BestPeptideSelector; my $bestPeptideSelector = new SBEAMS::PeptideAtlas::BestPeptideSelector; $bestPeptideSelector->setSBEAMS($sbeams); $bestPeptideSelector->setAtlas($sbeamsMOD); # Global sequence coverage array, will be populated post-graphic my @coverage; my $max_intensity = 1; my %coverage=(); # Swiss Prot annotations my @sp_rows; my $sp_rationale; my $current_page = { organism => '', atlas_build_id => '' }; my $parameters; my $swiss; # fetch info from swissprot/nextprot if applicable use constant MIN_OBS_LENGTH => 6; use constant MAX_OBS_LENGTH => 40; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <profile_sql( list=>1); exit(0); ############################################################################### # Main Program: # # Call $sbeams->Authenticate() and exit if it fails or continue if it works. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin', 'PeptideAtlas_readonly', 'PeptideAtlas_exec'], #connect_read_only=>1, allow_anonymous_access=>1, )); #### Read in the default input parameters my %parameters; $parameters = \%parameters; my $n_params_found = $sbeams->parse_input_parameters( q=>$q, parameters_ref=>\%parameters); #$sbeams->printDebuggingInfo($q); #### Process generic "state" parameters before we start $sbeams->processStandardParameters(parameters_ref=>\%parameters); # Cache current output mode $htmlmode = ( $sbeams->output_mode() eq 'html' ) ? 1 : 0; #### Decide what action to take based on information so far if ($parameters{action} eq "???") { # Some action } else { handle_request(ref_parameters=>\%parameters); } } # end main ############################################################################### # Handle Request ############################################################################### sub handle_request { my %args = @_; my $spacer = $sbeams->getGifSpacer( 900 ); #### Process the arguments list my $ref_parameters = $args{'ref_parameters'} || die "ref_parameters not passed"; my %parameters = %{$ref_parameters}; #### Get the HTML to display the tabs my $tabMenu = $sbeamsMOD->getTabMenu( parameters_ref => \%parameters, program_name => $PROG_NAME, ); my $contact_id = $sbeams->getCurrent_contact_id(); # die $contact_id; my $protein_list_sql = ''; if ( $parameters{atlas_build_id} && $parameters{protein_name} ) { $protein_list_sql = qq~ SELECT DISTINCT AB.atlas_build_id FROM $TBAT_ATLAS_BUILD AB JOIN $TBAT_PROTEIN_LIST_BUILD PLB ON AB.atlas_build_id = PLB.atlas_build_id JOIN $TBAT_PROTEIN_LIST PL ON PL.protein_list_id = PLB.protein_list_id JOIN $TBAT_PROTEIN_LIST_PROTEIN PLP ON PL.protein_list_id = PLP.protein_list_id JOIN $TBAT_BIOSEQUENCE B ON ( B.biosequence_set_id = AB.biosequence_set_ID AND B.biosequence_name = PLP.protein_name ) WHERE PL.contributor_contact_id = $contact_id AND AB.atlas_build_id = $parameters{atlas_build_id} AND PLP.protein_name = '$parameters{protein_name}' ~; } #### Get the current atlas_build_id based on parameters or session my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID( allow_protein_list_login => 1, protein_list_sql => $protein_list_sql, parameters_ref => \%parameters, display_page_header_footer => 1, PROG_NAME => $PROG_NAME, ); if (defined($atlas_build_id) && $atlas_build_id < 0) { return; } $parameters{atlas_build_id} = $atlas_build_id; $current_page->{atlas_build_id} = $atlas_build_id; $parameters->{motif} = $sbeamsMOD->getBuildMotif( build_id => $atlas_build_id ) || ''; #### If a new protein_name was supplied, store it my $protein_name = $parameters{"protein_name"}; my $cached_protein_name = $sbeams->getSessionAttribute( key => 'PeptideAtlas_protein_name', ); if ($protein_name) { $sbeams->setSessionAttribute( key => 'PeptideAtlas_protein_name', value => "$protein_name", ); if ($protein_name ne $cached_protein_name){ $parameters{exp_cat_full} = ''; $parameters{exp_cat} = ''; } #### Else see if we had one stored } else { if ($cached_protein_name){ $protein_name = $cached_protein_name; $parameters{'apply_action'} = 'GO'; } } #### Get the search keyword if (! $parameters->{caching}){ my $html_cache_name =''; if ($ENV{REQUEST_URI} =~ /CompareIsolates$/){ $protein_name =~ s/^\s+|\s+$//; $html_cache_name = lc("apply_action=QUERY\&atlas_build_id=$atlas_build_id". "\&protein_name=$protein_name"); }else{ if($ENV{"REQUEST_URI"}=~ /\?/) { my ($path , $query_string) = split(/\?/, $ENV{"REQUEST_URI"}); my %parameter_hash =(); my @TempArray=split("&", $query_string); foreach my $item (@TempArray) { my ($Key, $Value)=split("=", $item); $Value =~ s/^\s+|\s+$//; $parameter_hash{$Key}=$Value; } @TempArray = (); foreach my $param(sort {$a cmp $b} keys %parameter_hash){ push @TempArray, "$param=$parameter_hash{$param}"; } $html_cache_name= lc(join('&', @TempArray)); } } my $html_cache_loc = ''; if ($PHYSICAL_BASE_DIR !~ /dev\w+\/sbeams/){ $html_cache_loc = "/net/dblocal/www/html/sbeamscommon/htmlcache/CompareIsolates"; }else{ $html_cache_loc = "$PHYSICAL_BASE_DIR/htmlcache/CompareIsolates"; } #$log->debug("html_cache_name $ENV{REQUEST_URI} $html_cache_loc/$html_cache_name"); if ( -e "$html_cache_loc/$html_cache_name"){ if (open (IN,"<$html_cache_loc/$html_cache_name")){ print "Content-type: text/html\n\n"; print $_ while (); close IN; exit; } } } my $project_id = $sbeamsMOD->getProjectID( atlas_build_id => $parameters{atlas_build_id}); $sbeamsMOD->display_page_header(project_id => $project_id, init_tooltip => 1); print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html'); $parameters{use_nextprot} = ( $parameters->{use_nextprot} ne '' && $parameters->{use_nextprot} == 0 ) ? 0 : 1; $swiss = $sbeamsMOD->get_uniprot_annotation( show_all_snps => 1, accession => $protein_name, build_id => $parameters{atlas_build_id}, use_nextprot => $parameters{use_nextprot} ); #### Define some variables for a query and resultset my %resultset = (); my $resultset_ref = \%resultset; my (%url_cols,%hidden_cols,%max_widths,$show_sql); #### Read in the standard form values my $apply_action = $parameters{'action'} || $parameters{'apply_action'}; my $TABLE_NAME = $parameters{'QUERY_NAME'}; #### Set some specific settings for this program my $CATEGORY="Get Protein"; my $PROGRAM_FILE_NAME = $PROG_NAME; my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; my $help_url = "$CGI_BASE_DIR/help_popup.cgi"; #### If the apply action was to recall a previous resultset, do it my %rs_params = $sbeams->parseResultSetParams('q' => $q); my $n_params_found = 0; if ($apply_action eq "VIEWRESULTSET") { $sbeams->readResultSet( resultset_file=>$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, ); $n_params_found = 99; } #### Get a list of accessible project_ids my @accessible_project_ids = $sbeams->getAccessibleProjects(); my $accessible_project_ids = join( ",", @accessible_project_ids ) || '0'; #### Get a hash of available atlas builds my $sql = qq~ SELECT atlas_build_id,atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE project_id IN ( $accessible_project_ids ) AND record_status!='D' ORDER BY atlas_build_name ~; my %atlas_build_names = $sbeams->selectTwoColumnHash($sql); my %name2build = reverse( %atlas_build_names ); my @ordered_atlas_build_ids; for my $name ( sort ( keys( %name2build ) ) ) { push @ordered_atlas_build_ids, $name2build{$name}; } unless ( grep /^$atlas_build_id$/, @ordered_atlas_build_ids ) { my ($build_name) = $sbeams->selectOneColumn( "SELECT atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE atlas_build_id = $atlas_build_id" ); push @ordered_atlas_build_ids, $atlas_build_id; $atlas_build_names{$atlas_build_id} = $build_name; } # die "ID is $atlas_build_id, and name is $atlas_build_names{$atlas_build_id}"; #### If the output_mode is HTML, then display the form if ($sbeams->output_mode() eq 'html') { print qq~ ~; print "

"; print ""; print $q->start_form(-method=>"POST", -action=>"$base_url", -name=>"SearchForm", ); print "PeptideAtlas Build: "; print $q->popup_menu(-name => "atlas_build_id", -values => [ @ordered_atlas_build_ids ], -labels => \%atlas_build_names, -default => $atlas_build_id, -onChange => 'switchAtlasBuild()', ); print $q->br; print "Protein Name: "; print $q->textfield( -name => "protein_name", -size=>50, -default=>$protein_name ); print $q->br; print $q->br; print "\n

Experiment Categories: \n"; print ''; print "\n"; print $q->scrolling_list(-id=>'exp_cat', -name=>'exp_cat', -values=>[], -size=>4, -style =>'margin-left:5px;', -multiple=>'true'); print qq~ \n" ; }else{ print "\n" ; } print "\n"; print $q->hidden( "apply_action", ''); print "\n"; print $q->hidden( "exp_cat_full", ''); print "\n"; print $q->submit(-name => "action", -value => 'QUERY', -onClick => 'switchAtlasBuild()', -style => 'margin-left:50px;', -label => 'GO'); print "\n
\n"; print $q->end_form; print "\n"; print ""; print "

"; $current_page->{organism} = $sbeamsMOD->getCurrentAtlasOrganism( parameters_ref => { atlas_build_id => $atlas_build_id } ); my $gaggle = $sbeams->getGaggleMicroformat( organism => $current_page->{organism}, data => [$protein_name], object => 'namelist', name => 'Protein name', type => 'direct' ); print "$gaggle\n"; } ######################################################################### #### Process all the constraints #### If atlas_build_id was not selected, stop here unless ($atlas_build_id) { if ($sbeams->output_mode() eq 'html') { print "Please select an atlas build, type in a protein name ". "(e.g. ENSP00000290230 for human Ensembl protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide an atlas_build_id', ); } return; } #### If biosequence_name was not selected, stop here unless ($protein_name) { if ($sbeams->output_mode() eq 'html') { print "Please type in a protein name ". "(e.g. ENSP00000290230 for human Ensembl protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide a protein_name', ); } return; } my $sql = qq~ SELECT IS_TRYPSIN_BUILD FROM $TBAT_ATLAS_BUILD WHERE ATLAS_BUILD_ID = $atlas_build_id ~ ; my @row = $sbeams->selectOneColumn($sql); my $is_trypsin_build = 'Y'; if(@row){ if ($row[0] eq 'N'){ $is_trypsin_build = 'N'; } } if($is_trypsin_build eq 'N'){ print qq~
WARNING: This build contains many samples that are not digested with trypsin and therefore the usual trypsin-centric display elements are turned off.
~; } #### Build ATLAS_BUILD constraint my $atlas_build_clause = $sbeams->parseConstraint2SQL( constraint_column=>"AB.atlas_build_id", constraint_type=>"int_list", constraint_name=>"Atlas Build", constraint_value=>$atlas_build_id ); return if ($atlas_build_clause eq '-1'); #### Build PROTEIN_NAME constraint my $biosequence_name_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_name", constraint_type=>"plain_text", constraint_name=>"Protein Name", constraint_value=>$protein_name ); return if ($biosequence_name_clause eq '-1'); #### Define the SQL statement to find the biosequence $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name FROM $TBAT_ATLAS_BUILD AB INNER JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) WHERE 1 = 1 $atlas_build_clause $biosequence_name_clause ~; my %biosequences = $sbeams->selectTwoColumnHash($sql); my $n_biosequences = scalar(keys(%biosequences)); #### If the protein was not found, report the problem if ($n_biosequences == 0) { if ($sbeams->output_mode() eq 'html') { print qq~ This protein identifier was not found in atlas build '$atlas_build_names{$atlas_build_id}'.
Please check selections and try again.

Or, search for the identifer across all major PeptideAtlas builds:   
~; ## looking for similar ids my $sql = qq~ SELECT SKE.RESOURCE_NAME, SKE.RESOURCE_TYPE, BS.BIOSEQUENCE_ID FROM $TBAT_SEARCH_KEY_ENTITY SKE JOIN $TBAT_BIOSEQUENCE BS ON (BS.BIOSEQUENCE_NAME = SKE.SEARCH_KEY_NAME) JOIN $TBAT_ATLAS_BUILD AB ON (AB.BIOSEQUENCE_SET_ID = BS.BIOSEQUENCE_SET_ID AND AB.ATLAS_BUILD_ID = $atlas_build_id) WHERE SKE.RESOURCE_NAME in ( ( SELECT SKE2.PROTEIN_ALIAS_MASTER FROM $TBAT_SEARCH_KEY_ENTITY SKE2 WHERE SKE2.RESOURCE_NAME like '$protein_name%' ) UNION ( SELECT SKE4.RESOURCE_NAME FROM $TBAT_SEARCH_KEY_ENTITY SKE4 WHERE SKE4.PROTEIN_ALIAS_MASTER like '$protein_name%' ) UNION ( SELECT SKE4.RESOURCE_NAME FROM $TBAT_SEARCH_KEY_ENTITY SKE4 WHERE SKE4.search_key_name like '$protein_name%' ) ) ~; my @rows = $sbeams->selectSeveralColumns ($sql); if (@rows){ my %related_names = (); my %ids; foreach my $row(@rows){ my ($name, $type, $biosequence_id) = @$row; $related_names{$name} = $type; $ids{$biosequence_id} =1; } my $biosequence_ids = join (",", keys %ids); $sql = qq~ SELECT DISTINCT B1.BIOSEQUENCE_NAME, B2.BIOSEQUENCE_NAME FROM $TBAT_BIOSEQUENCE_RELATIONSHIP BR LEFT JOIN $TBAT_BIOSEQUENCE B1 ON (B1.BIOSEQUENCE_ID = BR.RELATED_BIOSEQUENCE_ID) LEFT JOIN $TBAT_BIOSEQUENCE B2 ON (B2.BIOSEQUENCE_ID = BR.REFERENCE_BIOSEQUENCE_ID) WHERE BR.atlas_build_id = $atlas_build_id AND (BR.related_biosequence_id in ($biosequence_ids) OR BR.reference_biosequence_id in ($biosequence_ids)) AND RELATIONSHIP_TYPE_ID = 2 ~; @rows = $sbeams->selectSeveralColumns ($sql); my %biosequence_identifcal = (); foreach my $row(@rows){ my ($name1, $name2) = @$row; $biosequence_identifcal{$name1}{$name2} =1; foreach my $p (keys %{$biosequence_identifcal{$name2}}){ $biosequence_identifcal{$name1}{$p} =1; } $biosequence_identifcal{$name2}{$name1} = 1; foreach my $p (keys %{$biosequence_identifcal{$name1}}){ $biosequence_identifcal{$name2}{$p} =1; } } print "

Here are some related protein names in the same build. Proteins in the same row have identical sequences.

" if (keys %related_names); foreach my $name(sort {$a cmp $b} keys %related_names){ print "$name"; if (defined $biosequence_identifcal{$name}){ foreach my $name3(sort {$a cmp $b} keys %{$biosequence_identifcal{$name}}){ next if ($name3 eq $name); print ", $name3"; } } print "

"; } print "

"; }else{ $sbeams->reportException( state => 'ERROR', type => 'RECORD NOT FOUND', message => "The protein entered '$protein_name' was not found ". "in the atlas build '$atlas_build_names{$atlas_build_id}'.", ); } } else { $sbeams->reportException( state => 'ERROR', type => 'RECORD NOT FOUND', message => "The protein entered '$protein_name' was not found ". "in the atlas build '$atlas_build_names{$atlas_build_id}'.", ); } return; } #### If more than one protein was found, list them if ($n_biosequences > 1) { if ($sbeams->output_mode() eq 'html') { print qq~ Several ($n_biosequences) entries were returned from your request for '$protein_name' in the atlas build '$atlas_build_names{$atlas_build_id}'. Please select the appropriate one:

~; while (my ($key,$value) = each (%biosequences)) { print "$value

"; } } else { $sbeams->reportException( state => 'ERROR', type => 'TOO MANY RECORDS', message => qq~More than one ($n_biosequences) was returned from your request for '$protein_name' in the atlas build '$atlas_build_names{$atlas_build_id}'.~, ); } return; } #### Extract the one biosequence_id my ($biosequence_id) = keys(%biosequences); ## get proteins in the same cluster of this protein #protein_name my @proteins_in_cluster; my $proteins_in_cluster_str; my %bioseq_strain; my @bioseq_ids =(); my %pfam_domain =(); my $tree_plot = ''; my $newick_data = ''; my $cluster_id; my $build_path=''; if ( ($parameters->{restore} || $parameters->{bioseq_id}) && $apply_action !~ /QUERY/ ) { my $list_names = ''; if ($parameters->{bioseq_id}) { if ($parameters->{restore}){ $list_names = $sbeamsMOD->getBioseqNamesFromBioseqenceIDs (biosequence_ids => $parameters->{orig_bioseq_id}); }else{ $list_names = $sbeamsMOD->getBioseqNamesFromBioseqenceIDs (biosequence_ids => $parameters->{bioseq_id}); } } $list_names =~ s/\s+//g; @proteins_in_cluster = uniq split(",", $list_names); } ## get cluster info $build_path = $sbeamsMOD->getAtlasBuildDirectory( atlas_build_id => $atlas_build_id ); my $cluster_id_file = "$build_path/Orthogroups.txt"; open (IN, "<$cluster_id_file") or $sbeams->reportException( state => 'ERROR', type => 'FILE not found', message => "can't find file $build_path/Orthogroups.txt" ); while (my $line = ){ if ($line =~ /$protein_name/){ chomp $line; $line =~ /^(\S+):\s+(.*)$/; $cluster_id = $1; foreach my $p(split(/\s+/, $2)){ push @proteins_in_cluster, $p if ($apply_action =~ /QUERY/); } } } if (@proteins_in_cluster < 2){ $sbeams->reportException( state => 'ERROR', type => 'Too Few Sequence', message => "Needs more than one sequence in cluster.", ); return; } my $gene_tree_file = "$build_path/OrthoFinder/Resolved_Gene_Trees//$cluster_id"."_tree.txt"; if (open (IN,"<$gene_tree_file")){ $newick_data = ; close IN; $tree_plot = $sbeamsMOD->draw_tree($newick_data); } my $hmmscan_out_file ="$build_path/pfam.out"; ## # #WP_014540656.1 26 466 24 466 PF01425.21 Amidase Family 3 451 451 478.2 2.6e-143 1 No_clan if (open (IN,"<$hmmscan_out_file")){ my %proteins_in_cluster = map {$_ => 1} @proteins_in_cluster; my @colors = qw(#2dcf00 #ff5353 #5b5bff #ebd61d #ba21e0 #ff9c42 #ff7dff #b9264f #baba21 #c48484 #1f88a7 #cafeb8 #4a9586 #ceb86c #0e180f #2dcfff); my $color_counter = 0; my %used_color = (); #my %pfam_result = (); while (my $line = ){ next if ($line =~ /^#/); chomp $line; my ($name,$aliS,$aliE,$eS,$eE,$hmm_acc,$hmm_name,$type,$hS,$hE,$len,$bit,$e,$sig,$clan) = split(/\s+/, $line); next if (not defined $proteins_in_cluster{$name}); my %data =(); $data{type} = "pfama"; $data{text} = $hmm_name; if (defined $used_color{$hmm_name}){ $data{colour} = $used_color{$hmm_name}; }else{ $data{colour} = $colors[$color_counter]; $used_color{$hmm_name} = $colors[$color_counter]; $color_counter++; $color_counter = 0 if ($color_counter > $#colors); } $data{display} = 'true'; $data{start} = $eS; $data{end} = $eE; if ($hS > 1 ){ $data{startStyle} = 'jagged'; }else{ $data{startStyle} = 'curved'; } if ($hE < $len){ $data{endStyle} = 'jagged'; }else{ $data{endStyle} = 'curved'; } if ($type =~/^(repeat|modif)/i){ $data{endStyle} = 'straight' if ($data{endStyle} ne 'jagged'); $data{startStyle} = 'straight' if ($data{startStyle} ne 'jagged'); } $data{metadata}{'scoreName'} = 'e-value'; $data{metadata}{'score'}= $e; $data{metadata}{'bitscore'} =$bit; $data{metadata}{'accession'} = $hmm_acc; $data{metadata}{'type'} = $type; $data{metadata}{'database'} = 'pfam'; $data{metadata}{'aliEnd'} = $aliE; $data{metadata}{'aliStart'} = $aliS; $data{metadata}{'start'} = $eS; $data{metadata}{'end'} = $eE; $data{metadata}{'hmmstart'} = $hS; $data{metadata}{'hmmend'} = $hE; $data{metadata}{'hmmlen'} = $len; $data{metadata}{'description'} = $hmm_name; push @{$pfam_domain{$name}{regions}}, \%data; } } $proteins_in_cluster_str = join("','", @proteins_in_cluster); $sql = qq~ SELECT biosequence_ID,biosequence_name, BS.biosequence_desc, biosequence_seq FROM $TBAT_ATLAS_BUILD AB JOIN $TBAT_BIOSEQUENCE_SET BSS ON AB.biosequence_set_id = BSS.biosequence_set_id JOIN $TBAT_BIOSEQUENCE BS ON BSS.biosequence_set_id = BS.biosequence_set_id WHERE AB.atlas_build_id IN ( $atlas_build_id ) AND BS.biosequence_name in ('$proteins_in_cluster_str') ~; my @results = $sbeams->selectSeveralColumns($sql); my %tmp =(); my %unique_sequences =(); if ($parameters{redundancy_constraint} =~ /on/){ ## get strain info foreach my $row(@results){ my $sequence = $row->[3]; $sequence =~ s/\*+$//; if ($row->[2] =~ /strain\/isolate=(\S+)/){ $bioseq_strain{$row->[0]} = $1; }else{ $bioseq_strain{$row->[0]} = ''; } $unique_sequences{$sequence}{scalar(split(",", $bioseq_strain{$row->[0]}))}{$row->[1]}=$row->[0]; $tmp{$row->[0]} = $row->[1] if ($row->[1] eq $protein_name); } LOOP:foreach my $seq (keys %unique_sequences){ foreach my $n (sort {$b <=> $a} keys %{$unique_sequences{$seq}}){ my @ids = sort {$b cmp $a} keys %{$unique_sequences{$seq}{$n}}; $tmp{$unique_sequences{$seq}{$n}{$ids[0]}} = $ids[0]; next LOOP; } } @proteins_in_cluster = (); ## sort protein names foreach my $id (sort {$bioseq_strain{$a} cmp $bioseq_strain{$b}} keys %bioseq_strain){ push @proteins_in_cluster, $tmp{$id} if (defined $tmp{$id} || $tmp{$id} eq $protein_name); push @bioseq_ids, $id if (defined $tmp{$id} || $tmp{$id} eq $protein_name); } }else{ foreach my $row(@results){ $tmp{$row->[0]} = $row->[1]; my $sequence = $row->[3]; $sequence =~ s/\*+$//; $unique_sequences{$sequence} =1; if ($row->[2] =~ /strain\/isolate=(\S+)/){ $bioseq_strain{$row->[0]} = $1; }else{ $bioseq_strain{$row->[0]} = ''; } } @proteins_in_cluster = (); ## sort protein names foreach my $id (sort {$bioseq_strain{$a} cmp $bioseq_strain{$b}} keys %bioseq_strain){ push @proteins_in_cluster, $tmp{$id}; push @bioseq_ids, $id; } } %tmp = (); $proteins_in_cluster_str = join("','", @proteins_in_cluster); #print "cluster_id=$cluster_id proteins=".join(",", @proteins_in_cluster) ."
"; close IN; #### Return some information about these biosequences $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name,BS.biosequence_gene_name, BS.biosequence_accession, BS.biosequence_desc, BS.biosequence_seq,BSS.set_name, DBX.accessor,DBX.accessor_suffix, BAG.annotated_gene_id, BPS.transmembrane_class, BPS.transmembrane_topology, BPS.has_signal_peptide, BPS.has_signal_peptide_probability, BPS.signal_peptide_length, BPS.signal_peptide_is_cleaved, BPS.published_ng_per_ml, BPS.pub_ng_ml_source, PID.probability, PID.confidence, PID.n_observations, PPL.level_name, BSRT.relationship_name, BS_REP.biosequence_name as "group_rep", BS_REF.biosequence_name as "reference_biosequence_name", BS_REF_REP.biosequence_name as "ref_group_rep", BS_REF_REF_REP.biosequence_name as "ref_ref_group_rep", BPS.tm_sigp_source, LEN(CONVERT(varchar(MAX), BS.biosequence_seq)) as "length" FROM $TBAT_ATLAS_BUILD AB JOIN $TBAT_BIOSEQUENCE_SET BSS ON (AB.biosequence_set_id = BSS.biosequence_set_id AND AB.atlas_build_id = $atlas_build_id) INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) LEFT JOIN $TBAT_BIOSEQUENCE_ANNOTATED_GENE BAG ON ( BAG.biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_PROPERTY_SET BPS ON ( BS.biosequence_id = BPS.biosequence_id ) LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID ON ( PID.biosequence_id = BS.biosequence_id AND PID.atlas_build_id = $atlas_build_id) LEFT JOIN $TBAT_BIOSEQUENCE BS_REP ON ( BS_REP.biosequence_id = PID.represented_by_biosequence_id ) LEFT JOIN $TBAT_PROTEIN_PRESENCE_LEVEL PPL ON ( PPL.protein_presence_level_id = PID.presence_level_id ) LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP BSR ON ( BSR.related_biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP_TYPE BSRT ON ( BSRT.biosequence_relationship_type_id = BSR.relationship_type_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF ON ( BS_REF.biosequence_id = BSR.reference_biosequence_id ) LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID_REF ON ( PID_REF.biosequence_id = BS_REF.biosequence_id AND PID_REF.atlas_build_id = $atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REP ON ( BS_REF_REP.biosequence_id = PID_REF.represented_by_biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP BSR_REF ON ( BSR_REF.related_biosequence_id = BS_REF.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REF ON ( BS_REF_REF.biosequence_id = BSR_REF.reference_biosequence_id ) LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID_REF_REF ON ( PID_REF_REF.biosequence_id = BS_REF_REF.biosequence_id ) AND ( PID_REF_REF.atlas_build_id = $atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REF_REP ON ( BS_REF_REF_REP.biosequence_id = PID_REF_REF.represented_by_biosequence_id ) WHERE BS.biosequence_name in ('$proteins_in_cluster_str') ~; my @rows = $sbeams->selectHashArray($sql); my $n_rows = scalar @rows; ## order result set my %result =(); foreach my $biosequence(@rows){ $result{$biosequence->{biosequence_name}} = $biosequence; } my @rows =(); foreach my $biosequence_name (@proteins_in_cluster){ push @rows, $result{$biosequence_name}; } my $biosequences = \@rows; ############################################################################# #### Print out a summary of the proteins ############################################################################# my %biosequence_id_info = (); my $htmlcontent = ''; my $tr = 'class="hoverable"'; if ($sbeams->output_mode() eq 'html') { $htmlcontent = qq~ ~; } foreach my $biosequence (@rows){ #### Build the full identification status my $biosequence_name = $biosequence->{biosequence_name}; my $presence_level = $biosequence->{level_name}; my $redundancy_rel = $biosequence->{relationship_name}; my $identification_status = ''; my %prepositions = ( subsumed=>"by", "ntt-subsumed"=>"by", identical=>"to", indistinguishable=>"from" ); next if (defined $biosequence_id_info{$biosequence_name}); if (defined $presence_level) { $identification_status = $presence_level; } elsif (defined $redundancy_rel) { $identification_status = $redundancy_rel; } if ($prepositions{$identification_status}) { $identification_status .= " $prepositions{$identification_status} $biosequence->{reference_biosequence_name}"; } $biosequence_id_info{$biosequence_name}{'biosequence_id'} = $biosequence->{biosequence_id}; #### Display a large title, status, and status description $biosequence_id_info{$biosequence_name}{'Identification Status'} = $identification_status; if ($sbeams->output_mode() eq 'html') { $htmlcontent .= $sbeamsMOD->encodeSectionItem( key=>"$biosequence_name", tr_info => $tr, value=>$biosequence->{biosequence_desc} ); } $biosequence_id_info{$biosequence_name}{biosequence_gene_name} = $biosequence->{biosequence_gene_name}; $biosequence_id_info{$biosequence_name}{'Sequence Length'} = $biosequence->{length}; my $group_rep = ''; # if this bioseq has a presence_level ... if (defined $biosequence->{group_rep}) { $biosequence_id_info{$biosequence_name}{'Protein Group'} = $biosequence->{group_rep}; # if this bioseq is ident/indist from a bioseq with a presence level ... } elsif ( defined $biosequence->{ref_group_rep}) { $biosequence_id_info{$biosequence_name}{'Protein Group'} = $biosequence->{ref_group_rep}; # if this bioseq is ident/indist from an indist bioseq ... } elsif ( defined $biosequence->{ref_ref_group_rep}) { $biosequence_id_info{$biosequence_name}{'Protein Group'} = $biosequence->{ref_ref_group_rep}; } $biosequence_id_info{$biosequence_name}{'Protein Group'} = "' . $biosequence_id_info{$biosequence_name}{'Protein Group'} . ''; if ( defined $biosequence->{estimated_ng_per_ml}) { my $conc_string = sprintf("%0.1f ng/ml (%s uncertainty)", $biosequence->{estimated_ng_per_ml}, $biosequence->{abundance_uncertainty}); $biosequence_id_info{$biosequence_name}{'Estimated concentration'} = $conc_string; } if ( defined $biosequence->{published_ng_per_ml}) { my $conc_string = sprintf("%0.1f ng/ml (%s)", $biosequence->{published_ng_per_ml}, $biosequence->{pub_ng_ml_source}); $biosequence_id_info{$biosequence_name}{'Published concentration'} = $conc_string; } # Check to see if there is ortholog information # my $orthologs = get_ortholog_information( $biosequence ); # if ( $orthologs ) { # $biosequence_id_info{$biosequence_name}{'Ortholog Group'} = $orthologs; # }else{ # $biosequence_id_info{$biosequence_name}{'Ortholog Group'} = ''; # } } ############################################################################# #### Continue with main query #### Define the desired columns in the query #### [friendly name used in url_cols,SQL,displayed column title] my $exp_cat_str = "'". join("','", split(",", $parameters{exp_cat})) ."'"; my $group_by_clause = ''; my $sample_category_clause = ''; my @labels = ( 'Accession', 'Start', 'Pre AA','Sequence', 'Fol AA', 'ESS', 'NET', 'NMC' ,'Best Prob', 'Best Adj Prob', 'N Obs', 'EOS', 'SSRT', 'N Prot Map', 'N Gen Loc', 'N Experiments', 'Subpep of', 'Isolate Map' ); my @column_array = ( ["peptide_accession","P.peptide_accession","Accession"], ["start_in_biosequence", "PM.start_in_biosequence", "Start"], ["preceding_residue","PI.preceding_residue","Pre AA"], ["peptide_sequence","P.peptide_sequence","Sequence"], ["following_residue","PI.following_residue","Fol AA"], ["suitability_score","0.0001","ESS"], ["highest_n_enzymatic_termini","''","NET"], ["lowest_n_missed_cleavages","''","NMC"], ["best_probability","STR(PI.best_probability,7,3)","Best Prob"], ["best_adjusted_probability","STR(PI.best_adjusted_probability,7,3)","Best Adj Prob"], ); my $group_by_clause .= "GROUP BY P.peptide_accession, PM.start_in_biosequence, PI.preceding_residue,P.peptide_sequence,PI.following_residue,PI.best_probability,PI.best_adjusted_probability,"; push @column_array, (["n_observations","PI.n_observations","N Obs"]); $group_by_clause .= "PI.n_observations,"; push @column_array, ( ["empirical_proteotypic_score","STR(PI.empirical_proteotypic_score,7,2)","EOS"], ["SSRCalc_relative_hydrophobicity","STR(P.SSRCalc_relative_hydrophobicity,7,2)","SSRT"], ["n_protein_mappings","PI.n_protein_mappings","N Prot Map"], ["n_genome_locations","PI.n_genome_locations","N Gen Loc"], ["sample_ids","PISB.atlas_search_batch_id","N Experiments"], ["is_subpeptide_of","PI.is_subpeptide_of",'Subpep of'], ["peptide_instance_id","PI.peptide_instance_id","peptide_instance_id"], ["isolate_map","''","Isolate Map"], ["biosequence_id","PM.matched_biosequence_id", "biosequence_id"], ); $group_by_clause .= "PI.empirical_proteotypic_score,P.SSRCalc_relative_hydrophobicity,PI.n_protein_mappings,PI.n_genome_locations,PI.is_subpeptide_of,PI.peptide_instance_id, PISB.atlas_search_batch_id, PI.is_subpeptide_of, PM.matched_biosequence_id"; if ($parameters{exp_cat}){ $sample_category_clause = "AND SC.name in ($exp_cat_str)"; } #### Build the columns part of the SQL statement my %colnameidx = (); my @column_titles = (); my $columns_clause = $sbeams->build_SQL_columns_list( column_array_ref=>\@column_array, colnameidx_ref=>\%colnameidx, column_titles_ref=>\@column_titles ); my $peptide_instance_clause=''; my $biosequence_id_str = join(",", @bioseq_ids); #### Define a query to return peptides for this protein $sql = qq~ SELECT DISTINCT $columns_clause FROM $TBAT_PEPTIDE_INSTANCE PI JOIN $TBAT_PEPTIDE_INSTANCE_SEARCH_BATCH PISB ON (PISB.peptide_instance_id = PI.peptide_instance_id) JOIN $TBAT_ATLAS_BUILD_SEARCH_BATCH ASB ON (ASB.ATLAS_SEARCH_BATCH_ID = PISB.ATLAS_SEARCH_BATCH_ID) JOIN $TBAT_SAMPLE S ON (S.SAMPLE_ID = ASB.SAMPLE_ID) JOIN $TBAT_SAMPLE_CATEGORY SC ON (SC.id = S.sample_category_id) INNER JOIN $TBAT_PEPTIDE P ON ( PI.peptide_id = P.peptide_id ) LEFT JOIN $TBAT_PEPTIDE_MAPPING PM ON ( PI.peptide_instance_id = PM.peptide_instance_id ) INNER JOIN $TBAT_ATLAS_BUILD AB ON ( PI.atlas_build_id = AB.atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) LEFT JOIN $TB_ORGANISM O ON ( BSS.organism_id = O.organism_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS ON ( PM.matched_biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) LEFT JOIN $TBAT_PEPTIDE_INSTANCE_ANNOTATION PIA ON PIA.peptide_instance_id = PI.peptide_instance_id AND PIA.peptide_instance_id = PI.peptide_instance_id LEFT JOIN $TBAT_SPECTRUM_ANNOTATION_LEVEL SAL ON SAL.spectrum_annotation_level_id = PIA.spectrum_annotation_level_id AND PIA.record_status != 'D' WHERE 1 = 1 AND AB.atlas_build_id IN ( $atlas_build_id ) AND BS.biosequence_id in ($biosequence_id_str) $sample_category_clause $group_by_clause ORDER BY P.peptide_accession ~; #### Define the hypertext links for columns that need them %url_cols = ( 'Accession' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=%V&action=QUERY", ); %hidden_cols = ( 'best_adjusted_probability' => 1, 'peptide_instance_id' => 1, 'biosequence_id' => 1 ); ######################################################################### #### If QUERY or VIEWRESULTSET was selected, display the data if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) { #### Show the SQL that will be or was executed $sbeams->display_sql(sql=>$sql) if ($show_sql); #### If the action contained QUERY, then fetch the results from #### the database if ($apply_action =~ /(QUERY|GO)/i) { #### Fetch the results from the database server $sbeams->fetchResultSet( sql_query=>$sql, resultset_ref=>$resultset_ref, ); #### Post process the resultset postProcessResultset( rs_params_ref=>\%rs_params, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, atlas_build_id => $atlas_build_id, column_titles_ref=>\@column_titles, bioseq_strain => \%bioseq_strain, ); #### Store the resultset and parameters to disk resultset cache $rs_params{set_name} = "SETME"; $sbeams->writeResultSet( resultset_file_ref=>\$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME", ); } #### Get protein structure information my ($obs, $peptides, $pep_nobs, $seq2instance, $pep_info); my %peptide_list =(); foreach my $biosequence(@$biosequences){ ( $obs, $peptides, $pep_nobs, $seq2instance, $pep_info ) = getPeptideCount( biosequence => $biosequence, resultset_ref => $resultset_ref ); my $cnt = scalar(uniq @$peptides ); $peptide_list{$biosequence} = $peptides; $biosequence_id_info{$biosequence->{biosequence_name}}{'Distinct Peptides'} = $cnt; $biosequence_id_info{$biosequence->{biosequence_name}}{'Total Observations'} = $obs; } ### biosequence_id_info my @data =(); my @headings = ( 'Sequence Length', 'biosequence_gene_name', 'Identification Status', 'Protein Group', #'Ortholog Group', 'Distinct Peptides', 'Total Observations' ); foreach my $biosequence_name(@proteins_in_cluster){ my @record; push @record, $bioseq_strain{$biosequence_id_info{$biosequence_name}{'biosequence_id'}}; push @record, "$biosequence_name"; foreach my $col (@headings){ push @record , $biosequence_id_info{$biosequence_name}{$col} || ''; } push @data , \@record; } if ($htmlmode){ @headings = ('Strain/Isolate', 'Protein Name', @headings); $htmlcontent .= "
\n"; print scalar $sbeams->make_toggle_section( neutraltext => "$protein_name Ortholog Group", sticky => 1, name => 'getprotein_overview_div', barlink => 1, visible => 1, content => $htmlcontent ); my $html = $sbeamsMOD->create_table (data => \@data, column_names=> \@headings, table_name => "Protein Identification Summary", table_id => "protein_info", align => [qw(left center center center center center center)], sortable => 1); print $html; } ### isolate_alignment my $isolate_alignment = $sbeamsMOD->get_alignment_display(atlas_build_id => $atlas_build_id, bioseq_id => join(",", @bioseq_ids), protein_list => $parameters->{protein_list} || '', orig_bioseq_id => $parameters->{orig_bioseq_id} || '', bioseq_strain => \%bioseq_strain, sample_category_contraint => $exp_cat_str, ); if ($htmlmode){ print scalar $sbeams->make_toggle_section( neutraltext => "Peptide Mapping and Coverage", sticky => 1, name => 'gene_tree_div', barlink => 1, visible => 1, content => $isolate_alignment ); } if (scalar keys %unique_sequences > 1){ if ($htmlmode){ print scalar $sbeams->make_toggle_section( neutraltext => "Gene Tree", sticky => 1, name => 'gene_tree_div', barlink => 1, visible => 1, content => $tree_plot ); } if (%pfam_domain){ my $domain_plot = ''; if($htmlmode){ my @ps = (); foreach my $p(@proteins_in_cluster){ if (defined $pfam_domain{$p}){ push @ps, $p; } } foreach my $biosequence_name(keys %pfam_domain){ my $length = $biosequence_id_info{$biosequence_name}{'Sequence Length'}; $pfam_domain{$biosequence_name}{length} = $length; } my $domain_plot = $sbeamsMOD->get_pfam_domain_display(data => \%pfam_domain, list => \@ps); @headings = ('protein name', 'alignment start','alignment end','envelope start','envelope end', 'hmm acc','hmm name','domain type','hmm start','hmm end','hmm length', 'bit score','e value'); @data = (); foreach my $prot (@proteins_in_cluster){ if (defined $pfam_domain{$prot}){ foreach my $regions(@{$pfam_domain{$prot}{regions}}){ my @record = ($prot); foreach my $col (qw(aliStart aliEnd start end accession description type hmmstart hmmend hmmlen bitscore score)){ push @record, $regions->{metadata}{$col}; } push @data, \@record; } } } print $sbeamsMOD->create_table (data => \@data, column_names=> \@headings, table_name => "Pfam Domain", table_id => "pfam_domain", align => [qw(left center center center center center center)], sortable => 1, plot_html => $domain_plot); } } } ############################################################################# #### Observed peptides ############################################################################# my $obs_help = $sbeamsMOD->get_table_help(column_titles_ref=>\@labels); #### Since we don't display the controls, make the default page size #### huge, otherwise the user may not see all peptides $rs_params{page_size} = 500; #### get correct flanking AA info $sql = qq~ SELECT DISTINCT P.peptide_accession AS "peptide_accession", PM.start_in_biosequence AS "start", COALESCE(PM.peptide_preceding_residue,PI.preceding_residue) As "preceding_residue", P.peptide_sequence AS "peptide_sequence", COALESCE(PM.peptide_following_residue,PI.following_residue) AS "following_residue", PM.highest_n_enzymatic_termini, PM.lowest_n_missed_cleavages FROM $TBAT_PEPTIDE_INSTANCE PI INNER JOIN $TBAT_PEPTIDE P ON ( PI.PEPTIDE_ID = P.PEPTIDE_ID ) LEFT JOIN $TBAT_PEPTIDE_MAPPING PM ON ( PI.PEPTIDE_INSTANCE_ID = PM.PEPTIDE_INSTANCE_ID ) INNER JOIN $TBAT_ATLAS_BUILD AB ON ( PI.ATLAS_BUILD_ID = AB.ATLAS_BUILD_ID ) LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.BIOSEQUENCE_SET_ID = BSS.BIOSEQUENCE_SET_ID ) LEFT JOIN $TBAT_BIOSEQUENCE BS ON ( PM.MATCHED_BIOSEQUENCE_ID = BS.BIOSEQUENCE_ID ) WHERE 1 = 1 AND AB.atlas_build_id IN ( $atlas_build_id ) AND BS.biosequence_id in ($biosequence_id_str) ORDER BY P.PEPTIDE_ACCESSION ~; my @rows = $sbeams->selectSeveralColumns($sql); my %peptide_info = (); foreach my $row(@rows){ my ($peptide_accession,$start,$preceding_residue,$peptide_sequence,$following_residue, $highest_n_enzymatic_termini,$lowest_n_missed_cleavages) = @$row; push @{$peptide_info{$peptide_accession}{preceding_residue}}, $preceding_residue; push @{$peptide_info{$peptide_accession}{following_residue}}, $following_residue; $peptide_info{$peptide_accession}{highest_n_enzymatic_termini} = $highest_n_enzymatic_termini; $peptide_info{$peptide_accession}{lowest_n_missed_cleavages} = $lowest_n_missed_cleavages; push @{$peptide_info{$peptide_accession}{start}}, $start; } my $peptide_counts = scalar keys %peptide_info; print scalar $sbeams->make_toggle_section( neutraltext => "Distinct Observed Peptides ($peptide_counts)", sticky => 1, name => 'getprotein_observedlist_div', barlink => 1, opendiv => 1, visible => 1, content => $obs_help ) if $htmlmode; #### Calculate the best peptides to use my $best_peptide_information = $bestPeptideSelector->getBestPeptides( atlas_build_id => $atlas_build_id, biosequence_id => $biosequence_id, query_parameters_ref=>\%parameters, resultset_ref => $resultset_ref, column_titles_ref=>\@column_titles, is_trypsin_build => $is_trypsin_build, peptide_info => \%peptide_info, no_escape => 1 ); # $log->debug( Dumper( $best_peptide_information ) ); # $log->debug( Dumper( $resultset_ref ) ); my %instance_ids; my $max_peptides = 40; my $pep_cnt; for my $row ( sort { $b->[9] <=> $a->[9] } @{$resultset_ref->{data_ref}} ) { $pep_cnt++; last if $pep_cnt >= $max_peptides; $instance_ids{$row->[$resultset_ref->{column_hash_ref}->{peptide_instance_id}]}++; } my @pa_accessions; my @sample_list; my $seq_idx = $resultset_ref->{column_hash_ref}->{peptide_sequence}; my $pre_idx = $resultset_ref->{column_hash_ref}->{preceding_residue}; my $fol_idx = $resultset_ref->{column_hash_ref}->{following_residue}; my $ntt_idx = $resultset_ref->{column_hash_ref}->{highest_n_enzymatic_termini}; my $mc_idx = $resultset_ref->{column_hash_ref}->{lowest_n_missed_cleavages}; my $ngl_idx = $resultset_ref->{column_hash_ref}->{n_genome_locations}; my $ess_idx = $resultset_ref->{column_hash_ref}->{suitability_score}; for my $row ( @{$resultset_ref->{data_ref}} ) { push @pa_accessions, $row->[0]; $row->[$pre_idx] = join(",", uniq (split(",", $row->[$pre_idx]))); $row->[$fol_idx] = join(",", uniq (split(",", $row->[$fol_idx]))); my $ntt = 0; if ( defined $row->[$ntt_idx] && $row->[$ntt_idx] ne '') { $ntt = $row->[$ntt_idx]; } else { if ( $row->[$pre_idx] =~ /[-RK]/ ) { $ntt++; } if ( $row->[$fol_idx] =~ /[RK]$/ || $row->[$fol_idx] eq '-' ) { $ntt++; } } $row->[$ntt_idx] = $ntt if !defined $row->[$ntt_idx]; my $mc = 0; if ( defined $row->[$mc_idx] && $row->[$mc_idx] ne '') { $mc = $row->[$mc_idx]; } elsif ( $row->[$seq_idx] =~ /[RK][^P]/ ) { $mc++; } $row->[$mc_idx] = $mc if !defined $row->[$mc_idx]; my $mgl = 0; if ( $row->[$ngl_idx] > 1 ) { $mgl++; } push @sample_list, [ $row->[0], $row->[$resultset_ref->{column_hash_ref}{'sample_ids'}] ]; for my $idx ($resultset_ref->{column_hash_ref}{'sample_ids'}, $resultset_ref->{column_hash_ref}{'is_subpeptide_of'} ) { $row->[$idx] =~ s/\s//g; my @samples = split( /,/, $row->[$idx] ); if ( $idx == $resultset_ref->{column_hash_ref}{'sample_ids'} ) { my $cnt = scalar( @samples ); $row->[$idx] = ( $htmlmode ) ? " $cnt " : $cnt; } else { $row->[$idx] = scalar( @samples ); } } $row->[0] = ( $htmlmode ) ? "$row->[0] " :$row->[0]; my @errs; push @errs, 'mc' if $mc; push @errs, 'et'.$ntt if $ntt < 2; push @errs, 'mgl' if $mgl; my $annot = ''; if ( @errs) { $annot = '[' . join( ',', @errs ) . ']'; } if ($sbeams->output_mode() eq 'html') { $annot = "$annot"; } $row->[$ess_idx] = sprintf( "%0.2f", $row->[$ess_idx] ) . " $annot"; my $cnt = 0; } # Must... continue... hacking!!! $resultset_ref->{types_list_ref}->[4] = 'varchar'; # $resultset_ref->{precision_list_ref}->[4] = 36; my @row_color_list = ("#f3f1e4","#d3d1c4"); my $distinct_obs_peptide_display = $sbeamsMOD->get_individual_spectra_display( column_titles_ref=>\@column_titles, colnameidx_ref => \%colnameidx, resultset_ref=> $resultset_ref, hidden_cols_ref=>\%hidden_cols ); print "$distinct_obs_peptide_display" if $htmlmode; print "" if $htmlmode; ## close section! ############################################################################# #### Display synthetic peptide info ############################################################################# unless ( $sbeams->isGuestUser() ) { #### Display the best peptide information my $dirty_peptide_display = $bestPeptideSelector->get_dirty_peptide_display( atlas_build_id => $current_page->{atlas_build_id}, base_url=>$base_url, biosequence_id => $biosequence_id, tr_info => '', ); if ( $htmlmode && $dirty_peptide_display ) { print scalar $sbeams->make_toggle_section( neutraltext => 'Synthetic peptide info', sticky => 1, name => 'getprotein_dirty_peptide_div', #ToDo?# tooltip => 'Show/Hide Section', barlink => 1, visible => 1, content => $dirty_peptide_display); } } ############################################################################# #### Display the peptide map and samples list ############################################################################# my $sample_ids = getSampleList( sample_list => \@sample_list ); if ($htmlmode) { my $sampleMap = $sbeamsMOD->getSampleMapDisplay( sample_ids => $sample_ids, instance_ids => \%instance_ids, atlas_build_id => $atlas_build_id, sample_category_contraint => $exp_cat_str, no_header => 1, sample_category => 1, header_text => "Per-sample source expression for $max_peptides most highly observed peptides", second_header => "(Peptides listed by accession, * denotes single genome mapping)" ); print scalar $sbeams->make_toggle_section( neutraltext => 'Experiment Peptide Map', sticky => 1, name => 'getprotein_samplemap_div', #ToDo?# tooltip => 'Show/Hide Section', barlink => 1, visible => 1, content => "$sampleMap
" ); my $sampleDisplay = $sbeamsMOD->getProteinSampleDisplay( sample_ids => $sample_ids, no_header => 1, rows_to_show => 25, max_rows => 500, sample_category => 1, ); print ''; print scalar $sbeams->make_toggle_section( neutraltext => 'Observed in Experiments', sticky => 1, name => 'getprotein_samplelist_div', #ToDo?# tooltip => 'Show/Hide Section', barlink => 1, visible => 1, content => "$sampleDisplay
" ); print " ~; } #### If QUERY was not selected, then tell the user to enter some parameters } else { if ($sbeams->invocation_mode() eq 'http') { print "

Select parameters above and press QUERY

\n"; } else { print "You need to supply some parameters to constrain the query\n"; } } $sbeamsMOD->display_page_footer(); } # end handle_request sub get_ortholog_information { my $biosequence = shift; return unless $biosequence->{biosequence_name}; my $sql =<<" END"; SELECT ortholog_group FROM $TBBL_ORTHOLOG WHERE entry_accession = '$biosequence->{biosequence_name}' END my $link = ''; my $sth = $sbeams->get_statement_handle( $sql ); while ( my @row = $sth->fetchrow_array() ) { $link .= qq~  $row[0]\n~; } return $link if $link; $sql =<<" END"; SELECT ortholog_group FROM $TBBL_ORTHOLOG O JOIN $TBAT_SEARCH_KEY SK ON SK.search_key_name = O.entry_accession JOIN $TBAT_BIOSEQUENCE B ON B.biosequence_name = SK.resource_name WHERE B.biosequence_name = '$biosequence->{biosequence_name}' END my $sth = $sbeams->get_statement_handle( $sql ); while ( my @row = $sth->fetchrow_array() ) { $link .= qq~  $row[0]\n~; } return $link; } sub getPeptideCount { my %args = @_; my $SUB_NAME = $sbeams->get_subname(); #### Decode the argument list my $resultset_ref = $args{'resultset_ref'} || die "ERROR[$SUB_NAME]: resultset_ref not passed"; my $biosequence = $args{'biosequence'} || die "ERROR[$SUB_NAME]: biosequence not passed"; my $line_length = $args{'line_length'} || 70; my $word_length = $args{'word_length'} || 10; my $enzyme = $args{'enzyme'} || ''; my $total_observations = $args{'total_observations'}; my $biosequence_id = $biosequence->{biosequence_id}; # removed DSC 2008-03 - let caller decide if it should be displayed #### Don't display unless HTML # return unless ($sbeams->output_mode() eq 'html'); #### Get the hash of indices of the columns my %col = %{$resultset_ref->{column_hash_ref}}; #### Loop over all the peptides my $data_ref = $resultset_ref->{data_ref}; my @peptides = (); my %peptides; my %seq2instance; my %map_info; my %peptide_mapping; foreach my $row (@{$data_ref}) { my @biosequence_ids = split(",", $row->[$col{biosequence_id}]); my %biosequence_ids = map {$_ => 1} @biosequence_ids; $peptides{$row->[$col{peptide_sequence}]} = $row->[$col{n_observations}]; $seq2instance{$row->[$col{peptide_sequence}]} = $row->[$col{peptide_instance_id}]; $map_info{$row->[$col{peptide_sequence}]} = { acc => $row->[$col{peptide_accession}], nobs => $row->[$col{n_observations}], inst => $row->[$col{peptide_instance_id}], n_gen => $row->[$col{n_genome_locations}] }; next if(not defined $biosequence_ids{$biosequence_id}); push(@peptides,$row->[$col{peptide_sequence}]); if (not defined $peptide_mapping{$row->[$col{biosequence_id}]}{$row->[$col{peptide_sequence}]} ){ $total_observations += $row->[$col{n_observations}]; } $peptide_mapping{$row->[$col{biosequence_id}]}{$row->[$col{peptide_sequence}]} =1; } return( $total_observations, \@peptides, \%peptides, \%seq2instance, \%map_info ); # return( $total_observations, \@peptides ); } sub getSampleList { my %args = @_; #### Decode the argument list return [] unless $args{sample_list}; # each row has an accession, sample_string pair. my %observed_samples; foreach my $row (@{$args{sample_list}}) { my $observed_sample_list = $row->[1]; my @all = split(/[,;]/,$observed_sample_list); foreach my $element ( @all ) { $observed_samples{$element}++; } } my @keys = sort { "\L$a" cmp "\L$b" } ( keys( %observed_samples ) ); return \@keys; } ############################################################################### ## postProcessResultset ## ## ################################################################################ sub postProcessResultset { my %args = @_; #### Process the arguments list my $resultset_ref = $args{'resultset_ref'}; my $rs_params_ref = $args{'rs_params_ref'}; my $query_parameters_ref = $args{'query_parameters_ref'}; my $column_titles_ref = $args{'column_titles_ref'}; my $atlas_build_id = $args{'atlas_build_id'}; my $bioseq_strain = $args{'bioseq_strain'}; my %rs_params = %{$rs_params_ref}; my %parameters = %{$query_parameters_ref}; my $n_rows = scalar(@{$resultset_ref->{data_ref}}); my $cols = $resultset_ref->{column_hash_ref}; my @data =(); my %peptide_samples =(); my %peptide_orgs = (); my %peptide_ids =(); my $sql = qq~ SELECT ASB.atlas_search_batch_id, S.SAMPLE_ID FROM $TBAT_SAMPLE S JOIN $TBAT_ATLAS_SEARCH_BATCH ASB ON (S.SAMPLE_ID = ASB.SAMPLE_ID) JOIN $TBAT_ATLAS_BUILD_SEARCH_BATCH ABSB ON (ABSB.atlas_search_batch_id = ASB.atlas_search_batch_id) WHERE ABSB.ATLAS_BUILD_ID = $atlas_build_id ~; my %asb2sample_id=$sbeams->selectTwoColumnHash($sql); #$resultset_ref->{data_ref}->[$i]->[$cols->{sample_ids}] = join(",", keys %{$pi_samples->{$peptide_accession}}); for (my $i=0; $i<$n_rows; $i++) { my $peptide_accession = $resultset_ref->{data_ref}->[$i]->[$cols->{peptide_accession}]; my $asb_id = $resultset_ref->{data_ref}->[$i]->[$cols->{sample_ids}]; my $biosequence_id = $resultset_ref->{data_ref}->[$i]->[$cols->{'biosequence_id'}]; $peptide_samples{$peptide_accession}{$asb2sample_id{$asb_id}} =1; $peptide_orgs{$peptide_accession}{$bioseq_strain->{$biosequence_id}} =1; $peptide_ids{$peptide_accession}{$biosequence_id} =1; } my %processed = (); for (my $i=0; $i<$n_rows; $i++) { my $peptide_accession = $resultset_ref->{data_ref}->[$i]->[$cols->{peptide_accession}]; $resultset_ref->{data_ref}->[$i]->[$cols->{sample_ids}] = join(",", sort {$a <=> $b} keys %{$peptide_samples{$peptide_accession}}); $resultset_ref->{data_ref}->[$i]->[$cols->{isolate_map}] = join(",", sort {$a cmp $b} keys %{$peptide_orgs{$peptide_accession}}); $resultset_ref->{data_ref}->[$i]->[$cols->{biosequence_id}]= join(",", sort {$a cmp $b} keys %{$peptide_ids{$peptide_accession}}); my $values = join(",", @{$resultset_ref->{data_ref}->[$i]}); if (! $processed{$values}){ #print join(",", @{$resultset_ref->{data_ref}->[$i]})."
";; push @data, $resultset_ref->{data_ref}->[$i]; $processed{$values} = 1; } } $resultset_ref->{data_ref} = \@data; }