#!/usr/local/bin/perl ############################################################################### # Program : GetVariantEvidence # $Id: GetProtein 7447 2013-10-01 23:17:07Z edeutsch $ # # Description : Prints summary of ... # # # SBEAMS is Copyright (C) 2000-2014 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; $|++; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; $sbeams = new SBEAMS::Connection; my $htmlmode; use SBEAMS::BioLink; use SBEAMS::BioLink::Tables; my $biolink = SBEAMS::BioLink->new(); $biolink->setSBEAMS($sbeams); use SBEAMS::BioLink; my $biolink = new SBEAMS::BioLink; use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; use SBEAMS::PeptideAtlas::ProtInfo; use SBEAMS::PeptideAtlas::GetOrigeneCov; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); my $origenecov = new SBEAMS::PeptideAtlas::GetOrigeneCov; use SBEAMS::PeptideAtlas::KeySearch; my $keySearch = new SBEAMS::PeptideAtlas::KeySearch; $keySearch->setSBEAMS($sbeams); use SBEAMS::PeptideAtlas::BestPeptideSelector; my $bestPeptideSelector = new SBEAMS::PeptideAtlas::BestPeptideSelector; $bestPeptideSelector->setSBEAMS($sbeams); $bestPeptideSelector->setAtlas($sbeamsMOD); # Global sequence coverage array, will be populated post-graphic my @coverage; my $max_intensity = 1; my %coverage=(); my @methods = qw(ITCID FTCID ITETD FTETD HCD); # Swiss Prot annotations my @sp_rows; my $sp_rationale; my $current_page = { organism => '', atlas_build_id => '' }; use constant MIN_OBS_LENGTH => 6; use constant MAX_OBS_LENGTH => 40; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin', 'PeptideAtlas_readonly', 'PeptideAtlas_exec'], #connect_read_only=>1, allow_anonymous_access=>1, )); #### Read in the default input parameters my $n_params_found = $sbeams->parse_input_parameters( q=>$q, parameters_ref=>\%parameters); #$sbeams->printDebuggingInfo($q); my $curr_bid = $sbeamsMOD->getCurrentAtlasBuildID( parameters_ref => \%parameters ); my $msg = $sbeams->update_PA_table_variables($curr_bid); #### Process generic "state" parameters before we start $sbeams->processStandardParameters(parameters_ref=>\%parameters); # Cache current output mode $htmlmode = ( $sbeams->output_mode() eq 'html' ) ? 1 : 0; #### Decide what action to take based on information so far if ($parameters{action} eq "???") { # Some action } else { my $project_id = $sbeamsMOD->getProjectID( atlas_build_id => $parameters{atlas_build_id} ); # TMF processDatabaseActions(); $sbeamsMOD->display_page_header(project_id => $project_id, init_tooltip => 1); handle_request(ref_parameters=>\%parameters); $sbeamsMOD->display_page_footer(); } } # end main ############################################################################### # Process insert/update/delete, if any, then redirect # TMF: copied this sub from ShowObservedSpectrum, then made a few # changes ############################################################################### sub processDatabaseActions { my $redirect = 0; my %parameters; $sbeams->parse_input_parameters(q=>$q,parameters_ref=>\%parameters); $sbeams->processStandardParameters(parameters_ref=>\%parameters); my $apply_action = $q->param('apply_action'); if ($apply_action eq "UPDATE ANNOTATION") { my %rowdata = ( spectrum_annotation_level_id => $parameters{user_spectrum_annotation}, comment => $parameters{user_spectrum_commment} ); my $PK = $sbeams->updateOrInsertRow( update => 1, # TMF table_name => $TBAT_PROTEIN_IDENTIFICATION_ANNOTATION, rowdata_ref => \%rowdata, # TMF next 2 lines PK => 'protein_identification_annotation_id', PK_value => $parameters{protein_identification_annotation_id}, return_PK => 1, add_audit_parameters => 1 ); if ($PK) { $sbeams->set_page_message( type => 'Info', msg => "Your annotation record has been updated." ); } else { $sbeams->set_page_message( type => 'Error', msg => "ERROR: There was a problem updating your annotation record." ); } $redirect++; } elsif ($apply_action eq "ADD ANNOTATION") { my $PK; my $error_reason= ''; if ( ! $parameters{protein_identification_id} ) { $error_reason .= "Need protein_identification_id for ADD ANNOTATION."; } else { my %rowdata = ( annotator_contact_id => $sbeams->getCurrent_contact_id(), # TMF #spectrum_identification_id => $parameters{spectrum_identification_id}, protein_identification_id => $parameters{protein_identification_id}, spectrum_annotation_level_id => $parameters{user_spectrum_annotation}, comment => $parameters{user_spectrum_commment} ); $PK = $sbeams->updateOrInsertRow( insert => 1, # TMF table_name => $TBAT_PROTEIN_IDENTIFICATION_ANNOTATION, rowdata_ref => \%rowdata, return_PK => 1, add_audit_parameters => 1 ); } if ($PK) { $sbeams->set_page_message( type => 'Info', msg => "Your annotation record has been added." ); } else { $sbeams->set_page_message( type => 'Error', msg => "ERROR: There was a problem inserting your annotation record. $error_reason" ); } $redirect++; } elsif ($apply_action eq "DELETE ANNOTATION") { my %rowdata = ( record_status => 'D' ); my $PK = $sbeams->updateOrInsertRow( update => 1, #TMF table_name => $TBAT_PROTEIN_IDENTIFICATION_ANNOTATION, rowdata_ref => \%rowdata, PK => 'protein_identification_annotation_id', PK_value => $parameters{protein_identification_annotation_id}, return_PK => 1, add_audit_parameters => 1 ); if ($PK) { $sbeams->set_page_message( type => 'Info', msg => "Your annotation record has been deleted." ); } else { $sbeams->set_page_message( type => 'Error', msg => "ERROR: There was a problem deleting your annotation record." ); } $redirect++; } if ($redirect) { $q->delete( 'apply_action' ); print $q->redirect( $q->self_url() ); exit; } } ############################################################################### # Handle Request ############################################################################### sub handle_request { my %args = @_; my $spacer = $sbeams->getGifSpacer( 900 ); #### Process the arguments list my $ref_parameters = $args{'ref_parameters'} || die "ref_parameters not passed"; my %parameters = %{$ref_parameters}; # put a spacer so that showing hidden content doesn't mangle the layout print "
$spacer\n" if $htmlmode; #### Get the HTML to display the tabs my $tabMenu = $sbeamsMOD->getTabMenu( parameters_ref => \%parameters, program_name => $PROG_NAME, ); print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html'); my $contact_id = $sbeams->getCurrent_contact_id(); # die $contact_id; my $protein_list_sql = ''; if ( $parameters{atlas_build_id} && $parameters{protein_name} ) { $protein_list_sql = qq~ SELECT DISTINCT AB.atlas_build_id FROM $TBAT_ATLAS_BUILD AB JOIN $TBAT_PROTEIN_LIST_BUILD PLB ON AB.atlas_build_id = PLB.atlas_build_id JOIN $TBAT_PROTEIN_LIST PL ON PL.protein_list_id = PLB.protein_list_id JOIN $TBAT_PROTEIN_LIST_PROTEIN PLP ON PL.protein_list_id = PLP.protein_list_id JOIN $TBAT_BIOSEQUENCE B ON ( B.biosequence_set_id = AB.biosequence_set_ID AND B.biosequence_accession = PLP.protein_name ) WHERE PL.contributor_contact_id = $contact_id AND AB.atlas_build_id = $parameters{atlas_build_id} AND PLP.protein_name = '$parameters{protein_name}' ~; } #### Get the current atlas_build_id based on parameters or session my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID( allow_protein_list_login => 1, protein_list_sql => $protein_list_sql, parameters_ref => \%parameters, ); if (defined($atlas_build_id) && $atlas_build_id < 0) { return; } $parameters{atlas_build_id} = $atlas_build_id; $current_page->{atlas_build_id} = $atlas_build_id; #### Get the search keyword my $protein_name = $parameters{"protein_name"}; my ($tube, $origene_accession,$expected_prot); #### If a new protein_name was supplied, store it if ($protein_name) { $sbeams->setSessionAttribute( key => 'PeptideAtlas_protein_name', value => $protein_name, ); #### Else see if we had one stored } else { $protein_name = $sbeams->getSessionAttribute( key => 'PeptideAtlas_protein_name', ); if ($protein_name) { $parameters{'apply_action'} = 'GO'; } } #### do the same for tube name if ($parameters{ori_tube} ne '') { $sbeams->setSessionAttribute( key => 'PeptideAtlas_ori_tube', value => $parameters{"ori_tube"}, ); $sbeams->setSessionAttribute( key => 'PeptideAtlas_ori_sku', value => $parameters{"ori_sku"}, ); $sbeams->setSessionAttribute( key => 'PeptideAtlas_ori_expected_prot', value => $parameters{"ori_expected_prot"}, ); #### Else see if we had one stored } else { $tube = $sbeams->getSessionAttribute( key => 'PeptideAtlas_ori_tube', ); $origene_accession = $sbeams->getSessionAttribute( key => 'PeptideAtlas_ori_sku', ); $expected_prot = $sbeams->getSessionAttribute( key => 'PeptideAtlas_ori_expected_prot', ); if ($tube and $protein_name) { $parameters{'apply_action'} = 'GO'; } } #### Define some variables for a query and resultset my %resultset = (); my $resultset_ref = \%resultset; my (%url_cols,%hidden_cols,%max_widths,$show_sql); #### Read in the standard form values my $apply_action = $parameters{'action'} || $parameters{'apply_action'}; my $TABLE_NAME = $parameters{'QUERY_NAME'}; #### Set some specific settings for this program my $CATEGORY="Get Protein"; my $PROGRAM_FILE_NAME = $PROG_NAME; my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; my $help_url = "$CGI_BASE_DIR/help_popup.cgi"; #### If the apply action was to recall a previous resultset, do it my %rs_params = $sbeams->parseResultSetParams('q' => $q); my $n_params_found = 0; if ($apply_action eq "VIEWRESULTSET") { $sbeams->readResultSet( resultset_file=>$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, ); $n_params_found = 99; } #### Get a list of accessible project_ids my @accessible_project_ids = $sbeams->getAccessibleProjects(); my $accessible_project_ids = join( ",", @accessible_project_ids ) || '0'; #### Get a hash of available atlas builds my $sql = qq~ SELECT atlas_build_id,atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE project_id IN ( $accessible_project_ids ) AND record_status!='D' ORDER BY atlas_build_name ~; my %atlas_build_names = $sbeams->selectTwoColumnHash($sql); my %name2build = reverse( %atlas_build_names ); my @ordered_atlas_build_ids; for my $name ( sort ( keys( %name2build ) ) ) { push @ordered_atlas_build_ids, $name2build{$name}; } #### Get a list of id's sorted by name # my $sql = qq~ # SELECT DISINCT atlas_build_id,atlas_build_name # FROM $TBAT_ATLAS_BUILD # WHERE project_id IN ( $accessible_project_ids ) # AND record_status!='D' # ORDER BY atlas_build_name # ~; # my @ordered_atlas_build_ids = $sbeams->selectOneColumn($sql); unless ( grep /^$atlas_build_id$/, @ordered_atlas_build_ids ) { my ($build_name) = $sbeams->selectOneColumn( "SELECT atlas_build_name FROM $TBAT_ATLAS_BUILD WHERE atlas_build_id = $atlas_build_id" ); push @ordered_atlas_build_ids, $atlas_build_id; $atlas_build_names{$atlas_build_id} = $build_name; } # die "ID is $atlas_build_id, and name is $atlas_build_names{$atlas_build_id}"; my $tube_pattern = ''; my $tube_pattern2 = ''; my $tube = ''; if($parameters{ori_tube} ne '' and $atlas_build_names{$atlas_build_id} =~ /Origene/i){ $tube = $parameters{ori_tube}; if($parameters{ori_expected_prot} =~ /yes/i ){ $origene_accession = $parameters{ori_sku}; }; $tube =~ /(ISBHOT\d+)\-(\w)(\d+)/; if($4 <= 10 and $4 !~ /^0/){ $tube_pattern = '%'.$1.'%'.$2.$3.'%'; $tube_pattern2 = '%'.$1.'%'.$2.'0'.$3.'%'; }else{ $tube_pattern = '%'.$1.'%'.$2.$3.'%'; } } #### If the output_mode is HTML, then display the form if ($sbeams->output_mode() eq 'html') { print qq~ ~; print "

"; print ""; print $q->start_form(-method=>"POST", -action=>"$base_url", -name=>"SearchForm", ); print ""; print "
PeptideAtlas Build: "; print $q->popup_menu(-name => "atlas_build_id", -values => [ @ordered_atlas_build_ids ], -labels => \%atlas_build_names, -default => $atlas_build_id, -onChange => 'switchAtlasBuild()', ); #print "   "; print "
Protein Name: "; $current_page->{organism} = $sbeamsMOD->getCurrentAtlasOrganism( parameters_ref => { atlas_build_id => $atlas_build_id } ); my $gaggle = $sbeams->getGaggleMicroformat( organism => $current_page->{organism}, data => [$protein_name], object => 'namelist', name => 'Protein name', type => 'direct' ); print $q->textfield( "protein_name", $protein_name); print "
Variant Type: "; print " "; print "
"; if( $atlas_build_names{$atlas_build_id} =~ /Origene/i){ print "   "; print "Origene Tube: "; print $q->textfield( -name => "ori_tube", -value => $tube, -size => 25); } print $q->hidden( "apply_action", ''); print "   "; print $q->submit(-name => "action", -value => 'QUERY', -label => 'GO'); print $q->end_form; print "
"; print "

"; print "$gaggle\n"; } ######################################################################### #### Process all the constraints #### If atlas_build_id was not selected, stop here unless ($atlas_build_id) { if ($sbeams->output_mode() eq 'html') { print "Please select an atlas build, type in a protein name ". "(e.g. ENSP00000290230 for human Ensembl protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide an atlas_build_id', ); } return; } #### If biosequence_name was not selected, stop here unless ($protein_name) { if ($sbeams->output_mode() eq 'html') { print "Please type in a protein name ". "(e.g. ENSP00000290230 for human Ensembl protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide a protein_name', ); } return; } #### Build ATLAS_BUILD constraint my $atlas_build_clause = $sbeams->parseConstraint2SQL( constraint_column=>"AB.atlas_build_id", constraint_type=>"int_list", constraint_name=>"Atlas Build", constraint_value=>$atlas_build_id ); return if ($atlas_build_clause eq '-1'); #### Build PROTEIN_NAME constraint my $biosequence_name_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_name", constraint_type=>"plain_text", constraint_name=>"Protein Name", constraint_value=>$protein_name ); return if ($biosequence_name_clause eq '-1'); #### Define the SQL statement to find the biosequence $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name FROM $TBAT_ATLAS_BUILD AB INNER JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) WHERE 1 = 1 $atlas_build_clause $biosequence_name_clause ~; my %biosequences = $sbeams->selectTwoColumnHash($sql); my $n_biosequences = scalar(keys(%biosequences)); #### If the protein was not found, report the problem if ($n_biosequences == 0) { if ($sbeams->output_mode() eq 'html') { print qq~ Protein identifier '$protein_name' not found in atlas build '$atlas_build_names{$atlas_build_id}'.
Please check selections and try again.

Or, search for the identifer across all major PeptideAtlas builds:   
~; } else { $sbeams->reportException( state => 'ERROR', type => 'RECORD NOT FOUND', message => "The protein entered '$protein_name' was not found ". "in the atlas build '$atlas_build_names{$atlas_build_id}'.", ); } return; } #### If more than one protein was found, list them if ($n_biosequences > 1) { if ($sbeams->output_mode() eq 'html') { print qq~ SeveraL ($n_biosequences) entries were returned from your request for '$protein_name' in the atlas build '$atlas_build_names{$atlas_build_id}'. Please select the appropriate one:

~; while (my ($key,$value) = each (%biosequences)) { print "$value
"; } } else { $sbeams->reportException( state => 'ERROR', type => 'TOO MANY RECORDS', message => qq~More than one ($n_biosequences) was returned from your request for '$protein_name' in the atlas build '$atlas_build_names{$atlas_build_id}'.~, ); } return; } #### Extract the one biosequence_id my ($biosequence_id) = keys(%biosequences); #### Return some information about this biosequence #### In order to get complete protein identification information #### for all biosequences, even those that are identical to an #### indistinguishable, we have to do two/three layers of joins to #### biosequence_relationship and protein_identification. See #### comment below (search for term ref_ref_group_rep). $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name,BS.biosequence_gene_name, BS.biosequence_accession, BS.biosequence_desc, BS.biosequence_seq,BSS.set_name, DBX.accessor,DBX.accessor_suffix, BAG.annotated_gene_id, BPS.transmembrane_class, BPS.transmembrane_topology, BPS.has_signal_peptide, BPS.has_signal_peptide_probability, BPS.signal_peptide_length, BPS.signal_peptide_is_cleaved, BPS.published_ng_per_ml, BPS.pub_ng_ml_source, PID.estimated_ng_per_ml, PID.abundance_uncertainty, PID.probability, PID.confidence, PID.n_observations, PID.norm_PSMs_per_100K, PPL.level_name, BSRT.relationship_name, BS_REP.biosequence_name as "group_rep", BS_REF.biosequence_name as "reference_biosequence_name", BS_REF_REP.biosequence_name as "ref_group_rep", BS_REF_REF_REP.biosequence_name as "ref_ref_group_rep", PID.protein_group_number FROM $TBAT_BIOSEQUENCE_SET BSS INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) LEFT JOIN $TBAT_BIOSEQUENCE_ANNOTATED_GENE BAG ON ( BAG.biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_PROPERTY_SET BPS ON ( BS.biosequence_id = BPS.biosequence_id ) LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID ON ( PID.biosequence_id = BS.biosequence_id ) AND ( PID.atlas_build_id = $atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REP ON ( BS_REP.biosequence_id = PID.represented_by_biosequence_id ) LEFT JOIN $TBAT_PROTEIN_PRESENCE_LEVEL PPL ON ( PPL.protein_presence_level_id = PID.presence_level_id ) LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP BSR ON ( BSR.related_biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP_TYPE BSRT ON ( BSRT.biosequence_relationship_type_id = BSR.relationship_type_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF ON ( BS_REF.biosequence_id = BSR.reference_biosequence_id ) LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID_REF ON ( PID_REF.biosequence_id = BS_REF.biosequence_id ) AND ( PID_REF.atlas_build_id = $atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REP ON ( BS_REF_REP.biosequence_id = PID_REF.represented_by_biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_RELATIONSHIP BSR_REF ON ( BSR_REF.related_biosequence_id = BS_REF.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REF ON ( BS_REF_REF.biosequence_id = BSR_REF.reference_biosequence_id ) LEFT JOIN $TBAT_PROTEIN_IDENTIFICATION PID_REF_REF ON ( PID_REF_REF.biosequence_id = BS_REF_REF.biosequence_id ) AND ( PID_REF_REF.atlas_build_id = $atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS_REF_REF_REP ON ( BS_REF_REF_REP.biosequence_id = PID_REF_REF.represented_by_biosequence_id ) WHERE BS.biosequence_id = $biosequence_id ~; my @rows = $sbeams->selectHashArray($sql); my $n_rows = scalar @rows; # assume first row contains all the info we want my $biosequence = $rows[0]; # Supercede with SwissProt info my $sp_sql = qq~ SELECT BS.biosequence_id, signal_end, signal_start FROM $TBAT_BIOSEQUENCE BS JOIN $TBAT_SWISS_PROT_ANNOTATION SPA ON BS.biosequence_accession = SPA.accession WHERE BS.biosequence_id = $biosequence_id ~; my $sth = $sbeams->get_statement_handle($sp_sql); while ( my @db_row = $sth->fetchrow_array() ) { @sp_rows = @db_row; last; } ############################################################################# # Widget to allow show/hide of overview section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_overview', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); #### Print out a summary of the protein $parameters{variant_type} ||= 'init_met'; my %var2txt = ( init_met => 'Initiator Methionine' ); my $variant_type = $var2txt{$parameters{variant_type}} || 'Unknown'; my $caveat_text = ''; if ($sbeams->output_mode() eq 'html') { my $section_head = "General information for $biosequence->{biosequence_name}"; my $section_header = $sbeamsMOD->encodeSectionHeader( text => $section_head, link => $link ); print qq~ $section_header ~; my $acc=$biosequence->{biosequence_name}; my $prot_link = "$acc"; my $key = "Accession"; print $sbeamsMOD->encodeSectionItem( key=>$key, tr_info => $tr, value=>$prot_link, ); $parameters{atlas_build_id} ||= $atlas_build_id; my $self_link = 'GetVariantEvidence'; my $sep = '?'; for my $param ( qw( variant_type atlas_build_id protein_name action ) ) { $self_link .= $sep . $param . '=' . $parameters{$param}; $sep = ';'; } my $presence_level = $biosequence->{level_name}; my $redundancy_rel = $biosequence->{relationship_name}; my $identification_status = $presence_level || $redundancy_rel; my $ref_link = $self_link; if ( $identification_status !~ /Canonical/i ) { $identification_status .= '' . $sbeams->makeInfoText( '†' ) . ''; $ref_link =~ s/$parameters{protein_name}/$biosequence->{reference_biosequence_name}/; $caveat_text = $sbeams->makeInfoText( "    † This protein has been categorized as subsumed under/possibly distinguished from/identical to a different protein. This may mean that they evidence shown below for this variation may be better instead applied to the canonical protein rather than this protein because the peptides may map to both." ); } if ($identification_status =~ /subsumed/i ) { $identification_status .= "by $biosequence->{reference_biosequence_name} "; } elsif ($identification_status =~ /identical/ || $identification_status =~ /indistinguishable/ ) { $identification_status .= " to $biosequence->{reference_biosequence_name} "; } print $sbeamsMOD->encodeSectionItem( key=>'Identification Status', tr_info => $tr, value=>$identification_status, ); my $group_rep = ''; # if this bioseq has a presence_level ... if (defined $biosequence->{group_rep}) { $group_rep = $biosequence->{group_rep}; # if this bioseq is ident/indist from a bioseq with a presence level ... } elsif ( defined $biosequence->{ref_group_rep}) { $group_rep = $biosequence->{ref_group_rep}; # if this bioseq is ident/indist from an indist bioseq ... } elsif ( defined $biosequence->{ref_ref_group_rep}) { $group_rep = $biosequence->{ref_ref_group_rep}; } my $group_string=qq~ $group_rep (Alignment) ~; print $sbeamsMOD->encodeSectionItem( key=>'Protein Group', tr_info => $tr, value=>$group_string, ); # my $prob_string = sprintf("%0.5f", $biosequence->{probability}); # print $sbeamsMOD->encodeSectionItem( # key=>'ProteinProphet Probability', # tr_info => $tr, # value=>$prob_string, # ); my $up = $sbeamsMOD->get_uniprot_annotation( accession => $protein_name ); # die Dumper( $up ); if ( $parameters{variant_type} eq 'init_met' ) { my $annot_init = 'None'; if ( $up->{INIT_MET} ) { $annot_init = 'Initiator Methionine ' . $up->{INIT_MET}->[0]->{info} . ''; } print $sbeamsMOD->encodeSectionItem( key => "Uniprot Annotation", tr_info => $tr, value => $annot_init, ); } } ############################################################################# #### Continue with main query #### Define the desired columns in the query #### [friendly name used in url_cols,SQL,displayed column title] # ["peptide_accession","P.peptide_accession","
Accession
"], # # my $group_by_clause = ''; my @labels = ( 'Accession', 'Pre AA', 'Sequence', 'Fol AA', 'ESS','Best Prob', 'Best Adj Prob', 'N Obs', 'EOS', 'SSRT', 'N Prot Map', 'N Gen Loc', 'N Samples', 'Subpep of', 'Charge', 'modified_peptide_instance_id' ); my @column_array = ( ["peptide_accession","P.peptide_accession","Accession"], ["preceding_residue","PI.preceding_residue","
Pre AA
"], ["peptide_sequence","P.peptide_sequence","
Sequence
"], ["following_residue","PI.following_residue","
Fol AA
"], ["suitability_score","0.0001","
ESS
"], ["best_probability","STR(PI.best_probability,7,3)","
Best Prob
"], ["best_adjusted_probability","STR(PI.best_adjusted_probability,7,3)","
Best Adj Prob
"], ["n_observations","PI.n_observations","
N Obs
"], ["empirical_proteotypic_score","STR(PI.empirical_proteotypic_score,7,2)","
EOS
"], ["SSRCalc_relative_hydrophobicity","STR(P.SSRCalc_relative_hydrophobicity,7,2)","
SSRT
"], ["n_protein_mappings","PI.n_protein_mappings","
N Prot Map
"], ["n_genome_locations","PI.n_genome_locations","
N Gen Loc
"], ["sample_ids","MPI.sample_ids","N Samples"], ["is_subpeptide_of","PI.is_subpeptide_of",'Subpep of'], ["peptide_instance_id","PI.peptide_instance_id","peptide_instance_id"], ["modified_peptide_sequence","MPI.modified_peptide_sequence","modified_peptide_sequence"], ["n_mod_observations","SUM(MPI.n_observations)","n_mod_observations"], ["peptide_charge","MPI.peptide_charge","Charge"], ["modified_peptide_instance_id","modified_peptide_instance_id","modified_peptide_instance_id"] ); $group_by_clause = 'GROUP BY ' . join( ",", (qw( P.peptide_accession PI.preceding_residue P.peptide_sequence PI.following_residue PI.best_probability PI.best_adjusted_probability PI.n_observations PI.empirical_proteotypic_score P.SSRCalc_relative_hydrophobicity PI.n_protein_mappings PI.n_genome_locations MPI.sample_ids PI.is_subpeptide_of PI.peptide_instance_id MPI.modified_peptide_sequence peptide_charge modified_peptide_instance_id ) ) ) . "\n"; #### Build the columns part of the SQL statement my %colnameidx = (); my @column_titles = (); ## Sends @column_array_ref to build_SQL_columns_list, which ## (1) appends the 2nd element in array to $columns_clause ## (2) fills %colnameidx_ref as a hash with key = 1st element ## and value = 3rd element, and (3) fills @column_titles_ref ## array with the 3rd element my $columns_clause = $sbeams->build_SQL_columns_list( column_array_ref=>\@column_array, colnameidx_ref=>\%colnameidx, column_titles_ref=>\@column_titles ); #### Define a query to return peptides for this protein $sql = qq~ SELECT DISTINCT $columns_clause FROM $TBAT_PEPTIDE_INSTANCE PI INNER JOIN $TBAT_PEPTIDE P ON ( PI.peptide_id = P.peptide_id ) INNER JOIN $TBAT_MODIFIED_PEPTIDE_INSTANCE MPI ON PI.peptide_instance_id = MPI.peptide_instance_id LEFT JOIN $TBAT_PEPTIDE_MAPPING PM ON ( PI.peptide_instance_id = PM.peptide_instance_id ) INNER JOIN $TBAT_ATLAS_BUILD AB ON ( PI.atlas_build_id = AB.atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) LEFT JOIN $TB_ORGANISM O ON ( BSS.organism_id = O.organism_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS ON ( PM.matched_biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) LEFT JOIN $TBAT_PEPTIDE_INSTANCE_ANNOTATION PIA ON PIA.peptide_instance_id = PI.peptide_instance_id WHERE 1 = 1 AND AB.atlas_build_id IN ( $atlas_build_id ) AND BS.biosequence_id = $biosequence_id $group_by_clause ORDER BY P.peptide_accession, SUM(MPI.n_observations) DESC ~; #### Define the hypertext links for columns that need them %url_cols = ( 'Accession' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&searchForThis=%V&action=QUERY", ); # \%$colnameidx{biosequence_gene_name}V& %hidden_cols = ( # 'Suitability Score' => 1, 'Best Adjusted Prob' => 1, 'peptide_instance_id' => 1, # 'is_subpeptide_of' => 1, # 'sample_ids' => 1 ); ######################################################################### #### If QUERY or VIEWRESULTSET was selected, display the data if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) { # $show_sql++; #### Show the SQL that will be or was executed $sbeams->display_sql(sql=>$sql) if ($show_sql); #### If the action contained QUERY, then fetch the results from #### the database if ($apply_action =~ /(QUERY|GO)/i) { #### Fetch the results from the database server $sbeams->fetchResultSet( sql_query=>$sql, resultset_ref=>$resultset_ref, ); #### Store the resultset and parameters to disk resultset cache $rs_params{set_name} = "SETME"; $sbeams->writeResultSet( resultset_file_ref=>\$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME", ); } #### Get protein structure information my $protein_structure = getProteinStructure( biosequence_id => $biosequence_id, ); # die Dumper( $resultset_ref ); my $pcounts = getPeptideCount( biosequence => $biosequence, resultset_ref => $resultset_ref ); # my %results = ( support => \@support, # refute => \@refute, # peptides => \%peptides, # neutral => \@neutral, # tcount => $tobs, # acount => $nobs, # group => $grp, # level => $level ); # die Dumper( $pcounts ); my $peptides = [ @{$pcounts->{support}}, @{$pcounts->{refute}} ]; my $obs = $pcounts->{acount}; my $tcounts = $pcounts->{tcount}; my $ecount = scalar( @{$peptides} ); my %support; for my $pep ( @{$pcounts->{support}} ) { $support{$pep}++; } my %refute; for my $pep ( @{$pcounts->{refute}} ) { $refute{$pep}++; } # die Dumper( $pcounts->{support} ); my $speps = 0; my $smods = 0; my $sobs = 0; my %sseen; my $mpeptides = $pcounts->{peptides}; # die Dumper( $mpeptides ); for my $pep ( keys( %{$mpeptides} ) ) { my $cleanseq = $mpeptides->{$pep}->{peptide_sequence}; next unless $support{$cleanseq}; $smods++; next if $sseen{$cleanseq}++; $speps++; $sobs += $mpeptides->{$pep}->{n_obs}; } my $rpeps = 0; my $rmods = 0; my $robs = 0; my %rseen; for my $pep ( keys( %{$mpeptides} ) ) { my $cleanseq = $mpeptides->{$pep}->{peptide_sequence}; next unless $refute{$cleanseq}; $rmods++; next if $rseen{$cleanseq}++; $rpeps++; $robs += $mpeptides->{$pep}->{n_obs}; } # @support # @refute # @neutral my $peptide_counts = scalar( keys( %{$pcounts->{peptides}} ) ); if ( $htmlmode ) { print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Distinct Peptides', value => "$peptide_counts" . ' 'x100 ); print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Observations', value => "Total: $tcounts, Adjusted: $obs" ); print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Direct Evidence Peptides', value => "$ecount" ); my $uniprot_link = "Uniprot"; my $topfind_link = "TopFind"; print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => 'External Links', value => "$uniprot_link, $topfind_link" ); print $sbeamsMOD->encodeSectionItem( key=>'Protein Description', tr_info => $tr, value=>$biosequence->{biosequence_desc}, ); print "\n"; print "
$caveat_text

\n"; } my $sequence = $biosequence->{biosequence_seq}; # Max length 75 $sequence = substr( $sequence, 0, 75 ) . '...' if length( $sequence ) > 75; my @alignments = ( [ ProteinSeq => $sequence ] ); my $rbase = 'Peptide'; my $sbase = 'Peptide'; if ( $parameters{variant_type} eq 'init_met' ) { $rbase = 'Preserve'; $sbase = 'Cleave'; } my $cnt = 1; my %seen_seq; for my $pep ( @{$pcounts->{support}} ) { next if $seen_seq{$pep}++; my $apep = $pcounts->{peptides}->{$pep}; next if $apep->{mapping} eq 'no'; my $astr = ( $apep->{posn} > 1 ) ? '-' : ''; my $rcnt = length($sequence) - length($pep) - length($astr); $astr .= $pep . '-' x $rcnt; push @alignments, [ $sbase . '_' . $cnt++, $astr ]; } $cnt = ( $rbase eq $sbase ) ? $cnt : 1; for my $pep ( @{$pcounts->{refute}} ) { next if $seen_seq{$pep}++; my $apep = $pcounts->{peptides}->{$pep}; next if $apep->{mapping} eq 'no'; my $astr = ( $apep->{posn} > 1 ) ? '-' : ''; my $rcnt = length($sequence) - length($pep) - length($astr); $astr .= $pep . '-' x $rcnt; push @alignments, [ $rbase . '_' . $cnt++, $astr ]; } print $sbeamsMOD->get_clustal_display( alignments => \@alignments ); print "

\n"; my $support_heading = ''; my $refute_heading = ''; if ( $parameters{variant_type} eq 'init_met' ) { $support_heading = "Evidence supporting cleavage of $variant_type in $biosequence->{biosequence_name}"; $refute_heading = "Evidence supporting preservation of $variant_type in $biosequence->{biosequence_name}"; } my $section_header = $sbeamsMOD->encodeSectionHeader( text => $support_heading, link => $link ); print qq~ $section_header ~; # my $speps = 0; # my $sobs = 0; # for my $pep ( @{$pcounts->{support}} ) { # $speps++; # $sobs += $pcounts->{peptides}->{$pep}->{n_obs}; # } print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Distinct Peptides', value => "$speps" . ' 'x100 ); print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Peptide Observations', value => $sobs ); print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Distinct Modified Peptides', value => $smods ); print "

\n"; my $stable = get_peptide_table ( 'support', $pcounts, \%support ); print "$stable


\n"; my $section_header = $sbeamsMOD->encodeSectionHeader( text => $refute_heading, link => $link ); print qq~ $section_header ~; print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Distinct Peptides', value => "$rpeps" . ' 'x100 ); print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Peptide Observations', value => $robs ); print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => '# Distinct Modified Peptides', value => $rmods ); print "

\n"; my $stable = get_peptide_table ( 'refute', $pcounts, \%refute ); print "$stable


\n"; my $section_header = $sbeamsMOD->encodeSectionHeader( text => "Virtual western peptide expression plot", ); my $heater = get_heatmap ( $pcounts ); print "$section_header
$heater\n"; return; ############################################################# ## Display the sequence graphic ## my $motif = $sbeamsMOD->getBuildMotif( build_id => $current_page->{atlas_build_id} ); # Widget to allow show/hide of graphic section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_graphic', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my %motif_params; my %graphic_params = ( build_id => $atlas_build_id, protein_data => $biosequence, tr_info => $tr, obs_color => $parameters{obs_color} ); my $peptide_instance_clause = ''; my $graphic_section = get_sequence_graphic(%graphic_params, %motif_params, peptide_instance_constraint => $peptide_instance_clause ); my $graphic_head = $sbeamsMOD->encodeSectionHeader( text => "Sequence Motifs", link => $link ); print "$graphic_head
$graphic_section" if $htmlmode; ############################################################# ## Display annotated sequence - this routine makes its own section header ## #### Display the annotated sequence # TMF: added for special need # print STDERR "$protein_name\t"; if ( defined $parameters{show_clustal} ) { $sbeams->setSessionAttribute( key => 'GetProtein_ShowVars', value => $parameters{show_clustal} ); } else { $parameters{show_clustal} = $sbeams->getSessionAttribute( key => 'GetProtein_ShowVars' ); return; } displayAnnotatedSequence( %parameters, peptides => $peptides, biosequence => $biosequence, resultset_ref => $resultset_ref, protein_structure => $protein_structure, glyco => $motif_params{glyco}, ); ### show sequence coverage for difference method if ($htmlmode and $atlas_build_names{$atlas_build_id} =~ /Origene/i){ my $file = "/net/db/projects/PeptideAtlas/species/Human/Origene/ExpectedProtein.txt"; open(IN, "<$file") or die "cannot open $file\n"; my %PPfile = (); my %peptides_m; foreach my $line (){ chomp $line ; my (@spids,$plate,$tube); if( $line =~ /TP\d+,([^,]+),(\w),(\d+),.*VE.+_ISBHOT(0[0123]\d).*/ || $line =~ /TP\d+,([^,]+),(\w),(\d+).*,ISBHOT(0[0123]\d).*/){ $plate = "ISBHOT$4"; @spids = split(/\./,$1); if(length($3) == 1){ $tube = "{$2".'0'."$3,$2$3}"; }else{ $tube = "$2$3"; } } foreach my $a (@spids){ my $p = "/proteomics/peptideatlas/archive/rmoritz/HumanMRMAtlas/Origene/Velos/$plate*/". "method/XTK/VE*_$tube\_*-ipro.prot.xml"; $PPfile{$a}{path} = $p; if($tube =~ /\{(\w+),.*/){ $tube = $1; } $PPfile{$a}{tube} = $plate.'-'.$tube; if($a =~ /(\S+)\-\d+/){ $a = $1; $PPfile{$a}{path} = $p; $PPfile{$a}{tube} = $plate.'-'.$tube; } } } if(defined $PPfile{$protein_name}){ foreach my $m (@methods){ my $file = $PPfile{$protein_name}{path}; my $str; if($parameters{ori_tube} ne ''){ $parameters{ori_tube} =~ /(ISBHOT\d{3})-(\w)(\d+)/; if(length($3) == 1){ $str = "$1*/method/XTK/VE*_{$2".'0'."$3,$2$3}_*-ipro.prot.xml"; }else{ $str = "$1*/method/{XTK,XTK*allmod}/VE*_$2$3\_*-ipro.prot.xml"; } $file = "/proteomics/peptideatlas/archive/rmoritz/HumanMRMAtlas/Origene/Velos/$str"; } $file =~ s/method/$m/; my @files = `ls $file`; if(@files > 1 or @files == 0 ){ print join("
", @files); goto LABEL; #}#elsif(! @files ){ # print "$PPfile
"; }else{ %{$peptides_m{$m}} = $origenecov -> getPeptides( ref_parameters => \%parameters, ProteinProphetfile => $files[0], protein_name => $protein_name); } } ($tr, $link ) = $sbeams->make_table_toggle( name => 'ori_getprotein_sequence', visible => 0, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $ori_help = $origenecov ->get_plot_help(name => 'ori_getprotein_sequence_desc' ); my $graphic_head = $sbeamsMOD->encodeSectionHeader( text => "Sequence Coverage of Each Fragmentation Method (TPP PeptideProb >= 0.9 and ProteinProb >= 0.9)", link => $link ); my %graphic_params = ( peptide => \%peptides_m, protein_data => $biosequence, tr_info => $tr, obs_color => $parameters{obs_color} ); $graphic_section = $origenecov -> get_sequence_graphic( %graphic_params, %motif_params ); my $primary = qq~   Below show $protein_name in: $PPfile{$protein_name}{tube}
~; if($parameters{ori_tube}){ $primary = qq~   Below show $protein_name in: $parameters{ori_tube}
~; } print qq~ ~; for my $method( @methods){ my ( $sequence, $tags ,$coverage) = $origenecov -> displayAnnotatedSequence( peptides => \%{$peptides_m{$method}}, biosequence => $biosequence, method => $method, ); print "\n \n"; } print "
$graphic_head
$ori_help
  $protein_name is primary protein in: $PPfile{$protein_name}{tube}
$primary
$graphic_section
" . $origenecov -> get_html_seq( sequence => $sequence, tags => $tags, method => $method, coverage => $coverage ) . "
\n"; } LABEL: } ############################################################# # Widget to allow show/hide of observed peptides section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_observedlist', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $col_defs = $sbeamsMOD->get_column_defs( labels => \@labels ); my $obs_help = $sbeamsMOD->make_table_help( entries => $col_defs, description => 'Observed peptides mapping to subject protein' ); my $observed_header = $sbeamsMOD->encodeSectionHeader( text => "Distinct Observed Peptides ($peptide_counts)", link => $link ); # print qq~ $obs_help$observed_header
\n" if $single; push @legend, "\n" if $multi; push @legend, "\n" if $signal; push @legend, "\n" if $anchor; push @legend, "\n" if @tmm; push @legend, "\n" if @intra; push @legend, "\n" if @extra; push @legend, "\n" if @coverage; push @legend, "\n" if @difficult; push @legend, "\n" if @glyco; # push @legend, "\n"; my $legend = ''; for my $item ( @legend ) { $legend .= $item; } # Create image map from panel objects. # links and mouseover coords for peptides, mouseover coords only for others my $baselink = "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$args{build_id}&searchWithinThis=Peptide+Name&searchForThis=_PA_Accession_&action=QUERY"; my $pid = $$; my @objects = $panel->boxes(); my $map = "\n"; for my $obj ( @objects ) { # $log->debug( join( "-", @$obj) ); my $hkey_name = $obj->[0]->display_name(); my $link_name = $hkey_name; $link_name =~ s/(.*)::::.*/$1/g; # Grrr... if ( $link_name =~ /^PAp\d+$/ ) { # Peptide, add link + mouseover coords/sequence my $coords = join( ", ", @$obj[1..4] ); my $link = $baselink; $link =~ s/_PA_Accession_/$link_name/g; $map .= "\n"; } elsif ( $hkey_name =~ /^Glycopeptide/ ) { # Glycopeptide, mouseover coords/sequence my $coords = join( ", ", @$obj[1..4] ); my $f = $obj->[0]; my $text = $f->start() . '-' . $f->end() . ', ' . $f->primary_tag(); $map .= "\n"; } else { my $f = $obj->[0]; my $coords = join( ", ", @$obj[1..4] ); my $text = $f->start() . '-' . $f->end(); $map .= "\n"; } } $map .= ''; # Create image in tmp space my $file_name = $pid . "_glyco_predict.png"; my $tmp_img_path = "images/tmp"; my $img_file = "$PHYSICAL_BASE_DIR/$tmp_img_path/$file_name"; # TMF: this is breaking for me in csv mode # D'oh! if ( $htmlmode ) { open( OUT, ">$img_file" ) || die "$!: $img_file"; binmode(OUT); print OUT $panel->png; close OUT; } my $tr = $args{tr_info} || ''; # my $tr_link = ""; # Generate and return HTML for graphic my $graphic =<<" EOG";
~ if $htmlmode; print qq~ $observed_header
$obs_help
~ if $htmlmode; #### Since we don't display the controls, make the default page size #### huge, otherwise the user may not see all peptides $rs_params{page_size} = 500; #### Calculate the best peptides to use my $best_peptide_information = $bestPeptideSelector->getBestPeptides( atlas_build_id => $atlas_build_id, biosequence_id => $biosequence_id, query_parameters_ref=>\%parameters, resultset_ref => $resultset_ref, column_titles_ref=>\@column_titles, no_escape => 1 ); my %instance_ids; my $max_peptides = 40; my $pep_cnt; for my $row ( sort { $b->[7] <=> $a->[7] } @{$resultset_ref->{data_ref}} ) { $pep_cnt++; last if $pep_cnt >= $max_peptides; $instance_ids{$row->[14]}++; } my @pa_accessions; my @sample_list; for my $row ( @{$resultset_ref->{data_ref}} ) { push @pa_accessions, $row->[0]; my $ntt = 0; if ( $row->[1] =~ /[-RK]/ ) { $ntt++; } if ( $row->[2] =~ /[RK]$/ || $row->[3] eq '-' ) { $ntt++; } my $mc = 0; if ( $row->[2] =~ /[RK][^P]/ ) { $mc++; } my $mgl = 0; if ( $row->[11] > 1 ) { $mgl++; } # Oh please please please! No again, sigh! # if ($sbeams->output_mode() eq 'html') { push @sample_list, [ $row->[0], $row->[12] ]; for my $idx ( 12, 13 ) { $row->[$idx] =~ s/\s//g; my @samples = split( /,/, $row->[$idx] ); if ( $idx == 12 ) { my $cnt = scalar( @samples ); $row->[$idx] = ( $htmlmode ) ? " $cnt " : $cnt; } else { $row->[$idx] = scalar( @samples ); } } # } my %sample_list = @sample_list; my @errs; push @errs, 'mc' if $mc; push @errs, 'ntt' if $ntt < 2; push @errs, 'mgl' if $mgl; my $annot = ''; if ( @errs ) { $annot = '[' . join( ',', @errs ) . ']'; # } else { # $instance_ids{$row->[14]}++; } if ($sbeams->output_mode() eq 'html') { $annot = "$annot"; } $row->[4] = sprintf( "%0.2f", $row->[4] ) . " $annot"; my $cnt = 0; # for my $p ( @{$resultset_ref->{precisions_list_ref}} ) { # $log->debug( "$cnt) precisely $p for $row->[$cnt], $resultset_ref->{types_list_ref}->[$cnt] " ); # $cnt++; # } } # Must... continue... hacking!!! $resultset_ref->{types_list_ref}->[4] = 'varchar'; # $resultset_ref->{precision_list_ref}->[4] = 36; #### Display the resultset $sbeams->displayResultSet( resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, rs_params_ref=>\%rs_params, url_cols_ref=>\%url_cols, hidden_cols_ref=>\%hidden_cols, max_widths=>\%max_widths, column_titles_ref=>\@column_titles, base_url=>$base_url, no_escape => 1 ); #### Display the resultset controls # $sbeams->displayResultSetControls( # resultset_ref=>$resultset_ref, # query_parameters_ref=>\%parameters, # rs_params_ref=>\%rs_params, # base_url=>$base_url, # ); print "
" if $htmlmode; ############################################################# #### Display selected peptides - Experimental #### Widget to allow show/hide of section # my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_bestpeptideslist', # visible => 0, # tooltip => 'Show/Hide Section', # imglink => 1, # sticky => 1, # ); #### Display the section header # my $bestPeptidesHeader = $sbeamsMOD->encodeSectionHeader( # text => 'Best Peptides', # link => $link # ); # print qq~ $bestPeptidesHeader
~ if $htmlmode; #### Display the best peptide information # my $samples = $bestPeptideSelector->getBestPeptidesDisplay( # atlas_build_id => $atlas_build_id, # best_peptide_information => $best_peptide_information, # query_parameters_ref=>\%parameters, # column_titles_ref=>\@column_titles, # link => $link, # base_url=>$base_url, # tr_info => $tr, # ); #print "$samples
" if $htmlmode; # print "
" if $htmlmode; ############################################################# #### Display synthetic peptide info unless ( $sbeams->isGuestUser() ) { # Widget to allow show/hide of section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_dirty_peptide', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1, ); # Display the section header my $PabstHeader = $sbeamsMOD->encodeSectionHeader( text => 'Synthetic peptide info ', link => $link ); #### Display the best peptide information my $dirty_peptide_display = $bestPeptideSelector->get_dirty_peptide_display( atlas_build_id => $current_page->{atlas_build_id}, 'link' => $link, base_url=>$base_url, biosequence_id => $biosequence_id, tr_info => $tr, ); if ( $htmlmode && $dirty_peptide_display ) { print qq~ $PabstHeader
$dirty_peptide_display
~; } } ############################################################# # Display precomputed PABST peptides #### Widget to allow show/hide of section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_pabst_static', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1, ); #### Display the section header my $PabstHeader = $sbeamsMOD->encodeSectionHeader( text => 'PABST Peptide Ranking ', link => $link ); #### Display the best peptide information my $pabst_display = $bestPeptideSelector->get_pabst_static_peptide_display( 'link' => $link, base_url=>$base_url, atlas_build_id => $atlas_build_id, biosequence_name => $biosequence->{biosequence_name}, biosequence_id => $biosequence_id, tr_info => $tr, ); if ( $htmlmode && $pabst_display ) { print qq~ $PabstHeader
$pabst_display
~; } ############################################################# #### Display theoretical highest observability peptides #### Widget to allow show/hide of section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_highlyobservablelist', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1, ); #### Display the section header my $highlyObservablePeptidesHeader = $sbeamsMOD->encodeSectionHeader( text => 'Predicted Highly Observable Peptides', link => $link ); #### Calculate the best peptides to use my $best_peptide_information = $bestPeptideSelector->getHighlyObservablePeptides( atlas_build_id => $atlas_build_id, biosequence_id => $biosequence_id, ); if ( $htmlmode && $sbeams->rs_has_data( resultset_ref => $best_peptide_information->{resultset_ref}) ) { #### Display the best peptide information my $samples = $bestPeptideSelector->getHighlyObservablePeptidesDisplay( atlas_build_id => $atlas_build_id, best_peptide_information => $best_peptide_information, query_parameters_ref=>\%parameters, column_titles_ref=>\@column_titles, link => $link, base_url=>$base_url, tr_info => $tr, ); print qq~ $highlyObservablePeptidesHeader
$samples
~; } ############################################################# #### Display annotated transitions #### Widget to allow show/hide of section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_transitions', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1, ); #### Display the section header my $transitionsHTML = $sbeamsMOD->encodeSectionHeader( text => 'Annotated Transitions', link => $link ); my $mrm_transitions = $sbeamsMOD->get_mrm_transitions( accessions => \@pa_accessions ); #### If a result is returned if ( scalar @$mrm_transitions ) { my $link_base = "GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&action=QUERY&searchForThis="; #### Loops through resultset and do formatting for my $pep ( @$mrm_transitions ) { $pep->[0] = "[0] . ">$pep->[0]"; $pep->[3] = sprintf("%0.2f", $pep->[3]); $pep->[4] = sprintf("%0.2f", $pep->[4]); if ( $pep->[6] ) { $pep->[6] = sprintf("%0.1f", $pep->[6]); } else { $pep->[6] = $sbeams->makeInactiveText('n/a'); } if ( $pep->[7] ) { $pep->[7] = sprintf("%0.1f", $pep->[7]); } else { $pep->[7] = $sbeams->makeInactiveText('n/a'); } $pep->[8] = sprintf("%0.1f", $pep->[8]); $pep->[9] = sprintf("%0.1f", $pep->[9]); ($pep->[5]) = $pep->[5] =~ /^([a-zA-Z]\d+)/; $pep->[1] = $sbeamsMOD->formatMassMods( $pep->[1] ); for my $idx ( 0..$#{$pep} ) { $pep->[$idx] = $sbeams->makeInactiveText('n/a') if !defined $pep->[$idx]; } } my @labels = ( 'Accession', 'Sequence', 'Charge', 'q1_mz', 'q3_mz', 'Label', 'Intensity', 'CE', 'RT', 'SSRT', 'Instr', 'Annotation Set', 'Quality'); my $col_defs = $sbeamsMOD->get_column_defs( labels => \@labels ); # my $trans_help = get_table_help( 'annotated_transitions' ); my $trans_help = $sbeamsMOD->make_table_help( entries => $col_defs, description => 'Contributed Q1/Q3 transition pairs for SRM experiments' ); #### Add table column headings unshift @$mrm_transitions, \@labels; #### Format table $transitionsHTML .= $sbeamsMOD->encodeSectionTable( header => 1, tr_info => $tr, width => '600', set_download => 1, help_text => $trans_help, nowrap => [12,13], chg_bkg_idx => 3, align => [qw(left center center right right center right right right right left left left)], rows => $mrm_transitions ); #### Display table # print "$trans_help
$transitionsHTML
\n" if $#{$mrm_transitions} && $htmlmode; print "$transitionsHTML
\n" if $#{$mrm_transitions} && $htmlmode; } my $marker_peptides = get_synthesized_peptides( \@pa_accessions ); my $markerHTML = $sbeamsMOD->encodeSectionHeader( text => "Reference Peptides", 'link' => $link, bold => 1 ); #### If a result is returned if ( scalar @$marker_peptides ) { # $transitionHTML .= $sbeams->getPopupDHTML(); my $link_base = "GetPeptide?_tab=3&atlas_build_id=$atlas_build_id&searchWithinThis=Peptide+Name&action=QUERY&searchForThis="; #### Loops through resultset and do formatting for my $pep ( @$marker_peptides ) { # $pep->[2] = sprintf("%0.2f", $pep->[2]); $pep->[3] = $sbeams->makeInactiveText('n/a') if !$pep->[3]; $pep->[0] = "[0] . ">$pep->[0]encodeSectionTable( header => 1, tr_info => $tr, width => '600', align => [qw(left center left left left center)], rows => $marker_peptides ); #### Display table print "
$markerHTML
\n"; } ############################################################# ## Display the samples list ## # Widget to allow show/hide of sample map section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_samplemap', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $sample_ids = getSampleList( sample_list => \@sample_list ); $log->debug( "UA is $ENV{HTTP_USER_AGENT}" ); my $sampleMap = $sbeamsMOD->getSampleMapDisplay( sample_ids => $sample_ids, instance_ids => \%instance_ids, atlas_build_id => $atlas_build_id, link => $link, tr_info => $tr, header_text => "Shows per-experiment expression for $max_peptides most highly observed peptides" ); print "
$sampleMap
" if $htmlmode; # Widget to allow show/hide of sample section ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_samplelist', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $sampleDisplay = $sbeamsMOD->getSampleDisplay( sample_ids => $sample_ids, 'link' => $link, tr_info => $tr ); print "$sampleDisplay
" if $htmlmode; ############################################################################# #### User Annotations Section # TMF: remove this conditional if/when want to release this feature to production. if ($parameters{'ann'}) { my $hidden_form_fields = qq~ ~; print &printUserAnnotations( protein_name => $protein_name, parameters_href => \%parameters, form_fields => $hidden_form_fields ); } #### If QUERY was not selected, then tell the user to enter some parameters } else { if ($sbeams->invocation_mode() eq 'http') { print "

Select parameters above and press QUERY

\n"; } else { print "You need to supply some parameters to contrain the query\n"; } } } # end handle_request sub get_ortholog_information { my $biosequence = shift; return unless $biosequence->{biosequence_name}; my $sql =<<" END"; SELECT ortholog_group FROM $TBBL_ORTHOLOG WHERE entry_accession = '$biosequence->{biosequence_name}' END my $link = ''; my $sth = $sbeams->get_statement_handle( $sql ); while ( my @row = $sth->fetchrow_array() ) { $link .= qq~  $row[0]\n~; } return $link if $link; $sql =<<" END"; SELECT ortholog_group FROM $TBBL_ORTHOLOG O JOIN $TBAT_SEARCH_KEY SK ON SK.search_key_name = O.entry_accession JOIN $TBAT_BIOSEQUENCE B ON B.biosequence_name = SK.resource_name WHERE B.biosequence_name = '$biosequence->{biosequence_name}' END my $sth = $sbeams->get_statement_handle( $sql ); while ( my @row = $sth->fetchrow_array() ) { $link .= qq~  $row[0]\n~; } return $link; } sub get_mrm_transitions { shift; my $accessions = shift || return []; my $acc_string = "'" . join( "', '", @{$accessions} ) . "'" ; # Project control my @accessible = $sbeams->getAccessibleProjects(); my $projects = join( ",", @accessible ); return '' unless $projects; my $sql =<<" END"; SELECT peptide_accession, modified_peptide_sequence, peptide_charge, q1_mz, q3_mz, q3_ion_label, q3_peak_intensity, collision_energy, retention_time, instrument, CASE WHEN contact_id IS NULL THEN annotator_name ELSE username END AS name, level_name FROM $TBAT_MODIFIED_PEPTIDE_ANNOTATION MPA JOIN $TBAT_PEPTIDE P ON MPA.peptide_id = P.peptide_id JOIN $TBAT_TRANSITION_SUITABILITY_LEVEL TSL ON TSL.transition_suitability_level_id = MPA.transition_suitability_level_id LEFT JOIN $TB_USER_LOGIN UL ON UL.contact_id = MPA.annotator_contact_id WHERE peptide_accession IN ( $acc_string ) AND project_id IN ( $projects ) AND level_score > 0.2 ORDER BY peptide_accession, q1_mz, peptide_charge, level_score DESC, q3_peak_intensity DESC END my @rows = $sbeams->selectSeveralColumns($sql); return \@rows; } sub get_synthesized_peptides { my $accessions = shift || return []; my $acc_string = "'" . join( "', '", @{$accessions} ) . "'" ; # Project control my @accessible = $sbeams->getAccessibleProjects(); my $projects = join( ",", @accessible ); return '' unless $projects; my $sql =<<" END"; SELECT peptide_accession, P.peptide_sequence, peptide_annotation, publication_name, CASE WHEN contact_id IS NULL THEN annotator_name ELSE username END AS name, PA.peptide_sequence FROM $TBAT_PEPTIDE_ANNOTATION PA JOIN $TBAT_PEPTIDE P ON PA.peptide_id = P.peptide_id LEFT JOIN $TB_USER_LOGIN UL ON UL.contact_id = PA.annotator_contact_id LEFT JOIN $TBAT_PUBLICATION PP ON PP.publication_id = PA.publication_id WHERE peptide_accession IN ( $acc_string ) AND project_id IN ( $projects ); END my @rows = $sbeams->selectSeveralColumns($sql); return \@rows; } #+ # Generates a graphical overview of sequence features using bioperl routines # #- sub get_sequence_graphic { ## read passed args, set up variables ## my %args = @_;; my $tmhmm = $args{protein_data}->{transmembrane_topology}; my $id = $args{protein_data}->{biosequence_id}; my $protseq = $args{protein_data}->{biosequence_seq}; my @concat = split( /\*/, $protseq ); $protseq = $concat[0]; my $protlen = length( $protseq ); my $peptide_instance_constraint = $args{peptide_instance_constraint} || ''; my $n_observation_clause ='PI.n_observations'; if($peptide_instance_constraint){ $n_observation_clause = 'PI2.n_observations'; } # Had initially used the peptide query above, but the start/end coordinates # were problematic. Thus we need to do it again. my $sql = qq~ SELECT DISTINCT P.peptide_accession, P.peptide_sequence, PI.n_genome_locations, PM.start_in_biosequence, PM.end_in_biosequence, $n_observation_clause FROM $TBAT_PEPTIDE_INSTANCE PI INNER JOIN $TBAT_PEPTIDE P ON ( PI.peptide_id = P.peptide_id ) LEFT JOIN $TBAT_PEPTIDE_MAPPING PM ON ( PI.peptide_instance_id = PM.peptide_instance_id ) $peptide_instance_constraint INNER JOIN $TBAT_ATLAS_BUILD AB ON ( PI.atlas_build_id = AB.atlas_build_id ) LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) WHERE AB.atlas_build_id IN ( $args{build_id} ) AND PM.matched_biosequence_id = $id ORDER BY P.peptide_accession ~; my @rows = $sbeams->selectSeveralColumns( $sql ); # Set up hash for storing point by point coverage info my %coverage; for my $idx ( 1..$protlen ) { $coverage{$idx} = 0; } # Define color mapping for various features. my %colors = ( Signal => 'cornflowerblue', Anchor => 'lightskyblue', Transmembrane => 'greenyellow', Intracellular => 'coral', Extracellular => 'mediumseagreen', Coverage => 'beige', Translated => 'gainsboro', Observed => $args{obs_color} || 'firebrick' , Glycopeptide => 'goldenrod', Difficult => 'bisque' ); # Define CSS classes my $sp = ' ' x 4; my $style =<<" END_STYLE"; END_STYLE ## Create main panel + sequence 'ruler' ## my $panel = Bio::Graphics::Panel->new( -length => $protlen + 2, -key_style => 'between', -width => 800, -empty_tracks => 'suppress', -pad_top => 5, -pad_bottom => 5, -pad_left => 10, -pad_right => 50 ); # open( FIL, ">/tmp/colors.html" ); # print FIL "\n"; # my @c = $panel->color_names(); # for my $c ( @c ) { print FIL "\n"; } # print FIL "
$sp$c$sp$sp
\n"; # close FIL; my $ruler = Bio::SeqFeature::Generic->new( -end => $protlen, -start => 2, -display_name => 'Sequence Position'); my $sequence = Bio::SeqFeature::Generic->new( -end => $protlen, -start => 1, -display_name => 'Sequence Position'); ## Generate observed peptide tracks ## # color peptides based on n_obs my $max = 1; my $threshold = 20; my $lten = log(10); foreach my $row (@rows) { $max = ( $max > $row->[5] ) ? $max : $row->[5]; if ( $max > $threshold ) { $max = $threshold; last; } } $max = log($max)/$lten; # Loop over peptides my @peptides; my %pep_info; my ( $multi, $single ); foreach my $row (@rows) { my $acc = $row->[0]; my $seq = $row->[1]; my $score = ( $row->[2] > 1 ) ? 0 : ( $row->[5] <= $threshold ) ? (log($row->[5])/$lten) + 0.3 : $max; my $start = $row->[3]; my $stop = $row->[4]; # accession isn't necessarily unique, need compound key... my $ugly_key = $acc . '::::' . $start . $stop; my $f = Bio::SeqFeature::Generic->new( -start => $start, -end => $stop, -primary => $acc, -display_name => $ugly_key, -score => $score ); push @peptides, $f; # Record the coverage for this peptide for my $idx ( $start..$stop ) { $coverage{$idx}++; } # count peptide type, to build appropriate legend if ( $score ) { $single++; } else { $multi++; } $pep_info{$ugly_key} = "$start - $stop, $seq ($row->[5] obs)"; } ## Generate Signal P related tracks ## my @signalp; my $seqtype = ''; my ( $anchor, $signal ); my $signal_peptide_coords = {}; my $sp_info = scalar( @sp_rows ); if ( $args{protein_data}->{has_signal_peptide} || $sp_info ) { if ( $args{protein_data}->{signal_peptide_is_cleaved} =~ /y/i || $sp_info ) { $seqtype = 'Signal'; $signal++; } else { $seqtype = 'Anchor'; $anchor++; } my $end = ( $sp_info ) ? $sp_rows[1] : $args{protein_data}->{signal_peptide_length}; $sp_rationale = ( $sp_info ) ? 'Signal peptide annoted in SwissProt' : 'Signal sequence predicted by Signal P, cleaved in mature protein'; # Cache signal peptide info for possible 'unlikely' designation. if ( $seqtype eq 'Signal' ) { $signal_peptide_coords->{start} = 1; $signal_peptide_coords->{end} = $end; $signal_peptide_coords->{seq} = substr( $protseq, 0, $end ); $signal_peptide_coords->{rationale} = $sp_rationale; } my $f = Bio::SeqFeature::Generic->new( -start => 1, -end => $end, -display_name => $seqtype , -label => 0 ); push @signalp, $f; } ## End Signal P related tracks ## ## Generate Glyco motif related tracks ## my @glyco; if ( $args{glyco} ) { my $seqtype = 'Glycopeptide'; for my $idx ( sort { $a <=> $b } keys( %{$args{glyco}} ) ) { my $peptide = $args{glyco}->{$idx}; my $num_sites = $peptide =~ tr/\*/\*/; my $p_len = length( $peptide ) - $num_sites; # $log->debug( "Peptide $peptide, starting at $idx, has $num_sites sites and is $p_len amino acids long" ); my $f = Bio::SeqFeature::Generic->new( -start => $idx, -end => $idx + $p_len - 1, -display_name => $seqtype , -primary => $peptide, -label => $peptide ); push @glyco, $f; } } ## End Glyco motif related tracks ## # Calculate the coverage 'domains' # Made this global to use elsewhere... @coverage; my @uncovered; # Keep track of coverage coordinates my $cstart = 1; my $cend = 0; # Also keep track of non-coverage coordinates my $ncstart = 1; my $ncend = 1; # Not in coverage to begin with my $in_coverage = 0; # What is 1-based index of first observed aa, used for expected coverage. my $min_covered_aa = 0; # %coverage is hash of seq depth keyed by coordinate my $last_key; for my $key ( sort{ $a <=> $b } keys( %coverage ) ) { # Cache min covered aa if not yet set and index is covered $min_covered_aa = $key if ( !$min_covered_aa && $coverage{$key} ); if ( !$in_coverage ) { # Not in coverage if ( $coverage{$key} ) { $cstart = $key; $cend = $key; $in_coverage = 1; # Store non-coverage info $ncend = $key - 1; push @uncovered, { start => $ncstart, end => $ncend } if $ncstart && $ncend; } else { $in_coverage = 0; # No-op } } else { # Already in coverage if ( $coverage{$key} ) { # Start stays the same, increment end $cend = $key; $in_coverage = 1; } else { $in_coverage = 0; # Its showtime! my $f = Bio::SeqFeature::Generic->new( -start => $cstart, -end => $cend, -primary => 'Coverage', -display_name => 'Coverage' ); push @coverage, $f; $cstart = 0; $cend = 0; # Dropped out of coverage, cache ncstart $ncstart = $key; } } $last_key = $key; } if ( !$in_coverage ) { push @uncovered, { start => $ncstart, end => $last_key } if $ncstart && $last_key; } if ( $cend ) { my $f = Bio::SeqFeature::Generic->new( -start => $cstart, -end => $cend, -primary => 'Coverage', -display_name => 'Coverage' ); push @coverage, $f; } # Keep track of how much sequence might be hard to observe my @unlikely_to_observe; # Cache and return to caller for seq annotation my @difficult; # Use to make 'track' on sequence graphic # Original logic discounted entire signal peptide as 'unlikely' if any of # the sp was actually seen. To restore this behaviour set # $penalize_obs_in_signal = 1 my $penalize_obs_in_signal = 0; if ( $signal_peptide_coords && $signal_peptide_coords->{end} ) { if ( $min_covered_aa > $signal_peptide_coords->{end} ) { push @unlikely_to_observe, $signal_peptide_coords; # $log->debug( "1 start is $signal_peptide_coords->{start}, end is $signal_peptide_coords->{end}" ); my $f = Bio::SeqFeature::Generic->new( -start => $signal_peptide_coords->{start}, -end => $signal_peptide_coords->{end}, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f; } elsif ( $penalize_obs_in_signal ) { # We will not consider the signal peptide to be unlikely $log->info( "Found sequence in a predicted signal sequence for $args{protein_data}->{biosequence_name}, penalizing" ); } elsif ( $min_covered_aa != 1 ) { # Signal peptide is still considered unlikely to be observed $log->info( "Allowing obs signal sequence as unlikely for $args{protein_data}->{biosequence_name}" ); # Not strictly correct, but useful to pass this information on $signal_peptide_coords->{end} = $min_covered_aa - 1 if $min_covered_aa; my $signal_seq = substr( $protseq, 0, $signal_peptide_coords->{end} ); # $log->debug( "2 start is $signal_peptide_coords->{start}, end is $signal_peptide_coords->{end}" ); push @unlikely_to_observe, { start => $signal_peptide_coords->{start}, end => $signal_peptide_coords->{end}, seq => $signal_seq }; my $f = Bio::SeqFeature::Generic->new( -start => $signal_peptide_coords->{start}, -end => $signal_peptide_coords->{end}, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f; } } # my $protseq = $args{protein_data}->{biosequence_seq}; for my $cpair ( @uncovered ) { if ( $signal_peptide_coords && $signal_peptide_coords->{end} ) { # $log->debug( "Start is $cpair->{start}, End is $cpair->{end}, seq is $cpair->{seq}" ); next if $cpair->{end} <= $signal_peptide_coords->{end}; } my $uncovered_length = $cpair->{end} - $cpair->{start} + 1; my $seq = substr( $protseq, $cpair->{start} - 1, $uncovered_length ); $cpair->{seq} = $seq; my $tryptics = $biolink->do_tryptic_digestion( aa_seq => $seq ); my $index = $cpair->{start}; for my $tryp ( @$tryptics ) { # anchor if ( length( $tryp ) < MIN_OBS_LENGTH ) { my $peptide_end = $index + length($tryp) - 1; push @unlikely_to_observe, { start => $index, end => $peptide_end, rationale => 'Short peptide', seq => $tryp }; my $f = Bio::SeqFeature::Generic->new( -start => $index, -end => $peptide_end, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f if $index && $peptide_end; } elsif ( length( $tryp ) > MAX_OBS_LENGTH ) { my $peptide_end = $index + length($tryp) - 1; push @unlikely_to_observe, { start => $index, end => $peptide_end, rationale => 'Long peptide', seq => $tryp }; my $f = Bio::SeqFeature::Generic->new( -start => $index, -end => $peptide_end, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f if $index && $peptide_end; } else { # $log->debug( "Might see $tryp" ); } $index += length( $tryp ); } } # Cache non-coverage sequence $args{protein_data}->{_non_coverage} = \@unlikely_to_observe; ## Generate TMHMM-derived tracks ## # parse domain info my $tm_info = $biolink->get_transmembrane_info ( tm_info => $tmhmm, end => $protlen ); # loop over domains, create features for each my @intra; my @extra; my @tmm; foreach my $region (@$tm_info) { my $primary = $region->[0]; my $tag = $primary; my $f = Bio::SeqFeature::Generic->new( -start => $region->[1], -end => $region->[2] , -primary => $primary , -display_name => $primary , -tag => { $primary => $tag } ); if ( $tag =~ /Intracellular/i ) { push @intra, $f; } elsif ( $tag =~ /Extracellular/i ) { push @extra, $f; } else { push @tmm, $f; } } # Only add intra/extra if @tmm or @signalp unless ( @tmm ) { @intra = (); @extra = (); } ## Add all the tracks to the panel ## $panel->add_track( $ruler, -glyph => 'anchored_arrow', -tick => 2, -height => 8, -key => 'Sequence Position' ); # Add observed peptide track $panel->add_track( \@peptides, -glyph => 'graded_segments', -bgcolor => $colors{Observed}, -fgcolor => 'black', -font2color => '#882222', -key => 'Observed Peptides', -bump => 1, -height => 8, -label => sub { my $f = shift; my $n = $f->display_name(); $n =~ s/(.*)::::.*/$1/g; return $n }, -min_score => 0, -max_score => $max ); ## Add glyco track (mebbe) $panel->add_track( \@glyco, -glyph => 'segments', -bgcolor => $colors{Glycopeptide}, -fgcolor => 'black', -key => 'Theoretical NXS/T peptides', -bump => +1, -height => 8, -legend => 1, -label => 0, ) if @glyco; # Add signalP track my $sigtype = ( $anchor ) ? 'Anchor' : 'Signal'; my $qualifier = ( $sp_info ) ? '' : '(predicted)'; $panel->add_track( \@signalp, -glyph => 'segments', -bgcolor => $colors{$sigtype}, -fgcolor => 'black', -key => $sigtype . ' Sequence ' . $qualifier, -bump => +1, -height => 8, -legend => 1, -label => 0, ); my %tracs = ( Intracellular => \@intra, Extracellular => \@extra, Transmembrane => \@tmm, ); # Add tmhmm-related tracks for my $t ( qw( Intracellular Transmembrane Extracellular ) ) { my %legend = ( Transmembrane => 'Transmembrane Domain', Extracellular => 'Outside membrane', Intracellular => 'Inside membrane'); $panel->add_track( $tracs{$t}, -glyph => 'segments', -bgcolor => $colors{$t}, -fgcolor => 'black', -font2color => 'red', -key => "$legend{$t} (predicted)", -bump => +1, -height => 8, -legend => 1, -label => 0, ); } # Add coverage track $panel->add_track( \@coverage, -glyph => 'segments', -bgcolor => $colors{Coverage}, -fgcolor => 'black', -key => 'Sequence Coverage', -connector => 'solid', -bump => 0, -height => 8, -label => 0, # sub { my $f = shift; return $f->display_name}, ); # Add difficult track $panel->add_track( \@difficult, -glyph => 'segments', -bgcolor => $colors{Difficult}, -fgcolor => 'black', -key => 'Unlikely (theoretical)', -connector => 'solid', -bump => 0, -height => 8, -label => 0, # sub { my $f = shift; return $f->display_name}, ) if @difficult; $panel->add_track( $ruler, -glyph => 'anchored_arrow', -tick => 2, -height => 8, -key => 'Sequence Position' ); # $panel->add_track( $sequence, # -glyph => 'segments', # -bgcolor => $colors{Translated}, # -key => 'Translated Sequence', # -tick => 2 ); # Set up graphic legend my @legend; my $sig_title = ( $sp_info ) ? 'Annotated signal peptide from direct observation or prediction' : 'Signal peptide predicted from amino acid sequence'; my %title = ( obs_sing => "Peptides which map to a single genome location", obs_multi => "Peptides which match to 2 or more genome locations", sig_seq => "Signal peptide predicted from amino acid sequence", anc_seq => "Anchor sequence predicted from amino acid sequence", tm_dom => "Transmembrane region predicted from amino acid sequence", in_dom => "Predicted orientation inside, (intracellular for cell membrane proteins)", ex_dom => "Predicted orientation outside, (extracellular for cell membrane proteins)", pep_cov => "Cumulative sequence coverage", glyco => "Potential Glycosylation Site", unlikely_obs => "tryptic peptides ≤ " . MIN_OBS_LENGTH . " residues or signal peptides" ); push @legend, "
$sp Observed peptide with single genome mapping
$sp Observed peptide with ambiguous genome mapping
$sp $sp_rationale
$sp Anchor sequence predicted by Signal P
$sp Transmembrane domain predicted by TMHMM
$sp Predicted as inside membrane by TMHMM
$sp Predicted as outside membrane by TMHMM
$sp Protein coverage by observed peptides
$sp Peptides unlikely to be observed
$sp Potential N-glycosylated peptide
$sp Entire Translated Protein Sequence
$link
Sorry No Img $map
$legend
$style EOG return $graphic; } sub getPeptideCount { my %args = @_; $parameters{variant_type} ||= 'init_met'; my $bioseq = $args{biosequence}; my $seq = $bioseq->{biosequence_seq}; my $acc = $bioseq->{biosequence_accession}; my $grp = $bioseq->{group_rep}; my $level = $bioseq->{level_name}; my $nobs = $bioseq->{n_observations}; # my $grp_id = $bioseq->{protein_group_number}; # my $desc = $bioseq->{biosequence_desc}; my $tobs = 0; my $rs = $args{resultset_ref}; my %peptides; my %clean_peptides; my %support; my %refute; my @neutral; my @nomap; for my $row ( @{$rs->{data_ref}} ) { $tobs += $row->[7]; # $peptides{$row->[2]} = { n_map => $row->[11], n_obs => $row->[7], group_number => $grp_id, desc => $desc }; my $pepion = $row->[15] . '_' . $row->[17]; my $n_samples = scalar(split( /,/, $row->[12] ) ); $peptides{$pepion} = { n_map => $row->[11], n_obs => $row->[7], n_mod_obs => $row->[16], mod_seq => $row->[15], 'N Samples' => $n_samples, 'samples' => $row->[12] }; $clean_peptides{$row->[2]}++; my $posns = $sbeamsMOD->get_site_positions( seq => $seq, pattern => $row->[2] ); my $map = ''; my $posn = -1; if ( !scalar( @{$posns} ) ) { # Non-mapper, could be I/L issue, or maybe a SNP my $lseq = $seq; $lseq =~ s/I/L/g; my $lpep = $row->[2]; $posns = $sbeamsMOD->get_site_positions( seq => $lseq, pattern => $lpep ); if ( !scalar( @{$posns} ) ) { $map = 'no'; } else { $map = 'lneutral'; $posn = $posns->[0] + 1; } } else { $map = 'yes'; $posn = $posns->[0] + 1; } $peptides{$pepion}->{posn} = $posn; $peptides{$pepion}->{mapping} = $map; $peptides{$pepion}->{charge} = $row->[17]; for my $attr ( 'peptide_accession', 'preceding_residue', 'peptide_sequence', 'following_residue', 'best_probability', 'n_observations', 'SSRCalc_relative_hydrophobicity', 'n_genome_locations', 'modified_peptide_instance_id' ) { $peptides{$pepion}->{$attr} = $row->[$rs->{column_hash_ref}->{$attr}]; } if ( $parameters{variant_type} =~ /init_met/i ) { if ( $posn == 1 ) { $refute{$row->[2]}++; } elsif ( $posn == 2 ) { $support{$row->[2]}++; } elsif ( $map eq 'no' ) { push @nomap, $row->[2]; } else { push @neutral, $row->[2]; } } } my $pcnt = scalar( keys( %clean_peptides) ); my %results = ( support => [keys(%support)], refute => [keys(%refute)], peptides => \%peptides, neutral => \@neutral, nomap => \@neutral, tcount => $tobs, acount => $nobs, group => $grp, level => $level ); return \%results; } sub get_peptide_table { my $mode = shift; my $pcounts = shift; my $members = shift; return '' unless scalar( keys( %{$members} ) ); my @headings = ( 'peptide_accession', 'preceding_residue', 'peptide_sequence', 'following_residue', 'mod_seq', 'posn', 'n_genome_locations', 'best_probability', 'SSRCalc_relative_hydrophobicity', 'n_mod_obs', 'N Samples', 'charge' ); my %head2display = ( peptide_accession => 'Accession', preceding_residue => 'Pre AA', following_residue => 'Fol AA', SSRCalc_relative_hydrophobicity => 'SSR', mod_seq => 'Modified Sequence', peptide_sequence => 'Sequence', n_genome_locations => '# Gen Loc', n_mod_obs => 'N Obs', charge => 'Chg', ); my @display_headings; for my $head ( @headings ) { my $dhead = $head2display{$head} || $head; push @display_headings, $dhead; } # my @table_data = ( \@headings ); my @table_data = ( \@display_headings ); my $pep_link_base = "PEPTIDENAME"; my %seen; # If we want to coalesce multiple charge states we can do it here (or in get_pcounts) # my %pepdata; for my $pepion ( keys( %{$pcounts->{peptides}} ) ) { # my ( $modpep, $charge ) = split( /_/, $pepion ); # die Dumper( $pcounts->{peptides}->{$pepion} ); # if ( $pepdata{$modpep} ) { # } else { # } } for my $modpep ( sort{ $pcounts->{peptides}->{$a}->{peptide_sequence} <=> $pcounts->{peptides}->{$b}->{peptide_sequence} || $pcounts->{peptides}->{$a}->{mod_seq} <=> $pcounts->{peptides}->{$b}->{mod_seq} || $pcounts->{peptides}->{$a}->{n_obs} <=> $pcounts->{peptides}->{$b}->{n_obs} } keys( %{$pcounts->{peptides}} ) ) { next unless $members->{$pcounts->{peptides}->{$modpep}->{peptide_sequence}}; my @row; for my $head ( @headings ) { if ( $head eq 'peptide_accession' && !$seen{$pcounts->{peptides}->{$modpep}->{peptide_sequence}} ) { my $pep_link = $pep_link_base; $pep_link =~ s/PEPTIDENAME/$pcounts->{peptides}->{$modpep}->{$head}/g; $pcounts->{peptides}->{$modpep}->{$head} = $pep_link; } elsif ( $head eq 'n_genome_locations' && $pcounts->{peptides}->{$modpep}->{$head} > 1 && !$seen{$pcounts->{peptides}->{$modpep}->{peptide_sequence}} ) { $pcounts->{peptides}->{$modpep}->{$head} = "$pcounts->{peptides}->{$modpep}->{$head} "; } # $seen{$pcounts->{peptides}->{$modpep}->{peptide_sequence}}++; push @row, $pcounts->{peptides}->{$modpep}->{$head}; } push @table_data, \@row; } my ( $table, $rs_name ) = $sbeamsMOD->encodeSectionTable( header => 1, width => '600', rows => \@table_data, rows_to_show => 50, max_rows => 50, help_text => '', align => [ qw( left center left center right right right right right right right right) ], bg_color => '#EAEAEA', sortable => 0, table_id => '$mode Evidence', ); return $table; } sub get_heatmap { my $pcounts = shift; my %evidence; my @ordered_peptides; for my $pep ( sort(@{$pcounts->{support}}),sort(@{$pcounts->{refute}}) ) { $evidence{$pep}++; push @ordered_peptides, $pep; } my %mpi; for my $pepion ( keys( %{$pcounts->{peptides}} ) ) { next unless $evidence{$pcounts->{peptides}->{$pepion}->{peptide_sequence}}; $mpi{$pcounts->{peptides}->{$pepion}->{modified_peptide_instance_id}}++; } my $mpi_str = join( ',', keys( %mpi ) ); my $sql = qq~ select DISTINCT sample_tag, S.sample_id, modified_peptide_sequence, peptide_charge, MPIS.n_observations FROM $TBAT_SAMPLE S JOIN $TBAT_ATLAS_SEARCH_BATCH ASB ON ASB.sample_id = S.sample_id JOIN $TBAT_MODIFIED_PEPTIDE_INSTANCE_SEARCH_BATCH MPIS on ASB.atlas_search_batch_id = MPIS.atlas_search_batch_id JOIN $TBAT_MODIFIED_PEPTIDE_INSTANCE MPI on MPI.modified_peptide_instance_id = MPIS.modified_peptide_instance_id WHERE MPI.modified_peptide_instance_id IN ( $mpi_str ) ORDER BY modified_peptide_sequence, peptide_charge ~; my $sth = $sbeams->get_statement_handle( $sql ); my %samples; my %pepions; my %peptides; my %seen_peptide; while ( my @row = $sth->fetchrow_array() ) { my $pepion = $row[2] . '_' . $row[3]; my $clean_seq = $row[2]; $clean_seq =~ s/\[\d+\]//g; $clean_seq =~ s/^n//g; $pepions{$pepion}++; $samples{$row[0]} ||= {}; $samples{$row[0]}->{$pepion} = $row[4]; $peptides{$clean_seq} ||= []; push @{$peptides{$clean_seq}}, $pepion unless $seen_peptide{$pepion}++; } my @samples = sort( keys( %samples ) ); my @pepions; my %seen; for my $pep ( @ordered_peptides ) { for my $pion ( @{$peptides{$pep}} ) { push @pepions, $pion unless $seen{$pion}++; } } my $max = 0; for my $sample ( @samples ) { for my $pepion ( @pepions ) { if ( $samples{$sample}->{$pepion} ) { $samples{$sample}->{$pepion} = 1 + log( $samples{$sample}->{$pepion} ); } else { $samples{$sample}->{$pepion} = 0; } $max = $samples{$sample}->{$pepion} if $max < $samples{$sample}->{$pepion}; } } my $num_colors = ( $max < 64 ) ? 64 : $max; my $heatmap = qq~ ~; my $content .= qq~ $heatmap

~; return $content; } ############################################################################### # displayAnnotatedSequence ############################################################################### sub displayAnnotatedSequence { my %args = @_; my $SUB_NAME = 'displayAnnotatedSequence'; #### Decode the argument list my $resultset_ref = $args{'resultset_ref'} || die "ERROR[$SUB_NAME]: resultset_ref not passed"; my $biosequence = $args{'biosequence'} || die "ERROR[$SUB_NAME]: biosequence not passed"; my $line_length = $args{'line_length'} || 70; my $word_length = $args{'word_length'} || 10; my $enzyme = $args{'enzyme'} || ''; my $protein_structure = $args{'protein_structure'}; my $total_observations = $args{'total_observations'}; #### Don't display unless HTML return unless ($sbeams->output_mode() eq 'html'); # @unlikely_to_observe, { start => $index, end => $peptide_end, rationale => 'Short peptide' }; # _non_coverage #### Get the hash of indices of the columns my %col = %{$resultset_ref->{column_hash_ref}}; #### Loop over all the peptides my $data_ref = $resultset_ref->{data_ref}; my @peptides = @{$args{peptides}}; # Widget to allow show/hide of sequence display section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getprotein_sequence', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $section_header = $sbeamsMOD->encodeSectionHeader( text=>'Sequence', link =>$link, ); print qq~ $section_header \n"; $log->info( "get seq vars: " . time() ); my $html_seq = $sbeamsMOD->get_html_seq_vars( seq => $sequence, accession => $biosequence->{biosequence_name}, tags => $tags, peptides => \@peptides, show_clustal => $args{show_clustal} ); print "\n"; print ""; $log->info( "Done: " . time() ); if ( $html_seq->{clustal_display} ) { # If we have any variants print "\n"; print qq~ ~; my $variant_list = ''; my $cnt; if ( $html_seq->{has_variants} ) { my $peptide_str = join( ',', @peptides ); my $snp_form = qq~ (Displaying dbSNP All SNPs) ~; print qq~ \n ~; for my $var ( @{$html_seq->{variant_list}} ) { $cnt++; $variant_list .= '"; $var_toggle_link = qq~ Show More\n ~; } print qq~ ~; print "\n"; print "\n"; print "\n"; } } print "
~ if $htmlmode; my $sequence = $biosequence->{biosequence_seq}; my %start_positions; my %end_positions; foreach my $label_peptide (@peptides) { if ($label_peptide) { my $pos = -1; while (($pos = index($sequence,$label_peptide,$pos)) > -1) { $start_positions{$pos}++; $end_positions{$pos+length($label_peptide)}++; $pos++; } } } #### If transmembrane regions topology has been supplied, find the TMRs my %tmr_start_positions; my %tmr_end_positions; my %tmr_color; my $notes_buffer = ''; if ($protein_structure->{transmembrane_topology}) { my $start_side = substr($protein_structure->{transmembrane_topology},0,1); my $tmp = substr($protein_structure->{transmembrane_topology},1,9999); my @regions = split(/[io]/,$tmp); foreach my $region (@regions) { my ($start,$end) = split(/-/,$region); $tmr_start_positions{$start-1} = $start_side; $tmr_color{$start-1} = 'orange'; if ($start_side eq 'i') { $start_side = 'o'; } elsif ($start_side eq 'o') { $start_side = 'i'; } else { $start_side = '?'; } $tmr_end_positions{$end} = $start_side; $tmr_color{$end} = 'orange'; } $notes_buffer .= "(Used TMR topology string: $protein_structure->{transmembrane_topology})
\n"; #print "[See full TMHMM result]
\n"; } #### If there's a signal peptide, mark it as a blue if ($protein_structure->{has_signal_peptide} eq 'Y') { $tmr_start_positions{0} = ''; $tmr_color{0} = 'blue'; $tmr_end_positions{$protein_structure->{signal_peptide_length}} = ''; $tmr_end_positions{$protein_structure->{signal_peptide_length}} = '/' if ($protein_structure->{signal_peptide_is_cleaved} eq 'Y'); $tmr_color{$protein_structure->{signal_peptide_length}} = 'orange'; $notes_buffer = "(signal peptide: Y, length: $protein_structure->{signal_peptide_length}, cleaved: $protein_structure->{signal_peptide_is_cleaved}, probability: $protein_structure->{has_signal_peptide_probability})\n".$notes_buffer; } # print "
\n";


  my @concat = split( /\*/, $sequence );
  my $concat_length = length($concat[0]);
  my $seq_length = length($sequence);
	$seq_length = $concat_length;
#	die ( "orig is $seq_length, becomes $concat_length" );

  my $i = 0;
  my $color_level = 0;
  my $observed_residues = 0;

  my @annotation_lines;


  while ($i < $seq_length) {

    if ($end_positions{$i}) {
      if ($color_level == $end_positions{$i}) {
#	print "";
      }
      $color_level -= $end_positions{$i} unless ($color_level == 0);
    }


    if ($start_positions{$i}) {
      if ($color_level == 0) {
#	print "";
      }
      $color_level += $start_positions{$i};
    }

    if ($color_level) {
      $observed_residues++;
    }

#    print substr($sequence,$i,1);
    $i++;
    if ($i %$line_length == 0) {
#      print "\n";
    } elsif ($enzyme && $enzyme eq 'trypsin') {
      if (substr($sequence,$i-1,2) =~ /[RK][A-O,Q-Z]/) {
#	print " ";
      }
    } elsif ($i % $word_length == 0) {
#      print " ";
    }
  }


  if ($color_level) {
#    print "";
  }

  my $cnt = 0;
  for my $frag ( @{$biosequence->{_non_coverage}} ) {
    $cnt += length( $frag->{seq} );
  }
  
# TMF: added this statement for special need
# print STDERR int($observed_residues/$seq_length*1000)/1000,"\t",int(($observed_residues)/($seq_length-$cnt)*1000)/1000,"\n"; 
#  print "\n\n";
#  print "Protein Coverage = ",int($observed_residues/$seq_length*1000)/10,"%";
#  $log->debug( "OBS: $observed_residues, SEQLEN: $seq_length, CNT $cnt" );
#  print ' (' . int(($observed_residues)/($seq_length-$cnt)*1000)/10,"% of likely observable sequence)";

  my %observed = (    start => [],
                        end => [],
                      class => 'pa_observed_sequence',
                     number => 0 );
  
  for my $f ( @coverage ) {
#    $log->debug( "coverage starts at " . $f->start() . " and ends at " . $f->end() );
    push @{$observed{start}}, $f->start() - 1;
    push @{$observed{end}}, $f->end() - 1;
    $observed{number}++;
  }
  my $tags = $sbeamsMOD->make_tags( \%observed );

  if ( $args{glyco} ) {
    my %gsite = ( start => [], end => [], class => 'pa_glycosite', number => 0 );
    my $sites = $sbeamsMOD->get_site_positions( seq => $sequence,
                                          pattern => 'N[^P][S|T]' );
    for my $site ( @$sites ) {
      push @{$gsite{start}}, $site;
      push @{$gsite{end}}, $site + 2;
      $gsite{number}++;
    }
    $tags = $sbeamsMOD->make_tags( \%gsite, $tags );

    my %predicted = ( start => [], end => [], class => 'pa_predicted_pep', number => 0 );
    for my $k ( sort { $a <=> $b } keys( %{$args{glyco}} ) ) {
      push @{$predicted{start}}, $k - 1;
      my $peptide = $args{glyco}->{$k};
      $peptide =~ s/\W//g;
      push @{$predicted{end}}, $k + length( $peptide ) - 2;
      $predicted{number}++;
    } 
    $tags = $sbeamsMOD->make_tags( \%predicted, $tags ); 
   
  }


#  print "
$html_seq->{seq_display}
Protein Coverage = ",int($observed_residues/$seq_length*1000)/10,"% "; my $likely = ( $seq_length-$cnt ) ? int(($observed_residues)/($seq_length-$cnt)*1000)/10 : 0; print " ($likely% of likely observable sequence)
 
Annotated Variants: $snp_form
$variant_list
$var_toggle_link
 
Variants in Sequence Context:
$html_seq->{clustal_display}
\n\n"; #print "

Notes:
\n$notes_buffer" if ($notes_buffer); $args{show_aa_content}++; if ( $args{show_aa_content} ) { my $tot = length( $sequence ); my @aa = split( '', $sequence ); my %aa; # Make sure we get the common AA for my $aa ( qw( A C D E F G H I K L M N P Q R S T V W Y ) ) { $aa{$aa} = 0; } for my $aa ( @aa ) { $aa{$aa}++; } my %colors = ( 10 => '#555566', 9 => '#666677', 8 => '#777788', 7 => '#888899', 6 => '#9999aa', 5 => '#aaaabb', 4 => '#bbbbcc', 3 => '#ccccdd', 2 => '#ddddee', 1 => '#eeeeff', 0 => '#ffffff' ); my $name = 'AA:'; my $count = 'Cnt:'; my $perc = 'Perc:'; for my $aa ( sort( keys( %aa ) ) ) { my $pct = sprintf( "%0.1f", 100*($aa{$aa}/$tot) ); my $color_key = int( $pct ); my $font_color = ( $color_key > 6 ) ? 'white' : 'black'; $color_key = 10 if $color_key > 10; # $name .= "$aa"; $name .= "$aa"; $count .= "$aa{$aa}"; $perc .= "$pct"; } $name .= ''; $count .= ''; $perc .= ''; my $slith = 'slith'; $slith =<<" END";
$name $count $perc

END print $slith; } } # end displayAnnotatedSequence ############################################################################### # getSamples ############################################################################### sub getSamples { my %args = @_; my $SUB_NAME = 'getSamples'; my $sql = qq~ SELECT sample_id,sample_title FROM $TBAT_SAMPLE WHERE record_status != 'D' ORDER BY sample_id ~; my @samples = $sbeams->selectSeveralColumns($sql); return \@samples; } # end getSamples sub getSampleList { my %args = @_; #### Decode the argument list return [] unless $args{sample_list}; # each row has an accession, sample_string pair. my %observed_samples; foreach my $row (@{$args{sample_list}}) { my $observed_sample_list = $row->[1]; my @all = split(/[,;]/,$observed_sample_list); foreach my $element ( @all ) { $observed_samples{$element}++; } } my @keys = keys( %observed_samples ); return \@keys; } ############################################################################### # getProteinStructure ############################################################################### sub getProteinStructure { my %args = @_; my $SUB_NAME = 'getProteinStructure'; #### Decode the argument list my $biosequence_id = $args{'biosequence_id'} || die "ERROR[$SUB_NAME]: biosequence_id not passed"; #### Define query to get information my $sql = qq~ SELECT n_transmembrane_regions,transmembrane_class,transmembrane_topology, has_signal_peptide,has_signal_peptide_probability, signal_peptide_length,signal_peptide_is_cleaved FROM $TBAT_BIOSEQUENCE_PROPERTY_SET WHERE biosequence_id = $biosequence_id ~; my @rows = $sbeams->selectHashArray($sql); if (scalar(@rows) != 1) { my %tmp = (); return(\%tmp); } return($rows[0]); } sub get_table_help { my $name = shift; return '' unless $name; my @entries; my $hidetext; my $showtext; my $heading; my $description; if ( $name eq 'observed_peptides' ) { @entries = ( { key => "Peptide Accession", value => 'Peptide Atlas accession number, beginning with PAp followed by 9 digits.' }, { value => 'Preceding (towards the N terminus) amino acid', key => 'Pre AA' }, { value => 'Amino Acid sequence of this peptide', key => 'Peptide Sequence'}, { value => 'Following (towards the C terminus) amino acid', key => 'Fol AA' }, { value => 'Score derived from peptide probability, EOS, and sequence characteristics such as missed cleavage [MC] or semi-tryptic [ST], or
multiple genome locations [MGL]. These are annotated in red as shown.', key => 'Suitability Score' }, { value => 'Highest PeptideProphet probability for this observed sequence', key => 'Best Prob' }, { value => '', key => 'Best Adjusted Prob'}, { value => 'Total number of observations in all modified forms and charge states', key => 'N Obs' }, { value => 'Empirical Observability Score', key => 'EOS' }, { value => 'SSRCalc Relative Hydrophobicity score', key => 'RHS' }, { value => 'Number of proteins in the reference database to which this peptide maps', key => 'N Protein Mappings' }, { value => 'Number of discrete genome locations which encode this amino acid sequence', key => 'N Genome Locations' }, { value => 'Samples in which this sequence was seen', key => 'Sample IDs' }, { value => 'Observed peptides of which this peptide is a subsequence', key => 'Parent Peptides' }, ); $showtext = 'show column descriptions'; $hidetext = 'hide column descriptions'; $heading = 'Observed Peptides'; $description= 'Peptides observed in MS/MS experiments'; } elsif ( $name eq 'annotated_transitions' ) { @entries = ( { key => 'Sequence', value => 'Amino acid sequence of detected pepide, including any mass modifications.' }, { key => 'Charge', value => 'Charge on Q1 (precursor) peptide ion.' }, { key => 'q1_mz', value => 'Mass to charge ratio of precursor peptide ion.' }, { key => 'q3_mz', value => 'Mass to charge ratio of fragment ion.' }, { key => 'Label', value => 'Ion-series designation for fragment ion (Q3).' }, { key => 'Intensity', value => 'Intensity of peak in CID spectrum' }, { key => 'CE', value => 'Collision energy, the kinetic energy conferred to the peptide ion and resulting in peptide fragmentation. (eV)' }, { key => 'RT', value => 'Peptide retention time( in minutes ) in the LC/MS system.' }, { key => 'SSRCalc', value => "Sequence Specific Retention Factor provides a hydrophobicity measure for each peptide using the algorithm of Krohkin et al. Version 3.0 [more]" }, { key => 'Instr', value => 'Model of mass spectrometer on which transition pair was validated.' }, { key => 'Annotator', value => 'Person/lab who contributed validated transition.' }, { key => 'Quality', value => 'Crude scale of quality for the observation, currently one of Best, OK, and No. ' }, ); $showtext = 'show column descriptions'; $hidetext = 'hide column descriptions'; $heading = 'Annotated Tranitions'; $description= 'Contributed Q1/Q3 transition pairs for SRM experiments'; } return unless @entries; my $help = $sbeamsMOD->get_table_help_section( name => $name, description => $description, heading => $heading, entries => \@entries, showtext => $showtext, hidetext => $hidetext ); return $help; } # end get_table_help ############################################################################### # printUserAnnotations # get and display user annotations, if any ############################################################################### sub printUserAnnotations { my %args = @_; my $parameters_href = $args{parameters_href}; my $protein_name = $args{protein_name}; my $buffer; my $total = 0; # this starts as a counter, then becomes a string my $show_form = 1; my $line_sep = "
"; if (! defined $protein_name) { print "

ERROR: printUserAnnotations needs protein name.

\n"; } # First, get the protein_identification_id. # TODO should check for uniqueness my $sql = qq~ select pi.protein_identification_id from $TBAT_PROTEIN_IDENTIFICATION pi join $TBAT_BIOSEQUENCE bs on bs.biosequence_id = pi.biosequence_id where BS.biosequence_name = '$protein_name' and pi.atlas_build_id = '$parameters_href->{atlas_build_id}' ~; my ($protein_identification_id) = $sbeams->selectOneColumn($sql); $parameters_href->{protein_identification_id} = $protein_identification_id; if (! $protein_identification_id) { print "

ERROR: printUserAnnotations could not find protein_identification_id.

\n"; } # TMF removed items 4,5,6 from SELECT statement; changed SPECTRUM_ANNOTATION SA to # PEPTIDE_INSTANCE_ANNOTATION PIA my $sql = qq~ SELECT PIA.protein_identification_annotation_id, PIA.comment, PIA.date_modified, SL.spectrum_annotation_level_id, SL.level_name, C.first_name, C.last_name, UL.username FROM $TBAT_PROTEIN_IDENTIFICATION_ANNOTATION PIA INNER JOIN $TBAT_SPECTRUM_ANNOTATION_LEVEL SL ON ( PIA.spectrum_annotation_level_id = SL.spectrum_annotation_level_id ) INNER JOIN $TB_CONTACT C ON ( annotator_contact_id = C.contact_id ) INNER JOIN $TB_USER_LOGIN UL ON ( UL.contact_id = annotator_contact_id ) INNER JOIN $TBAT_PROTEIN_IDENTIFICATION PI ON ( PI.protein_identification_id = PIA.protein_identification_id ) WHERE PIA.record_status = 'N' AND PIA.protein_identification_id = '$protein_identification_id' ORDER BY PIA.date_modified DESC ~; my @rows = $sbeams->selectSeveralColumns($sql); $buffer = qq~ ~; if (@rows) { foreach my $row (@rows) { # TMF removed items 4,5,6 $sequence $charge my ($annot_id, $comment, $date, $level_id, $level, $first, $last, $uname, ) = @{$row}; $total++; # TMF #if ($args{modified_sequence}.$args{charge} eq $sequence.$charge) { $buffer .= "\n"; $buffer .= &printUserAnnotationsForm( form_type => 'update', uname => $uname, default_level_id => $level_id, default_comment => $comment, annot_id => $annot_id, extra_form_fields => $args{form_fields} ); } $buffer .= "\n$line_sep\n"; } } else { $buffer .= qq~ $line_sep ~; } if ($current_username eq 'guest') { my $url = $q->self_url(); $url .= '&force_login=yes'; $buffer .= qq~ $line_sep ~; } elsif ($show_form) { $buffer .= "\n"; $buffer .= &printUserAnnotationsForm( form_type => 'add', uname => $current_username, # TMF #spectrum_id => $args{spectrum_id}, protein_identification_id => $protein_identification_id, extra_form_fields => $args{form_fields} ); $buffer .= "\n$line_sep\n"; } $buffer .= "
User annotations: how good is the UNIQUE evidence for this protein in this PeptideAtlas build? Should this protein form be included in Swiss-Prot and neXtProt? 
$level$first $last ($date)
\n"; #-TMF---------------------------------------------- # } else { # my $link = $q->self_url(); # $link =~ s/\?.*//; # clear querystring # $link .= "?spectrum_identification_id=$ident_id;peptide=$sequence;assumed_charge=$charge"; #-------------------------------------------------- #-------------------------------------------------- # $buffer .= "
$level
$sequence +$charge
$first $last ($date)
\n"; #-------------------------------------------------- #} my $disp_comment = $comment; $disp_comment =~ s|\n|
\n|g; # display carriage returns $buffer .= "$disp_comment\n"; if ($uname eq $current_username) { $show_form = 0; # TMF spectrum_annotation_id => protein_identification_annotation_id $buffer .= qq~
Edit my annotation | Delete
$args{form_fields}
~; $buffer .= "
  There are no user annotations for this protein identification.
  Log into PeptideAtlas to add an annotation for this protein identification.
Add annotation 
\n"; # get average # TMF $sql = qq~ SELECT ROUND(AVG(SL.level_probability)*100,0) AS avg_prob_pct, COUNT(*) FROM $TBAT_SPECTRUM_ANNOTATION_LEVEL SL INNER JOIN $TBAT_PROTEIN_IDENTIFICATION_ANNOTATION PIA ON ( PIA.spectrum_annotation_level_id = SL.spectrum_annotation_level_id ) WHERE PIA.protein_identification_id = '$protein_identification_id' AND PIA.record_status = 'N' ~; @rows = $sbeams->selectSeveralColumns($sql); my ($avg, $num) = @{$rows[0]}; if ($total != $num) { $total = "($total total)"; } else { $total = ''; } my $innerHTML; if ($num == 1) { $innerHTML = "One user annotation $total, score $avg%"; } elsif ($num > 1) { $innerHTML = "$num user annotations $total, average score $avg%"; } elsif ($num == 0) { $innerHTML = "No user annotations"; } $buffer .=<< "EOJS" if $innerHTML; EOJS return $buffer; } ############################################################################### # printUserAnnotationsForm # get and display user annotations, if any ############################################################################### sub printUserAnnotationsForm { my %args = @_; $args{form_type} ||= 'add'; $args{uname} ||= 'user_anon'; $args{default_level_id} ||= 0; $args{default_comment} ||= ''; $args{annot_id} ||= 0; $args{extra_form_fields} ||= ''; my $spacer = " "x5; my $trname = $args{uname}; $trname .= ($args{form_type} eq 'add') ? 'add' : $args{annot_id}; my $buffer = qq~ ~; # add a hidden for edit form my $sql = qq~ SELECT spectrum_annotation_level_id, level_probability, level_name, level_description FROM $TBAT_SPECTRUM_ANNOTATION_LEVEL WHERE record_status = 'N' ORDER BY sort_order ~; my @levels = $sbeams->selectSeveralColumns($sql); my $select = ""; # TMF -- I don't think I need this stuff below. my $form_action = "ADD"; if ($args{form_type} eq 'update') { $form_action = "UPDATE"; # TMF spectrum_annotation_id => protein_identification_annotation_id $args{extra_form_fields} .= ""; } else { # TMF spectrum_id => protein_identification_id $args{extra_form_fields} .= ""; } $buffer .= qq~
$args{extra_form_fields} $select
$spacerCANCEL
~; # caller will close the td and tr return $buffer; } ############################################################################### # displayExternalLinksSection # # Display a section for information about this protein in other resources ############################################################################### sub displayExternalLinksSection { my %args = @_; #### Process the arguments list my $biosequence = $args{biosequence} || die("ERROR: No biosequence passed"); #### Create widget to allow show/hide of overview section my ($tr,$link) = $sbeams->make_table_toggle( name => 'getprotein_ExternalLinks', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $external_links = ''; my $section_header = $sbeamsMOD->encodeSectionHeader( text => 'External Links', link => $link, ); #### Debug if ( 0 == 1 ) { use Data::Dumper; my $tmp = Dumper( $biosequence->{synonyms} ); if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'All synonyms', tr_info => $tr, value => $tmp, url => "", ); } } #### Display a link to Human Protein Atlas (hpr) my $currentOrganism = $current_page->{organism} || $sbeamsMOD->getCurrentAtlasOrganism(parameters_ref=>{}); my $entrezGeneID = $biosequence->{synonyms}->{'Entrez Gene Symbol'}; if ($currentOrganism eq 'Human' && $entrezGeneID) { if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'Human Protein Atlas', tr_info => $tr, value => qq~
$entrezGeneID~, url => "", ); } } #### Display a link to the Global Proteome Machine (GPM) my $ensemblProtein = $biosequence->{synonyms}->{'Ensembl Protein'}; if ($ensemblProtein) { if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'Global Proteome Machine', tr_info => $tr, value => $ensemblProtein, url => "http://gpmdb.thegpm.org/protein/accession/$ensemblProtein", ); } } #### Display a link to Protter by Bernd Wollscheid my $uniprotProtein = $biosequence->{synonyms}->{'UniProt'}; if ($uniprotProtein) { if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'Protter (Interactive Protein visualization)', tr_info => $tr, value => $uniprotProtein, url => "http://wlab.ethz.ch/protter/#up=${uniprotProtein}&tm=auto", ); } } #### Close section if ($sbeams->output_mode() eq 'html' && $external_links ) { print qq~ $section_header $external_links
~; } return 1; } # end displayExternalLinksSection