#!/usr/local/bin/perl ############################################################################### # Program : GetPTP # $Id: GetProtein 6439 2010-05-24 17:44:08Z dcampbel $ # # Description : Prints summary of a given protein given selection # atlas build and protein name. # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; $|++; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; $sbeams = new SBEAMS::Connection; use SBEAMS::BioLink; my $biolink = SBEAMS::BioLink->new(); $biolink->setSBEAMS($sbeams); use SBEAMS::BioLink; my $biolink = new SBEAMS::BioLink; use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); use SBEAMS::PeptideAtlas::KeySearch; my $keySearch = new SBEAMS::PeptideAtlas::KeySearch; $keySearch->setSBEAMS($sbeams); use SBEAMS::PeptideAtlas::BestPeptideSelector; my $bestPeptideSelector = new SBEAMS::PeptideAtlas::BestPeptideSelector; $bestPeptideSelector->setSBEAMS($sbeams); $bestPeptideSelector->setAtlas($sbeamsMOD); # Global sequence coverage array, will be populated post-graphic my @coverage; # Swiss Prot annotations my @sp_rows; my $sp_rationale; use constant MIN_OBS_LENGTH => 6; use constant MAX_OBS_LENGTH => 50; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin', 'PeptideAtlas_readonly', 'PeptideAtlas_exec'], #connect_read_only=>1, allow_anonymous_access=>1, )); #### Read in the default input parameters my %parameters; my $n_params_found = $sbeams->parse_input_parameters( q=>$q, parameters_ref=>\%parameters); #$sbeams->printDebuggingInfo($q); #### Process generic "state" parameters before we start $sbeams->processStandardParameters(parameters_ref=>\%parameters); #### Decide what action to take based on information so far if ($parameters{action} eq "???") { # Some action } else { my $project_id = $sbeamsMOD->getProjectID( atlas_build_id => $parameters{atlas_build_id} ); $sbeamsMOD->display_page_header(project_id => $project_id, init_tooltip => 1); handle_request(ref_parameters=>\%parameters); $sbeamsMOD->display_page_footer(); } } # end main ############################################################################### # Handle Request ############################################################################### sub handle_request { my %args = @_; my $spacer = $sbeams->getGifSpacer( 900 ); my $htmlmode = ( $sbeams->output_mode() eq 'html' ) ? 1 : 0; #### Process the arguments list my $ref_parameters = $args{'ref_parameters'} || die "ref_parameters not passed"; my %parameters = %{$ref_parameters}; # put a spacer so that showing hidden content doesn't mangle the layout print "
$spacer\n" if $htmlmode; #### Get the HTML to display the tabs my $tabMenu = $sbeamsMOD->getTabMenu( parameters_ref => \%parameters, program_name => $PROG_NAME, ); print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html'); #### Get the search keyword my $protein_name = $parameters{"protein_name"}; #### If a new protein_name was supplied, store it if ($protein_name) { $sbeams->setSessionAttribute( key => 'PeptideAtlas_protein_name', value => $protein_name, ); #### Else see if we had one stored } else { $protein_name = $sbeams->getSessionAttribute( key => 'PeptideAtlas_protein_name', ); if ($protein_name) { $parameters{'apply_action'} = 'GO'; } } #### Define some variables for a query and resultset my %resultset = (); my $resultset_ref = \%resultset; my (%url_cols,%hidden_cols,%max_widths,$show_sql); #### Read in the standard form values my $apply_action = $parameters{'action'} || $parameters{'apply_action'}; my $TABLE_NAME = $parameters{'QUERY_NAME'}; #### Set some specific settings for this program my $CATEGORY="Get Protein"; my $PROGRAM_FILE_NAME = $PROG_NAME; my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; my $help_url = "$CGI_BASE_DIR/help_popup.cgi"; #### If the apply action was to recall a previous resultset, do it my %rs_params = $sbeams->parseResultSetParams('q' => $q); my $n_params_found = 0; if ($apply_action eq "VIEWRESULTSET") { $sbeams->readResultSet( resultset_file=>$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, ); $n_params_found = 99; } #### Get a list of accessible project_ids #### If the output_mode is HTML, then display the form if ($sbeams->output_mode() eq 'html') { print qq~ ~; print "

"; print ""; print $q->start_form(-method=>"POST", -action=>"$base_url", -name=>"SearchForm", ); print "Protein Name: "; my $organism = $sbeamsMOD->getCurrentAtlasOrganism( parameters_ref => {} ); my $gaggle = $sbeams->getGaggleMicroformat( organism => $organism, data => [$protein_name], object => 'namelist', name => 'Protein name', type => 'direct' ); print "$gaggle\n"; print $q->textfield( "protein_name", $protein_name); print $q->hidden( "apply_action", ''); print "   "; print $q->submit(-name => "action", -value => 'QUERY', -label => 'GO'); print $q->end_form; print ""; print "

"; } ######################################################################### #### Process all the constraints #### If biosequence_name was not selected, stop here unless ($protein_name) { if ($sbeams->output_mode() eq 'html') { print "Please type in a protein name ". "(e.g. A8MVW0 for human protein), and ". "click GO"; } else { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => 'You must provide a protein_name', ); } return; } #### Build PROTEIN_NAME constraint my $biosequence_name_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_name", constraint_type=>"plain_text", constraint_name=>"Protein Name", constraint_value=>$protein_name ); return if ($biosequence_name_clause eq '-1'); #### Define the SQL statement to find the biosequence my $sql = qq~ SELECT TOP 1 AB.atlas_build_id, BS.biosequence_id, AB.atlas_build_name, dbxref_id FROM $TBAT_BIOSEQUENCE BS LEFT JOIN $TBAT_ATLAS_BUILD AB ON (BS.biosequence_set_id = AB.biosequence_set_id) WHERE 1 = 1 $biosequence_name_clause AND AB.atlas_build_name not like '%synth%' AND AB.atlas_build_name not like '%dirty%' ORDER BY AB.atlas_build_id DESC ~; my @rows = $sbeams->selectSeveralColumns($sql); #### If the protein was not found, report the problem if (@rows == 0) { if ($sbeams->output_mode() eq 'html') { print qq~ The protein entered '$protein_name' was not found in the biosequence database. ~; } else { $sbeams->reportException( state => 'ERROR', type => 'RECORD NOT FOUND', message => "The protein entered '$protein_name' was not found ". "in the biosequence database", ); } return; } my ($atlas_build_id,$biosequence_id,$atlas_build_name, $dbxref_id) = @{$rows[0]}; # die "DBX is $dbxref_id\n"; #### Return some information about this biosequence $sql = qq~ SELECT BS.biosequence_id, BS.biosequence_name,BS.biosequence_gene_name, BS.biosequence_accession, BS.biosequence_desc, BS.biosequence_seq,BSS.set_name, DBX.accessor,DBX.accessor_suffix, BAG.annotated_gene_id, BPS.transmembrane_class, BPS.transmembrane_topology, BPS.has_signal_peptide, BPS.has_signal_peptide_probability, BPS.signal_peptide_length, BPS.signal_peptide_is_cleaved FROM $TBAT_BIOSEQUENCE_SET BSS INNER JOIN $TBAT_BIOSEQUENCE BS ON ( BSS.biosequence_set_id = BS.biosequence_set_id ) LEFT JOIN $TBAT_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id ) LEFT JOIN $TBAT_BIOSEQUENCE_ANNOTATED_GENE BAG ON ( BAG.biosequence_id = BS.biosequence_id ) LEFT JOIN $TBAT_BIOSEQUENCE_PROPERTY_SET BPS ON ( BS.biosequence_id = BPS.biosequence_id ) WHERE BS.biosequence_id = $biosequence_id ~; my @rows = $sbeams->selectHashArray($sql); my $biosequence = $rows[0]; # Supercede with SwissProt info my $sp_sql = qq~ SELECT BS.biosequence_id, signal_end, signal_start FROM $TBAT_BIOSEQUENCE BS JOIN peptideatlas.dbo.swiss_prot_annotation SPA ON BS.biosequence_accession = SPA.accession WHERE BS.biosequence_id = $biosequence_id ~; my $sth = $sbeams->get_statement_handle($sp_sql); while ( my @db_row = $sth->fetchrow_array() ) { @sp_rows = @db_row; last; } ############################################################################# # Widget to allow show/hide of overview section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getptp_overview', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); #### Print out a summary of the protein if ($sbeams->output_mode() eq 'html') { my $section_header = $sbeamsMOD->encodeSectionHeader( text => $biosequence->{biosequence_name}, link => $link ); print qq~ $section_header ~; print $sbeamsMOD->encodeSectionItem( key=>'Protein Name', tr_info => $tr, value=>$biosequence->{biosequence_name}, #url=>"$biosequence->{accessor}$biosequence->{biosequence_accession}$biosequence->{accessor_suffix}", ); print $sbeamsMOD->encodeSectionItem( key=>'Gene Name', tr_info => $tr, value=>$biosequence->{biosequence_gene_name}, ); print $sbeamsMOD->encodeSectionItem( key=>'Description', tr_info => $tr, value=>$biosequence->{biosequence_desc}, ); my @synonyms = $keySearch->getProteinSynonyms( resource_name => $biosequence->{biosequence_name} ); my %syn_types; foreach my $synonym ( @synonyms ) { #$log->debug( "Syn loop, $synonym->[0], $synonym->[1], $synonym->[2],$synonym->[3]" ); $syn_types{$synonym->[1]} ||= []; chomp( $synonym->[0] ); #### Store the synonym information $biosequence for later use $biosequence->{synonyms}->{$synonym->[1]} = $synonym->[0]; if ( $synonym->[2] ) { my $str; if($synonym->[2] =~ /DisplayGoTerm/ and $synonym->[0] =~ /GO:\d+:/){ $synonym->[0] =~ /GO:(\d+):(.*)/; $str = "GO:$1"; $synonym->[0] = "[2]$str$synonym->[3]\">$synonym->[0]"; } else { if ( $synonym->[3] ) { $synonym->[0] = "[2]$synonym->[3]$synonym->[0]\">$synonym->[0]"; } else { $synonym->[0] = "[2]$synonym->[0]\">$synonym->[0]"; } } } push @{$syn_types{$synonym->[1]}}, $synonym->[0]; } for my $key ( sort( keys( %syn_types ) ) ) { my $cnt = 0; my $max = 7; my $syn_string = ''; my $delim = ''; for my $entry ( @{$syn_types{$key}} ) { $cnt++; $syn_string .= "$delim $entry"; $delim = ','; } print $sbeamsMOD->encodeSectionItem( key => $key, value => $syn_string, tr_info => $tr ); } # Check to see if there is ortholog information my $orthologs = get_ortholog_information( $biosequence ); if ( $orthologs ) { print $sbeamsMOD->encodeSectionItem( key => 'Ortholog Group', tr_info => $tr, value => $orthologs ); } } ############################################################################# #### Continue with main query #### Build the columns part of the SQL statement my %colnameidx = (); my @column_titles = (); ######################################################################### #### If QUERY or VIEWRESULTSET was selected, display the data if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) { #### Get protein structure information my $protein_structure = getProteinStructure( biosequence_id => $biosequence_id, ); my $tryptic_pep_ref = $sbeamsMOD->do_tryptic_digestion( aa_seq => $biosequence->{biosequence_seq} , min_len => 6); my $tryptic_peptide_counts = scalar @$tryptic_pep_ref; # Stragglers for protein section, doh! if ( $htmlmode ) { print $sbeamsMOD->encodeSectionItem( tr_info => $tr, key => 'Tryptic Peptides with length >=6', value => "$tryptic_peptide_counts" ); } ############################################################################# #### Display the External Links section displayExternalLinksSection(biosequence=>$biosequence); ############################################################# ## Display the sequence graphic ## my $motif = $sbeamsMOD->getBuildMotif(build_id => $atlas_build_id); # Widget to allow show/hide of graphic section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getptp_graphic', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my %motif_params; if ( $motif eq 'glyco' ) { my $pred = $sbeamsMOD->getGlycoPeptides( seq => $biosequence->{biosequence_seq}, annot => 1, 'index' => 1, symbol => '*' ); $motif_params{$motif} = $pred; } my @peptides =(); my $peptides_ref = \@peptides; my %graphic_params = ( build_id => $atlas_build_id, protein_data => $biosequence, tr_info => $tr, obs_color => $parameters{obs_color} , peptides_ref => $peptides_ref); my $graphic_section = get_sequence_graphic( %graphic_params, %motif_params ); my $graphic_head = $sbeamsMOD->encodeSectionHeader( text => "Sequence Motifs", link => $link ); print "
$graphic_head
$graphic_section" if $htmlmode; ############################################################# ## Display annotated sequence - this routine makes its own section header ## #### Display the annotated sequence displayAnnotatedSequence( %parameters, peptides => $peptides_ref, biosequence => $biosequence, #resultset_ref=>$resultset_ref, protein_structure => $protein_structure, glyco => $motif_params{glyco}, ); #### Widget to allow show/hide of section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getptp_highlyobservablelist', visible => 0, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1, ); #### Display the section header my $highlyObservablePeptidesHeader = $sbeamsMOD->encodeSectionHeader( text => 'Predicted Highly Observable Peptides', link => $link ); #### Calculate the best peptides to use my $best_peptide_information = $bestPeptideSelector->getHighlyObservablePeptides( atlas_build_id => $atlas_build_id, biosequence_id => $biosequence_id, ); if ( $htmlmode && $sbeams->rs_has_data( resultset_ref =>$best_peptide_information->{resultset_ref}) ) { #### Display the best peptide information my $samples = $bestPeptideSelector->getHighlyObservablePeptidesDisplay( atlas_build_id => $atlas_build_id, add_num_mapped => 1, dbxref_id => $dbxref_id, best_peptide_information => $best_peptide_information, query_parameters_ref=>\%parameters, column_titles_ref=>\@column_titles, link => $link, base_url=>$base_url, tr_info => $tr, ); print qq~ $highlyObservablePeptidesHeader
$samples
~; } #### If QUERY was not selected, then tell the user to enter some parameters } else { if ($sbeams->invocation_mode() eq 'http') { print "

Select parameters above and press QUERY

\n"; } else { print "You need to supply some parameters to contrain the query\n"; } } } # end handle_request sub get_ortholog_information { my $biosequence = shift; return unless $biosequence->{biosequence_name}; my $sql =<<" END"; SELECT ortholog_group FROM biolink.dbo.ortholog WHERE entry_accession = '$biosequence->{biosequence_name}' END my $link = ''; my $sth = $sbeams->get_statement_handle( $sql ); while ( my @row = $sth->fetchrow_array() ) { $link .= qq~  $row[0]\n~; } return $link if $link; $sql =<<" END"; SELECT ortholog_group FROM biolink.dbo.ortholog O JOIN $TBAT_SEARCH_KEY SK ON SK.search_key_name = O.entry_accession JOIN $TBAT_BIOSEQUENCE B ON B.biosequence_name = SK.resource_name WHERE B.biosequence_name = '$biosequence->{biosequence_name}' END my $sth = $sbeams->get_statement_handle( $sql ); while ( my @row = $sth->fetchrow_array() ) { $link .= qq~  $row[0]\n~; } return $link; } sub get_mrm_transitions { shift; my $accessions = shift || return []; my $acc_string = "'" . join( "', '", @{$accessions} ) . "'" ; # Project control my @accessible = $sbeams->getAccessibleProjects(); my $projects = join( ",", @accessible ); return '' unless $projects; my $sql =<<" END"; SELECT peptide_accession, modified_peptide_sequence, peptide_charge, q1_mz, q3_mz, q3_ion_label, q3_peak_intensity, collision_energy, retention_time, instrument, CASE WHEN contact_id IS NULL THEN annotator_name ELSE username END AS name, level_name FROM $TBAT_MODIFIED_PEPTIDE_ANNOTATION MPA JOIN $TBAT_PEPTIDE P ON MPA.peptide_id = P.peptide_id JOIN $TBAT_TRANSITION_SUITABILITY_LEVEL TSL ON TSL.transition_suitability_level_id = MPA.transition_suitability_level_id LEFT JOIN $TB_USER_LOGIN UL ON UL.contact_id = MPA.annotator_contact_id WHERE peptide_accession IN ( $acc_string ) AND project_id IN ( $projects ) AND level_score > 0.2 ORDER BY peptide_accession, q1_mz, peptide_charge, level_score DESC, q3_peak_intensity DESC END my @rows = $sbeams->selectSeveralColumns($sql); return \@rows; } sub get_synthesized_peptides { my $accessions = shift || return []; my $acc_string = "'" . join( "', '", @{$accessions} ) . "'" ; # Project control my @accessible = $sbeams->getAccessibleProjects(); my $projects = join( ",", @accessible ); return '' unless $projects; my $sql =<<" END"; SELECT peptide_accession, P.peptide_sequence, peptide_annotation, publication_name, CASE WHEN contact_id IS NULL THEN annotator_name ELSE username END AS name, PA.peptide_sequence FROM $TBAT_PEPTIDE_ANNOTATION PA JOIN $TBAT_PEPTIDE P ON PA.peptide_id = P.peptide_id LEFT JOIN $TB_USER_LOGIN UL ON UL.contact_id = PA.annotator_contact_id LEFT JOIN $TBAT_PUBLICATION PP ON PP.publication_id = PA.publication_id WHERE peptide_accession IN ( $acc_string ) AND project_id IN ( $projects ); END my @rows = $sbeams->selectSeveralColumns($sql); return \@rows; } #+ # Generates a graphical overview of sequence features using bioperl routines # #- sub get_sequence_graphic { ## read passed args, set up variables ## my %args = @_; my $tmhmm = $args{protein_data}->{transmembrane_topology}; my $id = $args{protein_data}->{biosequence_id}; my $protseq = $args{protein_data}->{biosequence_seq}; my $protlen = length( $protseq ); my $peptides_ref = $args{peptides_ref}; my $sql = qq~ SELECT DISTINCT BS.BIOSEQUENCE_ID FROM $TBAT_PROTEOTYPIC_PEPTIDE PP,$TBAT_PROTEOTYPIC_PEPTIDE_MAPPING PPM, $TBAT_BIOSEQUENCE BS WHERE PP.PROTEOTYPIC_PEPTIDE_ID = PPM.PROTEOTYPIC_PEPTIDE_ID AND PPM.SOURCE_BIOSEQUENCE_ID = BS.BIOSEQUENCE_ID AND BS.BIOSEQUENCE_NAME = ( SELECT BS3.BIOSEQUENCE_NAME FROM $TBAT_BIOSEQUENCE BS3 WHERE BS3.BIOSEQUENCE_ID = $id ) GROUP BY BS.BIOSEQUENCE_ID HAVING (COUNT (PP.COMBINED_PREDICTOR_SCORE)*100/COUNT(BS.BIOSEQUENCE_ID))>= 90 ORDER BY BS.BIOSEQUENCE_ID DESC ~; my @biosequece_ids = $sbeams->selectOneColumn($sql); if(@biosequece_ids and $biosequece_ids[0] ne $id ){ $id = $biosequece_ids[0]; } $sql = qq~ SELECT DISTINCT PTP.peptide_sequence, PTP.combined_predictor_score, PTPM.n_protein_mappings, CAST(BS.biosequence_seq AS VARCHAR(8000)) FROM $TBAT_PROTEOTYPIC_PEPTIDE PTP LEFT JOIN $TBAT_PROTEOTYPIC_PEPTIDE_MAPPING PTPM ON ( PTP.proteotypic_peptide_id = PTPM.proteotypic_peptide_id ) LEFT JOIN $TBAT_PEPTIDE P ON ( PTP.matched_peptide_id = P.peptide_id ) LEFT JOIN $TBAT_BIOSEQUENCE BS ON ( PTPM.source_biosequence_id = BS.biosequence_id ) WHERE 1 = 1 AND PTPM.source_biosequence_id = $id AND ( PTP.combined_predictor_score is not null OR peptide_accession IS NOT NULL ) ORDER BY PTP.combined_predictor_score DESC ~; my @rows = $sbeams->selectSeveralColumns( $sql ); # Set up hash for storing point by point coverage info my %coverage; for my $idx ( 1..$protlen ) { $coverage{$idx} = 0; } # Define color mapping for various features. my %colors = ( Signal => 'cornflowerblue', Anchor => 'lightskyblue', Transmembrane => 'greenyellow', Intracellular => 'coral', Extracellular => 'mediumseagreen', Coverage => 'beige', Translated => 'gainsboro', Observed => $args{obs_color} || 'firebrick' , Glycopeptide => 'goldenrod', Difficult => 'bisque' ); # Define CSS classes my $sp = ' ' x 4; my $style =<<" END_STYLE"; END_STYLE ## Create main panel + sequence 'ruler' ## my $panel = Bio::Graphics::Panel->new( -length => $protlen + 2, -key_style => 'between', -width => 800, -empty_tracks => 'suppress', -pad_top => 5, -pad_bottom => 5, -pad_left => 10, -pad_right => 50 ); # open( FIL, ">/tmp/colors.html" ); # print FIL "\n"; # my @c = $panel->color_names(); # for my $c ( @c ) { print FIL "\n"; } # print FIL "
$sp$c$sp$sp
\n"; # close FIL; my $ruler = Bio::SeqFeature::Generic->new( -end => $protlen, -start => 2, -display_name => 'Sequence Position'); my $sequence = Bio::SeqFeature::Generic->new( -end => $protlen, -start => 1, -display_name => 'Sequence Position'); ## Generate observed peptide tracks ## # color peptides based on combined score # my $max = 0; # my $threshold = 1; # my $lten = log(10); # foreach my $row (@rows) { # $max = ( $max > $row->[1] ) ? $max : $row->[1]; # if ( $max > $threshold ) { # $max = $threshold; # last; # } # } # $max = log($max)/$lten; my $max = 1; # Loop over peptides my @peptides; my %pep_info; my ( $multi, $single ); my $acc =1; foreach my $row (@rows) { #my $acc = $row->[0]; my $pepseq = $row->[0]; my $score = $row->[1]; my $proteinseq = $row ->[3]; my $str = $proteinseq; $str =~ s/$pepseq.*//g; my $start = length($str); my $stop = length($str)+length($pepseq); # accession isn't necessarily unique, need compound key... my $ugly_key = $acc . '::::' . $start . $stop; my $f = Bio::SeqFeature::Generic->new( -start => $start, -end => $stop, -primary => $acc, -display_name => $ugly_key, -score => $score ); push @peptides, $f; # Record the coverage for this peptide for my $idx ( $start..$stop ) { $coverage{$idx}++; } # count peptide type, to build appropriate legend if ( $score ) { $single++; } else { $multi++; } $pep_info{$ugly_key} = "$start - $stop, $pepseq ($score combined)"; $acc++; } @$peptides_ref = @peptides; ## Generate Signal P related tracks ## my @signalp; my $seqtype = ''; my ( $anchor, $signal ); my $signal_peptide_coords = {}; my $sp_info = scalar( @sp_rows ); if ( $args{protein_data}->{has_signal_peptide} || $sp_info ) { if ( $args{protein_data}->{signal_peptide_is_cleaved} =~ /y/i || $sp_info ) { $seqtype = 'Signal'; $signal++; } else { $seqtype = 'Anchor'; $anchor++; } $log->info( join("::", @sp_rows ) ); my $end = ( $sp_info ) ? $sp_rows[1] : $args{protein_data}->{signal_peptide_length}; $sp_rationale = ( $sp_info ) ? 'Signal peptide annoted in SwissProt' : 'Signal sequence predicted by Signal P, cleaved in mature protein'; # Cache signal peptide info for possible 'unlikely' designation. if ( $seqtype eq 'Signal' ) { $signal_peptide_coords->{start} = 1; $signal_peptide_coords->{end} = $end; $signal_peptide_coords->{seq} = substr( $protseq, 0, $end ); $signal_peptide_coords->{rationale} = $sp_rationale; } my $f = Bio::SeqFeature::Generic->new( -start => 1, -end => $end, -display_name => $seqtype , -label => 0 ); push @signalp, $f; } ## End Signal P related tracks ## ## Generate Glyco motif related tracks ## my @glyco; if ( $args{glyco} ) { my $seqtype = 'Glycopeptide'; for my $idx ( sort { $a <=> $b } keys( %{$args{glyco}} ) ) { my $peptide = $args{glyco}->{$idx}; my $num_sites = $peptide =~ tr/\*/\*/; my $p_len = length( $peptide ) - $num_sites; # $log->debug( "Peptide $peptide, starting at $idx, has $num_sites sites and is $p_len amino acids long" ); my $f = Bio::SeqFeature::Generic->new( -start => $idx, -end => $idx + $p_len - 1, -display_name => $seqtype , -primary => $peptide, -label => $peptide ); push @glyco, $f; } } ## End Glyco motif related tracks ## # Calculate the coverage 'domains' # Made this global to use elsewhere... @coverage; my @uncovered; # Keep track of coverage coordinates my $cstart = 1; my $cend = 0; # Also keep track of non-coverage coordinates my $ncstart = 1; my $ncend = 1; # Not in coverage to begin with my $in_coverage = 0; # What is 1-based index of first observed aa, used for expected coverage. my $min_covered_aa = 0; # %coverage is hash of seq depth keyed by coordinate my $last_key; for my $key ( sort{ $a <=> $b } keys( %coverage ) ) { # Cache min covered aa if not yet set and index is covered $min_covered_aa = $key if ( !$min_covered_aa && $coverage{$key} ); if ( !$in_coverage ) { # Not in coverage if ( $coverage{$key} ) { $cstart = $key; $cend = $key; $in_coverage = 1; # Store non-coverage info $ncend = $key - 1; push @uncovered, { start => $ncstart, end => $ncend } if $ncstart && $ncend; } else { $in_coverage = 0; # No-op } } else { # Already in coverage if ( $coverage{$key} ) { # Start stays the same, increment end $cend = $key; $in_coverage = 1; } else { $in_coverage = 0; # Its showtime! my $f = Bio::SeqFeature::Generic->new( -start => $cstart, -end => $cend, -primary => 'Coverage', -display_name => 'Coverage' ); push @coverage, $f; $cstart = 0; $cend = 0; # Dropped out of coverage, cache ncstart $ncstart = $key; } } $last_key = $key; } if ( !$in_coverage ) { push @uncovered, { start => $ncstart, end => $last_key } if $ncstart && $last_key; } if ( $cend ) { my $f = Bio::SeqFeature::Generic->new( -start => $cstart, -end => $cend, -primary => 'Coverage', -display_name => 'Coverage' ); push @coverage, $f; } # Keep track of how much sequence might be hard to observe my @unlikely_to_observe; # Cache and return to caller for seq annotation my @difficult; # Use to make 'track' on sequence graphic # Original logic discounted entire signal peptide as 'unlikely' if any of # the sp was actually seen. To restore this behaviour set # $penalize_obs_in_signal = 1 my $penalize_obs_in_signal = 0; if ( $signal_peptide_coords && $signal_peptide_coords->{end} ) { if ( $min_covered_aa > $signal_peptide_coords->{end} ) { push @unlikely_to_observe, $signal_peptide_coords; # $log->debug( "1 start is $signal_peptide_coords->{start}, end is $signal_peptide_coords->{end}" ); my $f = Bio::SeqFeature::Generic->new( -start => $signal_peptide_coords->{start}, -end => $signal_peptide_coords->{end}, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f; } elsif ( $penalize_obs_in_signal ) { # We will not consider the signal peptide to be unlikely $log->info( "Found sequence in a predicted signal sequence for $args{protein_data}->{biosequence_name}, penalizing" ); } elsif ( $min_covered_aa != 1 ) { # Signal peptide is still considered unlikely to be observed $log->info( "Allowing obs signal sequence as unlikely for $args{protein_data}->{biosequence_name}" ); # Not strictly correct, but useful to pass this information on $signal_peptide_coords->{end} = $min_covered_aa - 1 if $min_covered_aa; my $signal_seq = substr( $protseq, 0, $signal_peptide_coords->{end} ); # $log->debug( "2 start is $signal_peptide_coords->{start}, end is $signal_peptide_coords->{end}" ); push @unlikely_to_observe, { start => $signal_peptide_coords->{start}, end => $signal_peptide_coords->{end}, seq => $signal_seq }; my $f = Bio::SeqFeature::Generic->new( -start => $signal_peptide_coords->{start}, -end => $signal_peptide_coords->{end}, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f; } } # my $protseq = $args{protein_data}->{biosequence_seq}; for my $cpair ( @uncovered ) { if ( $signal_peptide_coords && $signal_peptide_coords->{end} ) { # $log->debug( "Start is $cpair->{start}, End is $cpair->{end}, seq is $cpair->{seq}" ); next if $cpair->{end} <= $signal_peptide_coords->{end}; } my $uncovered_length = $cpair->{end} - $cpair->{start} + 1; my $seq = substr( $protseq, $cpair->{start} - 1, $uncovered_length ); $cpair->{seq} = $seq; my $tryptics = $biolink->do_tryptic_digestion( aa_seq => $seq ); my $index = $cpair->{start}; for my $tryp ( @$tryptics ) { # anchor if ( length( $tryp ) < MIN_OBS_LENGTH ) { my $peptide_end = $index + length($tryp) - 1; push @unlikely_to_observe, { start => $index, end => $peptide_end, rationale => 'Short peptide', seq => $tryp }; my $f = Bio::SeqFeature::Generic->new( -start => $index, -end => $peptide_end, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f if $index && $peptide_end; } elsif ( length( $tryp ) > MAX_OBS_LENGTH ) { my $peptide_end = $index + length($tryp) - 1; push @unlikely_to_observe, { start => $index, end => $peptide_end, rationale => 'Long peptide', seq => $tryp }; my $f = Bio::SeqFeature::Generic->new( -start => $index, -end => $peptide_end, -primary => 'Difficult', -display_name => 'Difficult' ); push @difficult, $f if $index && $peptide_end; } else { # $log->debug( "Might see $tryp" ); } $index += length( $tryp ); } } # Cache non-coverage sequence $args{protein_data}->{_non_coverage} = \@unlikely_to_observe; ## Generate TMHMM-derived tracks ## # parse domain info my $tm_info = $biolink->get_transmembrane_info ( tm_info => $tmhmm, end => $protlen ); # loop over domains, create features for each my @intra; my @extra; my @tmm; foreach my $region (@$tm_info) { my $primary = $region->[0]; my $tag = $primary; my $f = Bio::SeqFeature::Generic->new( -start => $region->[1], -end => $region->[2] , -primary => $primary , -display_name => $primary , -tag => { $primary => $tag } ); if ( $tag =~ /Intracellular/i ) { push @intra, $f; } elsif ( $tag =~ /Extracellular/i ) { push @extra, $f; } else { push @tmm, $f; } } # Only add intra/extra if @tmm or @signalp unless ( @tmm ) { @intra = (); @extra = (); } ## Add all the tracks to the panel ## $panel->add_track( $ruler, -glyph => 'anchored_arrow', -tick => 2, -height => 8, -key => 'Sequence Position' ); # Add observed peptide track $panel->add_track( \@peptides, -glyph => 'graded_segments', -bgcolor => $colors{Observed}, -fgcolor => 'black', -font2color => '#882222', -key => 'Predicted Peptides', -bump => 1, -height => 8, -label => sub { my $f = shift; my $n = $f->display_name(); $n =~ /^(\d+)/; $n = $1; $n = 0 x (2-length($n)) .$n ; return $n }, -min_score => 0, -max_score => $max ); ## Add glyco track (mebbe) $panel->add_track( \@glyco, -glyph => 'segments', -bgcolor => $colors{Glycopeptide}, -fgcolor => 'black', -key => 'Theoretical NXS/T peptides', -bump => +1, -height => 8, -legend => 1, -label => 0, ) if @glyco; # Add signalP track my $sigtype = ( $anchor ) ? 'Anchor' : 'Signal'; $panel->add_track( \@signalp, -glyph => 'segments', -bgcolor => $colors{$sigtype}, -fgcolor => 'black', -key => $sigtype . ' Sequence (predicted)', -bump => +1, -height => 8, -legend => 1, -label => 0, ); my %tracs = ( Intracellular => \@intra, Extracellular => \@extra, Transmembrane => \@tmm, ); # Add tmhmm-related tracks for my $t ( qw( Intracellular Transmembrane Extracellular ) ) { my %legend = ( Transmembrane => 'Transmembrane Domain', Extracellular => 'Outside membrane', Intracellular => 'Inside membrane'); $panel->add_track( $tracs{$t}, -glyph => 'segments', -bgcolor => $colors{$t}, -fgcolor => 'black', -font2color => 'red', -key => "$legend{$t} (predicted)", -bump => +1, -height => 8, -legend => 1, -label => 0, ); } # # Add coverage track # $panel->add_track( \@coverage, # -glyph => 'segments', # -bgcolor => $colors{Coverage}, # -fgcolor => 'black', # -key => 'Sequence Coverage', # -connector => 'solid', # -bump => 0, # -height => 8, # -label => 0, # sub { my $f = shift; return $f->display_name}, # ); # # Add difficult track $panel->add_track( \@difficult, -glyph => 'segments', -bgcolor => $colors{Difficult}, -fgcolor => 'black', -key => 'Unlikely (theoretical)', -connector => 'solid', -bump => 0, -height => 8, -label => 0, # sub { my $f = shift; return $f->display_name}, ) if @difficult; $panel->add_track( $ruler, -glyph => 'anchored_arrow', -tick => 2, -height => 8, -key => 'Sequence Position' ); # $panel->add_track( $sequence, # -glyph => 'segments', # -bgcolor => $colors{Translated}, # -key => 'Translated Sequence', # -tick => 2 ); # Set up graphic legend my @legend; my %title = ( obs_sing => "Peptides which map to a single genome location", obs_multi => "Peptides which match to 2 or more genome locations", sig_seq => "Signal peptide predicted from amino acid sequence", anc_seq => "Anchor sequence predicted from amino acid sequence", tm_dom => "Transmembrane region predicted from amino acid sequence", in_dom => "Predicted orientation inside, (intracellular for cell membrane proteins)", ex_dom => "Predicted orientation outside, (extracellular for cell membrane proteins)", pep_cov => "Cumulative sequence coverage", unlikely_obs => "tryptic peptides ≤ " . MIN_OBS_LENGTH . ", > " . MAX_OBS_LENGTH . " residues or signal peptides" ); push @legend, " $sp Predicted peptides \n" if $single; push @legend, " $sp Observed peptide with ambiguous genome mapping \n" if $multi; push @legend, " $sp Signal sequence predicted by Signal P \n" if $signal; push @legend, " $sp Anchor sequence predicted by Signal P \n" if $anchor; push @legend, " $sp Transmembrane domain predicted by TMHMM \n" if @tmm; push @legend, " $sp Predicted as inside membrane by TMHMM \n" if @intra; push @legend, " $sp Predicted as outside membrane by TMHMM \n" if @extra; #push @legend, " $sp Protein coverage by observed peptides \n" if @coverage; push @legend, " $sp Peptides unlikely to be observed \n" if @difficult; # push @legend, " $sp Entire Translated Protein Sequence \n"; my $legend = ''; for my $item ( @legend ) { $legend .= $item; } # Create image map from panel objects. # links and mouseover coords for peptides, mouseover coords only for others my $baselink = "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$args{build_id}&searchWithinThis=Peptide+Name&searchForThis=_PA_Accession_&action=QUERY"; my $pid = $$; my @objects = $panel->boxes(); my $map = "\n"; for my $obj ( @objects ) { # $log->debug( join( "-", @$obj) ); my $hkey_name = $obj->[0]->display_name(); my $link_name = $hkey_name; $link_name =~ s/(.*)::::.*/$1/g; # Grrr... if ( $link_name =~ /^PAp\d+$/ ) { # Peptide, add link + mouseover coords/sequence my $coords = join( ", ", @$obj[1..4] ); my $link = $baselink; $link =~ s/_PA_Accession_/$link_name/g; $map .= "\n"; } elsif ( $hkey_name =~ /^Glycopeptide/ ) { # Glycopeptide, mouseover coords/sequence my $coords = join( ", ", @$obj[1..4] ); my $f = $obj->[0]; my $text = $f->start() . '-' . $f->end() . ', ' . $f->primary_tag(); $map .= "\n"; } else { my $f = $obj->[0]; my $coords = join( ", ", @$obj[1..4] ); my $text = $f->start() . '-' . $f->end(); $map .= "\n"; } } $map .= ''; # Create image in tmp space my $file_name = $pid . "_glyco_predict.png"; my $tmp_img_path = "images/tmp"; my $img_file = "$PHYSICAL_BASE_DIR/$tmp_img_path/$file_name"; open( OUT, ">$img_file" ) || die "$!: $img_file"; binmode(OUT); print OUT $panel->png; close OUT; my $tr = $args{tr_info} || ''; # my $tr_link = "$link"; # Generate and return HTML for graphic my $graphic =<<" EOG";
Sorry No Img $map
$legend
$style EOG return $graphic; } sub getPeptideCount { my %args = @_; my $SUB_NAME = $sbeams->get_subname(); #### Decode the argument list my $resultset_ref = $args{'resultset_ref'} || die "ERROR[$SUB_NAME]: resultset_ref not passed"; my $biosequence = $args{'biosequence'} || die "ERROR[$SUB_NAME]: biosequence not passed"; my $line_length = $args{'line_length'} || 70; my $word_length = $args{'word_length'} || 10; my $enzyme = $args{'enzyme'} || ''; my $protein_structure = $args{'protein_structure'}; my $total_observations = $args{'total_observations'}; #### Get the hash of indices of the columns my %col = %{$resultset_ref->{column_hash_ref}}; #### Loop over all the peptides my $data_ref = $resultset_ref->{data_ref}; my @peptides = (); foreach my $row (@{$data_ref}) { push(@peptides,$row->[$col{peptide_sequence}]); $total_observations += $row->[$col{n_observations}]; } return( $total_observations, \@peptides ); } ############################################################################### # displayAnnotatedSequence ############################################################################### sub displayAnnotatedSequence { my %args = @_; my $SUB_NAME = 'displayAnnotatedSequence'; #### Decode the argument list #my $resultset_ref = $args{'resultset_ref'} # || die "ERROR[$SUB_NAME]: resultset_ref not passed"; my $biosequence = $args{'biosequence'} || die "ERROR[$SUB_NAME]: biosequence not passed"; my $line_length = $args{'line_length'} || 70; my $word_length = $args{'word_length'} || 10; my $enzyme = $args{'enzyme'} || ''; my $protein_structure = $args{'protein_structure'}; my $total_observations = $args{'total_observations'}; #### Don't display unless HTML return unless ($sbeams->output_mode() eq 'html'); #### Loop over all the peptides my @peptides = @{$args{peptides}}; # Widget to allow show/hide of sequence display section my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'getptp_sequence', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $section_header = $sbeamsMOD->encodeSectionHeader( text=>'Sequence', link =>$link, ); my $htmlmode = ( $sbeams->output_mode() eq 'html' ) ? 1 : 0; print qq~ $section_header
~ if $htmlmode; my $sequence = $biosequence->{biosequence_seq}; my %start_positions; my %end_positions; foreach my $label_peptide (@peptides) { if ($label_peptide) { my $pos = -1; while (($pos = index($sequence,$label_peptide,$pos)) > -1) { $start_positions{$pos}++; $end_positions{$pos+length($label_peptide)}++; $pos++; } } } #### If transmembrane regions topology has been supplied, find the TMRs # my %tmr_start_positions; # my %tmr_end_positions; # my %tmr_color; # my $notes_buffer = ''; # if ($protein_structure->{transmembrane_topology}) { # my $start_side = substr($protein_structure->{transmembrane_topology},0,1); # my $tmp = substr($protein_structure->{transmembrane_topology},1,9999); # my @regions = split(/[io]/,$tmp); # foreach my $region (@regions) { # my ($start,$end) = split(/-/,$region); # $tmr_start_positions{$start-1} = $start_side; # $tmr_color{$start-1} = 'orange'; # if ($start_side eq 'i') { # $start_side = 'o'; # } elsif ($start_side eq 'o') { # $start_side = 'i'; # } else { # $start_side = '?'; # } # $tmr_end_positions{$end} = $start_side; # $tmr_color{$end} = 'orange'; # } # $notes_buffer .= "(Used TMR topology string: #$protein_structure->{transmembrane_topology})
\n"; # #print "[See full TMHMM result]
\n"; # } # # #### If there's a signal peptide, mark it as a blue # if ($protein_structure->{has_signal_peptide} eq 'Y') { # $tmr_start_positions{0} = ''; # $tmr_color{0} = 'blue'; # $tmr_end_positions{$protein_structure->{signal_peptide_length}} = ''; # $tmr_end_positions{$protein_structure->{signal_peptide_length}} = '/' # if ($protein_structure->{signal_peptide_is_cleaved} eq 'Y'); # $tmr_color{$protein_structure->{signal_peptide_length}} = 'orange'; # $notes_buffer = "(signal peptide: Y, length: # $protein_structure->{signal_peptide_length}, cleaved: # $protein_structure->{signal_peptide_is_cleaved}, probability: # $protein_structure->{has_signal_peptide_probability})\n".$notes_buffer; # } # my $seq_length = length($sequence); # my $i = 0; # my $color_level = 0; # my $observed_residues = 0; # # my @annotation_lines; # # while ($i < $seq_length) { # # if ($end_positions{$i}) { # if ($color_level == $end_positions{$i}) { # } # $color_level -= $end_positions{$i} unless ($color_level == 0); # } # # # if ($start_positions{$i}) { # if ($color_level == 0) { # } # $color_level += $start_positions{$i}; # } # # if ($color_level) { # $observed_residues++; # } # # $i++; # if ($i %$line_length == 0) { # } elsif ($enzyme && $enzyme eq 'trypsin') { # if (substr($sequence,$i-1,2) =~ /[RK][A-O,Q-Z]/) { # } # } elsif ($i % $word_length == 0) { # } # } # # my $cnt = 0; # for my $frag ( @{$biosequence->{_non_coverage}} ) { # $cnt += length( $frag->{seq} ); # } # # my %observed = ( start => [], # end => [], # class => 'pa_observed_sequence', # number => 0 ); # # for my $f ( @coverage ) { # push @{$observed{start}}, $f->start() - 1; # push @{$observed{end}}, $f->end() - 1; # $observed{number}++; # } # my $tags = $sbeamsMOD->make_tags( \%observed ); #if ( $args{glyco} ) { # my %gsite = ( start => [], end => [], class => 'pa_glycosite', number => 0 ); # my $sites = $sbeamsMOD->get_site_positions( seq => $sequence, # pattern => 'N[^P][S|T]' ); # for my $site ( @$sites ) { # push @{$gsite{start}}, $site; # push @{$gsite{end}}, $site + 2; # $gsite{number}++; # } #$tags = $sbeamsMOD->make_tags( \%gsite, $tags ); #my %predicted = ( start => [], end => [], class => 'pa_predicted_pep', number => 0); #for my $k ( sort { $a <=> $b } keys( %{$args{glyco}} ) ) { # push @{$predicted{start}}, $k - 1; # my $peptide = $args{glyco}->{$k}; # $peptide =~ s/\W//g; # push @{$predicted{end}}, $k + length( $peptide ) - 2; # $predicted{number}++; #} #$tags = $sbeamsMOD->make_tags( \%predicted, $tags ); #} my $html_seq = $sbeamsMOD->get_html_seq_vars( seq => $sequence ); use Data::Dumper; print "
$html_seq->{seq_display}
\n\n\n"; if ( $args{show_aa_content} ) { my $tot = length( $sequence ); my @aa = split( '', $sequence ); my %aa; # Make sure we get the common AA for my $aa ( qw( A C D E F G H I K L M N P Q R S T V W Y ) ) { $aa{$aa} = 0; } for my $aa ( @aa ) { $aa{$aa}++; } my %colors = ( 10 => '#555566', 9 => '#666677', 8 => '#777788', 7 => '#888899', 6 => '#9999aa', 5 => '#aaaabb', 4 => '#bbbbcc', 3 => '#ccccdd', 2 => '#ddddee', 1 => '#eeeeff', 0 => '#ffffff' ); my $name = 'AA:'; my $count = 'Cnt:'; my $perc = 'Perc:'; for my $aa ( sort( keys( %aa ) ) ) { my $pct = sprintf( "%0.1f", 100*($aa{$aa}/$tot) ); my $color_key = int( $pct ); my $font_color = ( $color_key > 6 ) ? 'white' : 'black'; $color_key = 10 if $color_key > 10; # $name .= "$aa"; $name .= "$aa"; $count .= "$aa{$aa}"; $perc .= "$pct"; } $name .= ''; $count .= ''; $perc .= ''; my $slith = 'slith'; $slith =<<" END";
$name $count $perc

END print $slith; } } # end displayAnnotatedSequence ############################################################################### # getSamples ############################################################################### sub getSamples { my %args = @_; my $SUB_NAME = 'getSamples'; my $sql = qq~ SELECT sample_id,sample_title FROM $TBAT_SAMPLE WHERE record_status != 'D' ORDER BY sample_id ~; my @samples = $sbeams->selectSeveralColumns($sql); return \@samples; } # end getSamples sub getSampleList { my %args = @_; my $SUB_NAME = 'getSampleList'; #### Decode the argument list my $resultset_ref = $args{'resultset_ref'} || die "ERROR[$SUB_NAME]: resultset_ref not passed"; #### Get the hash of indices of the columns my %col = %{$resultset_ref->{column_hash_ref}}; #### Loop over all the peptides my $data_ref = $resultset_ref->{data_ref}; my %observed_samples; foreach my $row (@{$data_ref}) { my $observed_sample_list = $row->[$col{sample_ids}]; my @all = split(/[,;]/,$observed_sample_list); foreach my $element ( @all ) { $observed_samples{$element}++; } } my @keys = keys( %observed_samples ); return \@keys; } ############################################################################### # getProteinStructure ############################################################################### sub getProteinStructure { my %args = @_; my $SUB_NAME = 'getProteinStructure'; #### Decode the argument list my $biosequence_id = $args{'biosequence_id'} || die "ERROR[$SUB_NAME]: biosequence_id not passed"; #### Define query to get information my $sql = qq~ SELECT n_transmembrane_regions,transmembrane_class,transmembrane_topology, has_signal_peptide,has_signal_peptide_probability, signal_peptide_length,signal_peptide_is_cleaved FROM $TBAT_BIOSEQUENCE_PROPERTY_SET WHERE biosequence_id = $biosequence_id ~; my @rows = $sbeams->selectHashArray($sql); if (scalar(@rows) != 1) { my %tmp = (); return(\%tmp); } return($rows[0]); } sub get_table_help { my $name = shift; return '' unless $name; my @entries; my $hidetext; my $showtext; my $heading; my $description; if ( $name eq 'observed_peptides' ) { @entries = ( { key => "Peptide Accession", value => 'Peptide Atlas accession number, beginning with PAp followed by 9 digits.' }, { value => 'Preceding (towards the N terminus) amino acid', key => 'Pre AA' }, { value => 'Amino Acid sequence of this peptide', key => 'Peptide Sequence'}, { value => 'Following (towards the C terminus) amino acid', key => 'Fol AA' }, { value => 'Score derived from peptide probability, EOS, and sequence characteristics such as missed cleavage [MC] or semi-tryptic [ST], or
multiple genome locations [MGL]. These are annotated in red as shown.', key => 'Suitability Score' }, { value => 'Highest PeptideProphet probability for this observed sequence', key => 'Best Prob' }, { value => '', key => 'Best Adjusted Prob'}, { value => 'Total number of observations in all modified forms and charge states', key => 'N Obs' }, { value => 'Empirical Observability Score', key => 'EOS' }, { value => 'SSRCalc Relative Hydrophobicity score', key => 'RHS' }, { value => 'Number of proteins in the reference database to which this peptide maps', key => 'N Protein Mappings' }, { value => 'Number of discrete genome locations which encode this amino acid sequence', key => 'N Genome Locations' }, { value => 'Samples in which this sequence was seen', key => 'Sample IDs' }, { value => 'Observed peptides of which this peptide is a subsequence', key => 'Parent Peptides' }, ); $showtext = 'show column descriptions'; $hidetext = 'hide column descriptions'; $heading = 'Observed Peptides'; $description= 'Peptides observed in MS/MS experiments'; } elsif ( $name eq 'annotated_transitions' ) { @entries = ( { key => 'Sequence', value => 'Amino acid sequence of detected pepide, including any mass modifications.' }, { key => 'Charge', value => 'Charge on Q1 (precursor) peptide ion.' }, { key => 'q1_mz', value => 'Mass to charge ratio of precursor peptide ion.' }, { key => 'q3_mz', value => 'Mass to charge ratio of fragment ion.' }, { key => 'Label', value => 'Ion-series designation for fragment ion (Q3).' }, { key => 'Intensity', value => 'Intensity of peak in CID spectrum' }, { key => 'CE', value => 'Collision energy, the kinetic energy conferred to the peptide ion and resulting in peptide fragmentation. (eV)' }, { key => 'RT', value => 'Peptide retention time( in minutes ) in the LC/MS system.' }, { key => 'SSRCalc', value => "Sequence Specific Retention Factor provides a hydrophobicity measure for each peptide using the algorithm of Krohkin et al. Version 3.0 [more]" }, { key => 'Instr', value => 'Model of mass spectrometer on which transition pair was validated.' }, { key => 'Annotator', value => 'Person/lab who contributed validated transition.' }, { key => 'Quality', value => 'Crude scale of quality for the observation, currently one of Best, OK, and No. ' }, ); $showtext = 'show column descriptions'; $hidetext = 'hide column descriptions'; $heading = 'Annotated Tranitions'; $description= 'Contributed Q1/Q3 transition pairs for SRM experiments'; } return unless @entries; my $help = $sbeamsMOD->get_table_help_section( name => $name, description => $description, heading => $heading, entries => \@entries, showtext => $showtext, hidetext => $hidetext ); return $help; } # end get_table_help ############################################################################### # displayExternalLinksSection # # Display a section for information about this protein in other resources ############################################################################### sub displayExternalLinksSection { my %args = @_; #### Process the arguments list my $biosequence = $args{biosequence} || die("ERROR: No biosequence passed"); #### Create widget to allow show/hide of overview section my ($tr,$link) = $sbeams->make_table_toggle( name => 'getptp_ExternalLinks', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); my $external_links = ''; my $section_header = $sbeamsMOD->encodeSectionHeader( text => 'External Links', link => $link, ); #### Debug if ( 0 == 1 ) { use Data::Dumper; my $tmp = Dumper( $biosequence->{synonyms} ); if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'All synonyms', tr_info => $tr, value => $tmp, url => "", ); } } #### Display a link to Human Protein Atlas (hpr) my $currentOrganism = $sbeamsMOD->getCurrentAtlasOrganism(parameters_ref=>{}); my $entrezGeneID = $biosequence->{synonyms}->{'Entrez Gene Symbol'}; if ($currentOrganism eq 'Human' && $entrezGeneID) { if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'Human Protein Atlas', tr_info => $tr, value => qq~
$entrezGeneID~, url => "", ); } } #### Display a link to the Global Proteome Machine (GPM) my $ensemblProtein = $biosequence->{synonyms}->{'Ensembl Protein'}; if ($ensemblProtein) { if ($sbeams->output_mode() eq 'html') { $external_links .= $sbeamsMOD->encodeSectionItem( key => 'Global Proteome Machine', tr_info => $tr, value => $ensemblProtein, url => "http://gpmdb.rockefeller.edu/thegpm-cgi/dblist_label.pl?label=$ensemblProtein", ); } } #### Close section if ($sbeams->output_mode() eq 'html' && $external_links ) { print qq~ $section_header $external_links
~; } return 1; } # end displayExternalLinksSection