#!/usr/local/bin/perl

###############################################################################
# $Id: $
#
# SBEAMS is Copyright (C) 2000-2014 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation. It is provided
# WITHOUT ANY WARRANTY. See the full description of GPL terms in the
# LICENSE file distributed with this software.
###############################################################################

###############################################################################
# Get the script set up with everything it will need
###############################################################################
use strict;
use lib qw (../../lib/perl);
use Data::Dumper;

use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::Connection::DataTable;
use SBEAMS::Connection::TabMenu;

use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;

###############################################################################
# Global Variables
###############################################################################
# Direct method-call constructors instead of indirect-object "new Class".
my $sbeams = SBEAMS::Connection->new;
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);

my $atlas = SBEAMS::PeptideAtlas->new;
$atlas->setSBEAMS($sbeams);

# Read input parameters
my $params = process_params();
my $show_image = 0;

{ # Main

  # Authenticate or exit
  my $username = $sbeams->Authenticate( allow_anonymous_access => 1) || exit;

  # Tag the session for the duration of this request; cleared again below.
  $sbeams->setSessionAttribute( key => 'PA_resource', value => 'DIAAtlas' );

  ## get current settings
  my $project_id = $sbeams->getCurrent_project_id();

  my $page = $sbeams->getGifSpacer( 700 ) . "
\n";

  # Get the HTML to display the tabs
  my $tabMenu = $atlas->getTabMenu( parameters_ref => $params,
                                    program_name   => 'DIA_processing', );

  $page .= <<"  END";
  $tabMenu
  END

  # Add general section
  $page .= "
";
  $page .= get_form();

  # Print what we already have, speed up apparent page loading time.
  $atlas->display_page_header(
    onload   => "set_toggle_box( 'protein_list_table' );sortables_init()",
    sortable => 1
  );
  print $page;
  $atlas->display_page_footer();

  $sbeams->setSessionAttribute( key => 'PA_resource', value => '' );

} # end main

sub get_form {
  my $help = qq~
Custom Library Download

Currently under construction, this form will allow you to run an online analysis of set of SWATH runs with your own custom SWATH library in peakview or TraML format.


~; return $help; my %org2species = ( mtb => 'Mycobacterium tuberculosis', human => 'Homo sapiens', ecoli => 'Eschiera coli', yeast => 'Saccharomyces cerevisie' ); my %orgs; my %inst; while ( my $line = ) { next if $line =~ /^\s*$/; chomp $line; my @row = split( /\t/, $line ); $orgs{$row[0]} ||= []; push ( @{$orgs{$row[0]}}, $row[2] ); my @name = split( /_/, $row[2] ); my $inst = $name[1]; if ( $name[1] =~ /Dirty/ ) { $inst = $name[2]; } $inst =~ s/\.\w+//; $inst{$inst}++; } my $org_select = ""; for my $org ( sort( keys( %orgs ) ) ) { $org_select .= "\n"; for my $file ( @{$orgs{$org}} ) { $lib_select .= "\n"; } } $org_select .= ""; $lib_select .= ""; my $inst_select = ""; my $sql =<<" END"; SELECT title, first_name, last_name, protein_list_id FROM $TBAT_DOMAIN_PROTEIN_LIST DPL JOIN $TB_CONTACT C ON DPL.owner_contact_id = C.contact_id ORDER BY title END my $list_select = "\n"; $log->info( $list_select ); my $radio = qq~ Upload protein list    HPP B/D list ~; my $upload = ""; #
HPP B/D list:
$list_select # $radio my $form = qq~ $help

~;
  return $form;
}

# Build the help content describing the rows/columns of one overview table.
#
# Named arguments:
#   table - which table to describe: 'build', 'batch' or 'mayu'
#   mode  - 'entries_only' returns the raw entry list; any other value
#           (default 'section') returns the rendered help section
#
# Returns:
#   ''                       when no table name is given
#   nothing (bare return)    when the table name is not one of the three above
#   array ref of { key, value } hashes   in 'entries_only' mode
#   the result of $atlas->get_table_help_section(...)   otherwise
sub get_table_help {
  my %args = @_;
  my $name = $args{table};
  return '' unless $name;
  $args{mode} ||= 'section';

  my @entries;
  my $hidetext;
  my $showtext;
  my $heading;
  my $description;

  if ( $name eq 'build' ) {
    @entries = (
      { key => 'Build Name', value => 'The simple name for this build, usually contains organism, prophet cutoff, and other information. ' },
      { key => 'Build Description', value => 'More detailed information about build. ' },
      { key => 'Reference Database', value => 'Database to which peptides were mapped, generally different than search database. This mapping is done by running BLAST, and allows the peptides to be mapped to the organism\'s genomic sequence. ' },
      { key => 'Build Date', value => 'Date upon which build was finished. ' },
      { key => '# Samples', value => 'The number of individual samples which comprise this build. Each sample contains one or more LCMS/MS runs, and generally corresponds to a single scientific experiment.' } ,
      { key => 'Distinct Peptides', value => 'This shows the number of distinct peptide sequences that were seen in this build. Observations of the peptide in different charge states or with different modifications are coalesced.' } ,
      { key => 'Total Observations ', value => 'The total number of spectra that yielded identifications above the build threshold. Observations of the same base peptide sequences multiple times or in various charge states/modifications, would each contribute to the total' }
    );
    # Only the build table supplies custom show/hide link text.
    $showtext = 'show row descriptions';
    $hidetext = 'hide row descriptions';
    $heading = 'Build Overview';
    $description= 'These values pertain to the atlas build as a whole';

  } elsif ( $name eq 'batch' ) {
    @entries = (
      { key => 'ID', value => 'Database ID for this sample (search batch) ' },
      { key => 'Sample_Name', value => 'Simple name for this sample/experiment. ' },
      { key => '#_Spectra_Searched', value => 'The total number of spectra searched in the sample. 
' },
      { key => "#_Spectra_ID'd", value => 'The number of spectra identified with a probability greater than the atlas threshold ' },
      { key => '#_Distinct', value => 'The number of distinct peptide sequences, seen more than once (multiobs), in this build that are seen in this sample. ' },
      { key => '#_Unique', value => "The number of distinct, multiobs peptides that are seen only in this sample (unique contribution). This discriminates against smaller samples, and is less useful in atlas' with a large number of samples. " },
      { key => '#_Progressive', value => 'Order-dependent unique multiobs peptides contributed by a given sample. The contribution for each sample is based on the samples that have gone before it, so later samples tend to have a lower progressive contribution. ' },
      { key => '#_Cumulative', value => 'Order-dependent cumulative number of unique multiobs peptides contributed to build by this and previous samples. ' },
      { key => '#_Proteins', value => 'The number of canonical (highly distinguishable, non-redundant) protein sequences identified from the peptides in this sample.' },
      { key => '#_Cum_Prots', value => 'Order-dependent cumulative number of canonical proteins contributed to build by this and previous samples.
Counts non-human contaminants, so final tally may be greater than Canonical Proteins count in Build Overview. ' },
#      { key => 'Sens', value => 'The sensitivity of the Peptide Prophet model at a probability of 0.9, the percent of true positives that would be included at that threshold was used as a cutoff. ' },
      { key => 'FDR_(%)', value => 'The error rate of peptides above the threshold Peptide Prophet model at a probability of 0.9, the percent of false positives that would be included at the build threshold. ' },
      { key => 'Sample_Date', value => ''},
    );
    $heading = 'Sample Overview';
    $description = 'These values pertain to individual samples within the atlas';

  } elsif ( $name eq 'mayu' ) {
    @entries = (
      { key => 'nr_runs', value => 'Number of MS runs contributing to this build '},
      { key => 'nr_files', value => 'Always 1 '},
      { key => 'mFDR', value => 'Data in current row applies to all data meeting this PSM (spectrum) FDR threshold. '},
      { key => 'target_PSM', value => 'Number of non-decoy PSMs at this mFDR (counts peptides mappable to protein reference set only)'},
      { key => 'decoy_PSM', value => 'Number of decoy PSMs at this mFDR '},
      { key => 'FP_PSM', value => 'Number of false positive PSMs predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. '},
      { key => 'TP_PSM', value => 'target_PSM - FP_PSM '},
      { key => 'target_pepID', value => 'Number of non-decoy unique peptides at this mFDR (counts peptides mappable to protein reference set only) '},
      { key => 'decoy_pepID', value => 'Number of decoy unique peptides at this mFDR '},
      { key => 'FP_pepID', value => 'Number of false positive unique peptides predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. 
'},
      { key => 'FP_pepID_stdev', value => ' '},
      { key => 'TP_pepID', value => 'target_pepID - FP_pepID '},
      { key => 'pepFDR', value => 'Peptide FDR (unique peptides)'},
      { key => 'target_protID', value => 'Number of non-decoy protein identifications at this mFDR. Applied to the covering set of proteins -- a set that is close to the smallest necessary to explain all the pepIDs. Includes all canonicals and some possibly_distinguished. '},
      { key => 'decoy_protID', value => 'Number of decoy protein identifications at this mFDR. '},
      { key => 'FP_protID', value => 'Number of false positive protein identifications predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. '},
      { key => 'FP_protID_stdev', value => ' '},
      { key => 'TP_protID', value => 'target_protID - FP_protID '},
      { key => 'protFDR', value => 'Protein FDR. The largest value in this column is the protein FDR for the entire build. '},
      # The key previously read 'lFDR10,2 lFDR5'; the description below names
      # the fourth column as lFDR25.
      { key => 'lFDR1, lFDR5, lFDR10, lFDR25', value => 'Local protein FDR, computed over the previous step (i.e. between the previous row in the table and the current row), the previous 5 steps, the previous 10 steps, and the previous 25 steps.
Often there are fewer than 25 rows in the table, in which case column lFDR25 is uninformative. '},
      { key => 'target_protIDs, decoy_protIDs, etc.', value => 'Same as above, except for singleton proteins (those identified by only one PSM) only. '},
      { key => 'target_protIDns, decoy_protIDns, etc.', value => 'Same as above, except for multiply-observed proteins only. '},
    );
    $heading = 'Mayu';
    $description = 'Reiter L, Claassen M, et al., Protein identification false discovery rates for very large proteomics data sets generated by tandem mass spectrometry, Mol Cell Proteomics. 2009 Nov;8(11):2405-17 ';
  }

  # Unknown table name: nothing to describe.
  return unless @entries;
  return \@entries if $args{mode} eq 'entries_only';

  my $help = $atlas->get_table_help_section( name => $name,
                                             description => $description,
                                             heading => $heading,
                                             entries => \@entries,
                                             showtext => $showtext,
                                             hidetext => $hidetext );
  return $help;

} # end get_table_help

# General list information
sub get_list_overview {
  my $build_id = shift;

  # protein_list_id comes from the request parameters; allow only a plain
  # integer to reach the SQL text below.
  my $list_id = $params->{protein_list_id};
  return '' unless defined $list_id && $list_id =~ /\A\d+\z/;

  # Get a list of accessible project_ids
  my @project_ids = $sbeams->getAccessibleProjects();
  my $project_ids = join( ",", @project_ids ) || '0';

  my $info = $sbeams->selectrow_hashref( <<"  BUILD" );
  SELECT title, description, n_proteins, original_file, protein_list_id,
         contributors, url, image_path, abstract, image_caption
  FROM $TBAT_DOMAIN_PROTEIN_LIST
  WHERE protein_list_id = $list_id
  AND record_status <> 'D'
  AND project_id IN ( $project_ids );
  BUILD

  my $table = "
Library:$lib_select
Organism:$org_select
Instrument:$inst_select
Upload File:
$upload
\n"; my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'build_overview', visible => 1, tooltip => 'Show/Hide Section', imglink => 1, sticky => 1 ); $table .= $atlas->encodeSectionHeader( text => ' Protein List Overview', span => 4, link => $link ); for my $key ( keys( %{$info} ) ) { $info->{lc($key)} = $info->{$key}; } my $file_url = "$CGI_BASE_DIR/PeptideAtlas/ManageTable.cgi/$info->{original_file}?TABLE_NAME=AT_Domain_Protein_list;ForceDownload=1;protein_list_id=" . $info->{protein_list_id} . ";GetFile=original_file"; my $file_link = "$info->{original_file}"; my $spc = $sbeams->getGifSpacer(500); my $tab = $sbeams->getGifSpacer(25); my $img = ''; my $caption = ''; if ( $info->{image_path} ) { my @path = split( /\//, $info->{image_path}); my $web_path = "/devDC/sbeams/images/$path[$#path]"; $img = "$tab\n"; $caption = $info->{image_caption} || ''; } my $abstract = ''; if ( $info->{abstract} ) { $abstract = $info->{abstract}; $abstract =~ s/\r//; $abstract =~ s/\n/
/g; } $table .= $atlas->encodeSectionItem( key => 'List Name', tr_info => $tr, value => $info->{title} . $spc, vspan => 3 ) . "\n"; $table .= $atlas->encodeSectionItem( key => 'Contributors', tr_info => $tr, value => $info->{contributors}, vspan => 3 ) . "\n"; $table .= $atlas->encodeSectionItem( key => 'Description', tr_info => $tr, value => $info->{description}, vspan => 3 ) . "\n"; $table .= $atlas->encodeSectionItem( key => 'Number of Proteins', tr_info => $tr, value => $info->{n_proteins}, vspan => 3 ) . "\n"; $table .= $atlas->encodeSectionItem( key => 'Original File', tr_info => $tr, value => $file_link, vspan => 3 ) . "\n"; $table .= ""; $table .= ""; $table .= "
$img$caption
$abstract
\n"; return $table; } # Peptide build stats sub get_list_table { my $build_id = shift; # Get a list of accessible project_ids my @project_ids = $sbeams->getAccessibleProjects(); my $project_ids = join( ",", @project_ids ) || '0'; my $table = "\n"; my $sql =<<" END"; SELECT list_protein_id, uniprot_accession, protein_symbol, original_name, protein_full_name, gene_symbol, comment, priority FROM $TBAT_DOMAIN_LIST_PROTEIN WHERE protein_list_id = $params->{protein_list_id} ORDER BY uniprot_accession ASC END my @samples; my $sth = $sbeams->get_statement_handle( $sql ); while( my @row = $sth->fetchrow_array() ) { $row[0] = ""; $row[3] ||= $row[1]; $row[7] = $sbeams->makeInfoText( "n/a" ); $row[4] = $sbeams->truncateStringWithMouseover( string => $row[4], len => 60, nowrap => 1 ); $row[6] = $sbeams->truncateStringWithMouseover( string => $row[6], len => 60 ); push @samples, \@row; } my $dag = ''; my @headings = ( "" => 'list_protein_id', UniProt => 'uniprot_accession', ProteinSymbol => 'protein_symbol', OriginalName => 'original_name', ProteinFullName => 'protein_full_name', GeneSymbol => 'gene_symbol', Comment => 'comment', Priority => 'priority' ); # for my $h ( @headings ) { # $log->info( $h ); # } my $headings_ref = $atlas->make_sort_headings( headings => \@headings, default => 'UniProt', asc => 1 ); # $table .= $atlas->encodeSectionHeader( # text => 'List Proteins', # width => 920 # ); my $table = SBEAMS::Connection::DataTable->new( class => 'scrolltable', id => 'protein_list_table', '__use_thead' => 1 ); $table->addRow( $headings_ref ); $table->setRowAttr( ROWS => [1], BGCOLOR => '#0000A0', CLASS => 'sortheader' ); my $rnum = 2; for my $row ( @samples ) { $table->addRow( $row ); $table->setRowAttr( ROWS => [$rnum], BGCOLOR => '#EAEAEA' ); $rnum++; } # $atlas->encodeSectionTable( rows => [ $headings_ref, @samples ], # header => 1, # nowrap => [1..scalar(@headings)], # table_id => 'protein_list_table', # class => 'scrolltable', # align => [ qw(center left right right 
right right right right right right right center) ], # bg_color => '#EAEAEA', # sortable => 1 ); # $table =~ s/()/$1<\/THEAD>/m; # die Dumper( $table ); my $btxt = get_btxt( qw( Selected All Uncheck Check Search ) ); # die Dumper( $btxt ); my $fbox_help = ''; my $cbox_help = ''; my $show_help = ''; my $submit_help = ''; my $spc = ' ' x 3; my $fbox = "Filter List: $spc"; my $atlas_select = ""; my $submit = "$atlas_select $spc"; my $show = "Show: $spc"; my $build_id = ( $sbeams->isGuestUser() ) ? 146 : 146; my $form = qq~ ~; my $old_school = 0; my $controls = ''; if ( $old_school ) { $controls = qq~
' . $sbeams->makeInfoText( "Text entered into the box will filter the protein list (all fields)" ) . ' ' . $sbeams->makeInfoText( "Use these buttons to check or uncheck the visible proteins" ) . ' ' . $sbeams->makeInfoText( "Use these buttons to show all your selected proteins or the entire list (resets filter box)" ) . ' ' . $sbeams->makeInfoText( "This will submit your list of selected proteins to the SRM Atlas for a transitions query" ) . '
$fbox_help $cbox_help $show_help $submit_help $table ~; } else { $controls = qq~
Filter List:
Visible Checkboxes:
Show:
SRM Atlas:
  $show
$table
$submit  
~;
  }
  $form .= $controls;
  return ( "$form" );
}

# Centre-pad a set of button labels to a common width.
#
# Takes a list of label strings and returns a hash ref mapping each original
# label to that label padded with spaces on both sides so that every value is
# as long as the longest label.  When the slack is odd the extra space goes
# in front of the label.
sub get_btxt {
  my @labels = @_;

  # Length of the longest label (stays 0 when no labels are passed).
  my $width = 0;
  for my $label ( @labels ) {
    my $n = length( $label );
    $width = $n if $n > $width;
  }

  my %padded_for;
  for my $label ( @labels ) {
    my $slack = $width - length( $label );

    # int( x/2 + 0.5 ) rounds a half upward, hence the front-heavy split.
    my $lead  = $slack ? int( $slack / 2 + 0.5 ) : 0;
    my $trail = $slack - $lead;

    $padded_for{$label} = ( ' ' x $lead ) . $label . ( ' ' x $trail );
  }

  return \%padded_for;
}

sub get_list_selector {
  #
Foofoo
my $sql =<<" END"; SELECT title, first_name, last_name, protein_list_id FROM $TBAT_DOMAIN_PROTEIN_LIST DPL JOIN $TB_CONTACT C ON DPL.owner_contact_id = C.contact_id ORDER BY title END my $sth = $sbeams->get_statement_handle( $sql ); my $help = qq~
Protein List Query

This tool helps you select proteins to explore in PeptideAtlas or SRMAtlas based on lists of high-priority proteins from the various participating groups of the Biology/Disease-driven Human Proteome Project (B/D-HPP).


~; my $select = qq~ $help
Choose an HPP B/D protein list to explore:   


~;
  # die Dumper($select);
  return $select;
}

# Collect the request parameters into a fresh hash ref.
#
# Runs the two SBEAMS parameter-processing calls against the CGI object $q and
# defaults 'mode' to 'download_libs' when it is missing or false.
# Returns the populated hash ref.
sub process_params {
  my $params = {};

  $sbeams->parse_input_parameters( q => $q, parameters_ref => $params );
  $sbeams->processStandardParameters( parameters_ref => $params );

  $params->{mode} ||= 'download_libs';

  return( $params );
}

# Return the atlas build directory for a build with 'DATA_FILES' removed.
#
# Named arguments:
#   build_id - atlas build id; a bare return results when it is missing/false
sub get_build_path {
  my %args = @_;
  return unless $args{build_id};

  my $path = $atlas->getAtlasBuildDirectory( atlas_build_id => $args{build_id} );

  # Drop the first occurrence of the DATA_FILES path component.
  $path =~ s/DATA_FILES//;
  return $path;
}

# Build a bar chart from a list of sample rows.
#
# Argument: array ref of row array refs; elements 1, 4 and 7 of each row are
# charted under the headings 'Sample', 'Distinct peptides (n_obs > 1)' and
# 'Cumulative peptides (n_obs > 1)'.
# Returns ( $chart, $header ), or '' when no array ref is supplied.
sub get_draw_chart_function {
  my $sample_arrayref = shift || return '';

  # This file never loads the charting module at the top, so load it here
  # before calling its constructor.  require is a no-op if already loaded.
  require SBEAMS::Connection::GoogleVisualization;

  my @samples;
  for my $s ( @{$sample_arrayref} ) {
    push @samples, [ $s->[1], $s->[4], $s->[7] ];
  }

  my $GV = SBEAMS::Connection::GoogleVisualization->new();
  my ( $chart ) = $GV->setDrawBarChart(
    samples         => \@samples,
    data_types      => [ 'string', 'number', 'number' ],
    headings        => [ 'Sample',
                         'Distinct peptides (n_obs > 1)',
                         'Cumulative peptides (n_obs > 1)' ],
    show_table      => 0,
    truncate_labels => 24
  );
  my $header = $GV->getHeaderInfo();

  return ( $chart, $header );
}