#!/usr/local/bin/perl

###############################################################################
# $Id: $
#
# SBEAMS is Copyright (C) 2000-2014 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation.  It is provided
# WITHOUT ANY WARRANTY.  See the full description of GPL terms in the
# LICENSE file distributed with this software.
###############################################################################

###############################################################################
# DIA/SWATH spectral library page.
#
#   mode=download_libs (default)  sortable table of downloadable libraries,
#                                 driven by the manifest after __DATA__
#   mode=subset_libs              placeholder text for the planned
#                                 "subset a library by protein list" form
#
# NOTE(review): this file was recovered from a copy in which all line
# structure was collapsed and every "<...>" sequence (HTML tags, and the
# <DATA> read operator) had been stripped.  Markup has been restored only
# where the surviving text determines it; each such spot carries a
# NOTE(review) comment.  Please diff against revision history before
# deploying.
###############################################################################

###############################################################################
# Get the script set up with everything it will need
###############################################################################
use strict;
use warnings;
use lib qw (../../lib/perl);
use Data::Dumper;

use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::Connection::DataTable;
use SBEAMS::Connection::TabMenu;

use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;

###############################################################################
# Global Variables
###############################################################################
my $sbeams = SBEAMS::Connection->new();
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);

my $atlas = SBEAMS::PeptideAtlas->new();
$atlas->setSBEAMS($sbeams);

# Rows of the __DATA__ manifest, filled on first use by _library_manifest().
# Declared before the main block so it exists when the main block runs.
my $manifest_rows;

# Read input parameters
my $params = process_params();

my $show_image = 0;

{ # Main

  # Authenticate or exit
  my $username = $sbeams->Authenticate( allow_anonymous_access => 1 ) || exit;

  $sbeams->setSessionAttribute( key => 'PA_resource', value => 'DIAAtlas' );

  my $program_name = ( $params->{mode} =~ /download/ ) ? 'DIA_library_download'
                                                       : 'DIA_library_subset';

  ## get current settings
  my $project_id = $sbeams->getCurrent_project_id();

  # NOTE(review): "<br>" restored here and after the tab menu / page body;
  # SOURCE shows a stripped line-break tag at each of these three places.
  my $page = $sbeams->getGifSpacer( 700 ) . "<br>\n";

  # Get the HTML to display the tabs
  my $tabMenu = $atlas->getTabMenu(
    parameters_ref => $params,
    program_name   => $program_name,
  );

  $page .= <<"END";
$tabMenu
<br>
END

  # Add general section
  my $load_script = "set_toggle_box( 'protein_list_table' );sortables_init()";
  $params->{mode} ||= 'download_libs';
  if ( $params->{mode} eq 'subset_libs' ) {
    $page .= get_subset_form();
    $load_script = '';    # placeholder page has no sortable table to set up
  } else {
    $page .= get_library_table();
  }
  $page .= "<br>";

  # Print what we already have, speed up apparent page loading time.
  $atlas->display_page_header( onload => $load_script, sortable => 1 );
  print $page;
  $atlas->display_page_footer();

} # end main


###############################################################################
# get_table_help
#
# Returns column/row help for one of the known tables.
#
#   table => 'build' | 'batch' | 'mayu'   which help set to return
#   mode  => 'section' (default)          HTML from get_table_help_section()
#            'entries_only'               arrayref of { key, value } hashes
#
# Returns '' when no table name is given, and an empty return for an
# unrecognised table name.
###############################################################################
sub get_table_help {
  my %args = @_;
  my $name = $args{table};
  return '' unless $name;
  $args{mode} ||= 'section';

  my @entries;
  my $hidetext;
  my $showtext;
  my $heading;
  my $description;

  if ( $name eq 'build' ) {
    @entries = (
      { key => 'Build Name', value => 'The simple name for this build, usually contains organism, prophet cutoff, and other information. ' },
      { key => 'Build Description', value => 'More detailed information about build. ' },
      { key => 'Reference Database', value => 'Database to which peptides were mapped, generally different than search database. This mapping is done by running BLAST, and allows the peptides to be mapped to the organism\'s genomic sequence. ' },
      { key => 'Build Date', value => 'Date upon which build was finished. ' },
      { key => '# Samples', value => 'The number of individual samples which comprise this build. Each sample contains one or more LCMS/MS runs, and generally corresponds to a single scientific experiment.' },
      { key => 'Distinct Peptides', value => 'This shows the number of distinct peptide sequences that were seen in this build. Observations of the peptide in different charge states or with different modifications are coalesced.' },
      # NOTE(review): <br> restored at the stripped line break.
      { key => 'Total Observations ', value => 'The total number of spectra that yielded identifications above the build threshold. <br> Observations of the same base peptide sequences multiple times or in various charge states/modifications, would each contribute to the total' },
    );
    $showtext    = 'show row descriptions';
    $hidetext    = 'hide row descriptions';
    $heading     = 'Build Overview';
    $description = 'These values pertain to the atlas build as a whole';

  } elsif ( $name eq 'batch' ) {
    @entries = (
      { key => 'ID', value => 'Database ID for this sample (search batch) ' },
      { key => 'Sample_Name', value => 'Simple name for this sample/experiment. ' },
      { key => '#_Spectra_Searched', value => 'The total number of spectra searched in the sample. ' },
      { key => "#_Spectra_ID'd", value => 'The number of spectra identified with a probability greater than the atlas threshold ' },
      { key => '#_Distinct', value => 'The number of distinct peptide sequences, seen more than once (multiobs), in this build that are seen in this sample. ' },
      { key => '#_Unique', value => "The number of distinct, multiobs peptides that are seen only in this sample (unique contribution). This discriminates against smaller samples, and is less useful in atlas' with a large number of samples. " },
      { key => '#_Progressive', value => 'Order-dependent unique multiobs peptides contributed by a given sample. The contribution for each sample is based on the samples that have gone before it, so later samples tend to have a lower progressive contribution. ' },
      { key => '#_Cumulative', value => 'Order-dependent cumulative number of unique multiobs peptides contributed to build by this and previous samples. ' },
      { key => '#_Proteins', value => 'The number of canonical (highly distinguishable, non-redundant) protein sequences identified from the peptides in this sample.' },
      # NOTE(review): <br> restored at the stripped line break.
      { key => '#_Cum_Prots', value => 'Order-dependent cumulative number of canonical proteins contributed to build by this and previous samples.<br> Counts non-human contaminants, so final tally may be greater than Canonical Proteins count in Build Overview. ' },
      # NOTE(review): only the 'Sens' entry is assumed to have been commented
      # out; the collapsed SOURCE does not show where the comment ended.
#     { key => 'Sens', value => 'The sensitivity of the Peptide Prophet model at a probability of 0.9, the percent of true positives that would be included at that threshold was used as a cutoff. ' },
      { key => 'FDR_(%)', value => 'The error rate of peptides above the threshold Peptide Prophet model at a probability of 0.9, the percent of false positives that would be included at the build threshold. ' },
      { key => 'Sample_Date', value => '' },
    );
    $heading     = 'Sample Overview';
    $description = 'These values pertain to individual samples within the atlas';

  } elsif ( $name eq 'mayu' ) {
    @entries = (
      { key => 'nr_runs', value => 'Number of MS runs contributing to this build ' },
      { key => 'nr_files', value => 'Always 1 ' },
      { key => 'mFDR', value => 'Data in current row applies to all data meeting this PSM (spectrum) FDR threshold. ' },
      { key => 'target_PSM', value => 'Number of non-decoy PSMs at this mFDR (counts peptides mappable to protein reference set only)' },
      { key => 'decoy_PSM', value => 'Number of decoy PSMs at this mFDR ' },
      { key => 'FP_PSM', value => 'Number of false positive PSMs predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
      { key => 'TP_PSM', value => 'target_PSM - FP_PSM ' },
      { key => 'target_pepID', value => 'Number of non-decoy unique peptides at this mFDR (counts peptides mappable to protein reference set only) ' },
      { key => 'decoy_pepID', value => 'Number of decoy unique peptides at this mFDR ' },
      { key => 'FP_pepID', value => 'Number of false positive unique peptides predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
      { key => 'FP_pepID_stdev', value => ' ' },
      { key => 'TP_pepID', value => 'target_pepID - FP_pepID ' },
      { key => 'pepFDR', value => 'Peptide FDR (unique peptides)' },
      { key => 'target_protID', value => 'Number of non-decoy protein identifications at this mFDR. Applied to the covering set of proteins -- a set that is close to the smallest necessary to explain all the pepIDs. Includes all canonicals and some possibly_distinguished. ' },
      { key => 'decoy_protID', value => 'Number of decoy protein identifications at this mFDR. ' },
      { key => 'FP_protID', value => 'Number of false positive protein identifications predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
      { key => 'FP_protID_stdev', value => ' ' },
      { key => 'TP_protID', value => 'target_protID - FP_protID ' },
      { key => 'protFDR', value => 'Protein FDR. The largest value in this column is the protein FDR for the entire build. ' },
      # Key previously read 'lFDR1, lFDR5, lFDR10,2 lFDR5'; the description
      # names the 1, 5, 10 and 25 step columns.
      # NOTE(review): <br> restored at the stripped line break.
      { key => 'lFDR1, lFDR5, lFDR10, lFDR25', value => 'Local protein FDR, computed over the previous step (i.e. between the previous row in the table and the current row), the previous 5 steps, the previous 10 steps, and the previous 25 steps.<br> Often there are fewer than 25 rows in the table, in which case column lFDR25 is uninformative. ' },
      { key => 'target_protIDs, decoy_protIDs, etc.', value => 'Same as above, except for singleton proteins (those identified by only one PSM) only. ' },
      { key => 'target_protIDns, decoy_protIDns, etc.', value => 'Same as above, except for multiply-observed proteins only. ' },
    );
    $heading     = 'Mayu';
    $description = 'Reiter L, Claassen M, et al., Protein identification false discovery rates for very large proteomics data sets generated by tandem mass spectrometry, Mol Cell Proteomics. 2009 Nov;8(11):2405-17 ';
  }

  return unless @entries;
  return \@entries if $args{mode} eq 'entries_only';

  my $help = $atlas->get_table_help_section(
    name        => $name,
    description => $description,
    heading     => $heading,
    entries     => \@entries,
    showtext    => $showtext,
    hidetext    => $hidetext,
  );
  return $help;

} # end get_table_help


###############################################################################
# get_list_overview
#
# Builds the "Protein List Overview" section for the protein list named by
# the protein_list_id request parameter, restricted to projects the current
# user can access.  The positional argument is accepted but not used.
#
# Returns the section HTML, or '' when protein_list_id is not a plain
# integer or no matching record is found.
###############################################################################
sub get_list_overview {
  my $build_id = shift;    # unused; kept so existing callers still work

  # protein_list_id arrives from the request and is interpolated into SQL,
  # so accept nothing but digits.
  my $list_id = $params->{protein_list_id};
  return '' unless defined $list_id && $list_id =~ /^\d+$/;

  # Get a list of accessible project_ids
  my @project_ids = $sbeams->getAccessibleProjects();
  my $project_ids = join( ",", @project_ids ) || '0';

  my $info = $sbeams->selectrow_hashref( <<"END_SQL" );
  SELECT title, description, n_proteins, original_file, protein_list_id,
         contributors, url, image_path, abstract, image_caption
  FROM $TBAT_DOMAIN_PROTEIN_LIST
  WHERE protein_list_id = $list_id
  AND record_status <> 'D'
  AND project_id IN ( $project_ids );
END_SQL
  return '' unless $info;

  # Make every column reachable by its lower-case name, whatever case the
  # driver returned.
  for my $key ( keys( %{$info} ) ) {
    $info->{ lc($key) } = $info->{$key};
  }

  # NOTE(review): opening/closing table tags and the layout of the
  # image/abstract block below are reconstructed; SOURCE shows only "\n" and
  # empty strings where the markup was.
  my $table = "<table>\n";

  my ( $tr, $link ) = $sbeams->make_table_toggle(
    name    => 'build_overview',
    visible => 1,
    tooltip => 'Show/Hide Section',
    imglink => 1,
    sticky  => 1,
  );

  $table .= $atlas->encodeSectionHeader(
    text => ' Protein List Overview',
    span => 4,
    link => $link,
  );

  my $original_file = defined $info->{original_file} ? $info->{original_file} : '';
  my $file_url = "$CGI_BASE_DIR/PeptideAtlas/ManageTable.cgi/$original_file"
               . "?TABLE_NAME=AT_Domain_Protein_list;ForceDownload=1;protein_list_id="
               . $info->{protein_list_id}
               . ";GetFile=original_file";
  # NOTE(review): anchor restored; SOURCE computed $file_url but the link
  # text had lost its tag.
  my $file_link = "<a href='$file_url'>$original_file</a>";

  my $spc = $sbeams->getGifSpacer(500);
  my $tab = $sbeams->getGifSpacer(25);

  my $img     = '';
  my $caption = '';
  if ( $info->{image_path} ) {
    my @path = split( /\//, $info->{image_path} );
    # NOTE(review): hard-coded development URL prefix, unchanged from SOURCE.
    my $web_path = "/devDC/sbeams/images/$path[$#path]";
    # NOTE(review): <img> restored; SOURCE computed $web_path but never
    # emitted it.
    $img     = "$tab<img src='$web_path'>\n";
    $caption = $info->{image_caption} || '';
  }

  my $abstract = '';
  if ( $info->{abstract} ) {
    $abstract = $info->{abstract};
    $abstract =~ s/\r//g;       # was missing /g: only the first CR was removed
    $abstract =~ s/\n/<br>/g;
  }

  my $title = defined $info->{title} ? $info->{title} : '';
  my @items = (
    [ 'List Name',          $title . $spc ],
    [ 'Contributors',       $info->{contributors} ],
    [ 'Description',        $info->{description} ],
    [ 'Number of Proteins', $info->{n_proteins} ],
    [ 'Original File',      $file_link ],
  );
  for my $item ( @items ) {
    $table .= $atlas->encodeSectionItem(
      key     => $item->[0],
      tr_info => $tr,
      value   => $item->[1],
      vspan   => 3,
    ) . "\n";
  }

  $table .= "</table>\n";
  $table .= "<p>$img$caption</p>\n<p>$abstract</p>\n";

  return $table;
}


###############################################################################
# _library_manifest
#
# Returns the rows of the __DATA__ manifest as a list of
# [ organism, source_path, file_name ] arrayrefs.  The DATA handle can only
# be read once, so the parsed rows are kept in $manifest_rows and reused.
# Blank lines and lines without three fields are skipped.
###############################################################################
sub _library_manifest {
  unless ( $manifest_rows ) {
    my @rows;
    while ( my $line = <DATA> ) {
      chomp $line;
      $line =~ s/^\s+//;
      $line =~ s/\s+$//;
      next if $line eq '';
      # Fields are tab separated; splitting on any whitespace also copes
      # with a manifest whose tabs were turned into spaces.
      my @fields = split( /\s+/, $line, 3 );
      next unless @fields == 3;
      push @rows, \@fields;
    }
    $manifest_rows = \@rows;
  }
  return @{$manifest_rows};
}


###############################################################################
# get_library_table
#
# Builds the sortable table of downloadable libraries: one row per
# organism/library with its instrument and links to whichever of the sptxt,
# peakview and traml files the manifest lists.  The positional argument is
# accepted but not used.
#
# Returns the HTML for the table.
###############################################################################
sub get_library_table {
  my $build_id = shift;    # unused; kept so existing callers still work

  my %org2species = (
    mtb   => 'Mycobacterium tuberculosis',
    human => 'Homo sapiens',
    ecoli => 'Escherichia coli',
    yeast => 'Saccharomyces cerevisiae',
  );

  # NOTE(review): hard-coded development URL prefix, unchanged from SOURCE.
  my $download_base = '/devDC/sbeams/tmp/download/PeptideAtlas/GetDIALibs';

  # Checked in this order so that e.g. TripleTOF is not reported as QTOF.
  my @instruments = (
    [ qr/Orbi/i      => 'Orbitrap'  ],
    [ qr/QTrap4000/i => 'QTrap4000' ],
    [ qr/QTrap5500/i => 'QTrap5500' ],
    [ qr/TripleTOF/i => 'TripleTOF' ],
    [ qr/QTOF/i      => 'QTOF'      ],
  );

  # $libs{organism}{library} = { sptxt, traml, peakview => url; instrument }
  my %libs;
  for my $entry ( _library_manifest() ) {
    my ( $org, undef, $file ) = @{$entry};

    # Library name is the file name up to the first dot, without the
    # _peakview suffix, so all formats of one library share a row.
    my ( $basename ) = split( /\./, $file );
    $basename = '' unless defined $basename;
    $basename =~ s/_peakview//;

    my $lib = $libs{$org}->{$basename} ||= {};

    # The manifest's own path column is ignored; files are served from the
    # download area.  (SOURCE produced a doubled slash before the file.)
    my $url = "$download_base/$org/$file";
    if ( $file =~ /sptxt/i ) {
      $lib->{sptxt} = $url;
    } elsif ( $file =~ /traml/i ) {
      $lib->{traml} = $url;
    } elsif ( $file =~ /peakview/i ) {
      $lib->{peakview} = $url;
    }

    $lib->{instrument} = 'Other';
    for my $instrument ( @instruments ) {
      my ( $pattern, $label ) = @{$instrument};
      if ( $file =~ $pattern ) {
        $lib->{instrument} = $label;
        last;
      }
    }
  }

  my @samples;
  for my $org ( sort( keys( %libs ) ) ) {
    for my $base ( sort( keys( %{ $libs{$org} } ) ) ) {
      $log->info( "ORG $org and base $base" );

      # A missing format is padded to the width of its label so the columns
      # of links line up.
      # NOTE(review): anchor and &nbsp; padding restored from the residue
      # '{$base}->{$type}>$type' and a bare space in SOURCE.
      my @links;
      for my $type ( qw( sptxt peakview traml ) ) {
        my $url = $libs{$org}->{$base}->{$type};
        push @links, $url ? qq~<a href="$url">$type</a>~
                          : '&nbsp;' x length( $type );
      }

      push @samples, [ '',
                       $org2species{$org},
                       $base,
                       $libs{$org}->{$base}->{instrument},
                       join( ', ', @links ) ];
    }
  }

  my @headings = ( 'spc'      => '',
                   Organism   => 'Organism',
                   Library    => 'Library',
                   Instrument => 'Instrument',
                   Downloads  => 'Downloads' );

  my $headings_ref = $atlas->make_sort_headings( headings => \@headings,
                                                 default  => 'Organism',
                                                 asc      => 1 );

  my $table = SBEAMS::Connection::DataTable->new( class         => 'scrolltable',
                                                  id            => 'protein_list_table',
                                                  '__use_thead' => 1 );
  $table->addRow( $headings_ref );
  $table->setRowAttr( ROWS => [1], BGCOLOR => '#0000A0', CLASS => 'sortheader' );

  my $rnum = 2;
  for my $row ( @samples ) {
    $table->addRow( $row );
    $table->setRowAttr( ROWS => [$rnum], BGCOLOR => '#EAEAEA' );
    $rnum++;
  }

  # NOTE(review): SOURCE also built filter-box, check/uncheck, show and
  # submit strings (with makeInfoText help and get_btxt labels) and an
  # otherwise empty $form, but none of them appear as visible text in the
  # surviving output; only a spacer row and the table do.  Whether the lost
  # markup referenced them (e.g. inside a tag or an HTML comment) cannot be
  # told from here, so they are omitted -- confirm against revision history.
  # The wrapper table's tags are reconstructed.
  my $controls = qq~
  <table>
   <tr><td>&nbsp;</td></tr>
   <tr><td>$table</td></tr>
  </table>
  ~;

  return $controls;
}


###############################################################################
# get_btxt
#
# Pads each tag on both sides so that all of them come out the same width
# (that of the longest tag), for use as equal-width button labels.  When the
# padding is odd the extra unit goes in front.
#
# Returns a hashref of tag => padded tag.
###############################################################################
sub get_btxt {
  my @tags = @_;

  my $max = 0;
  for my $tag ( @tags ) {
    $max = length( $tag ) if length( $tag ) > $max;
  }

  # NOTE(review): padding unit restored as &nbsp;; SOURCE shows a single
  # blank, which is what a decoded &nbsp; looks like -- confirm.
  my $unit = '&nbsp;';

  my %btxt;
  for my $tag ( @tags ) {
    my $delta = $max - length( $tag );
    my $pad   = $delta ? int( $delta / 2 + 0.5 ) : 0;
    $btxt{$tag} = ( $unit x $pad ) . $tag . ( $unit x ( $delta - $pad ) );
  }
  return \%btxt;
}


###############################################################################
# get_subset_form
#
# Returns the HTML for the library-subset page.  The form itself is not
# implemented yet, so this is only the explanatory text.
#
# TODO: implement the form.  The draft that used to follow the return
# statement could never run and lost its markup; what it sketched was:
#   - Library / Organism / Instrument selectors filled from the manifest
#     (instrument taken from the second "_"-separated part of the file name,
#     or the third when the second contains "Dirty")
#   - a radio choice between "Upload protein list" and "HPP B/D list"
#   - an "Upload File" input
#   - an "HPP B/D list" selector filled from
#       SELECT title, first_name, last_name, protein_list_id
#       FROM $TBAT_DOMAIN_PROTEIN_LIST DPL
#       JOIN $TB_CONTACT C ON DPL.owner_contact_id = C.contact_id
#       ORDER BY title
###############################################################################
sub get_subset_form {

  # NOTE(review): heading and paragraph tags are reconstructed.
  my $help = qq~
  <h2>Protein List Query</h2>
  <p>
  Currently being implemented, this form will allow you to use a list of
  proteins to extract a subset SWATH/DIA library.  This can be done with a
  user-uploaded list, or with any of the Biology/Disease-driven Human Proteome
  Project (B/D-HPP) contributed lists.
  </p>
  ~;

  return $help;
}


###############################################################################
# process_params
#
# Parses the CGI parameters into a hashref, lets SBEAMS handle its standard
# parameters, and defaults mode to 'download_libs'.
###############################################################################
sub process_params {
  my $params = {};
  $sbeams->parse_input_parameters( q => $q, parameters_ref => $params );
  $sbeams->processStandardParameters( parameters_ref => $params );
  $params->{mode} ||= 'download_libs';
  return( $params );
}


###############################################################################
# get_build_path
#
# Returns the directory of the given atlas build with the first
# 'DATA_FILES' removed from the path.
#
#   build_id => atlas build id (required; empty return without it)
###############################################################################
sub get_build_path {
  my %args = @_;
  return unless $args{build_id};
  my $path = $atlas->getAtlasBuildDirectory( atlas_build_id => $args{build_id} );
  return $path unless defined $path;
  $path =~ s/DATA_FILES//;
  return $path;
}


###############################################################################
# get_draw_chart_function
#
# Builds a bar chart from columns 1, 4 and 7 of each sample row.
#
# Takes an arrayref of row arrayrefs; returns ( chart, header info ), or ''
# when no arrayref is given.
###############################################################################
sub get_draw_chart_function {
  my $sample_arrayref = shift || return '';

  my @samples;
  for my $s ( @{$sample_arrayref} ) {
    push @samples, [ $s->[1], $s->[4], $s->[7] ];
  }

  # Loaded here because nothing at the top of the file brings it in.
  require SBEAMS::Connection::GoogleVisualization;
  my $GV = SBEAMS::Connection::GoogleVisualization->new();
  my ( $chart ) = $GV->setDrawBarChart(
    samples         => \@samples,
    data_types      => [ 'string', 'number', 'number' ],
    headings        => [ 'Sample',
                         'Distinct peptides (n_obs > 1)',
                         'Cumulative peptides (n_obs > 1)' ],
    show_table      => 0,
    truncate_labels => 24,
  );
  my $header = $GV->getHeaderInfo();
  return ( $chart, $header );
}

__DATA__
mtb	/net/dblocal/wwwspecial/mrmatlas/mtb	Mtb_DirtyPeptides_ORBI_filtered_cons.sptxt.gz
mtb	/net/dblocal/wwwspecial/mrmatlas/mtb	Mtb_DirtyPeptides_ORBI_filtered_cons.traml
mtb	/net/dblocal/wwwspecial/mrmatlas/mtb	Mtb_DirtyPeptides_ORBI_filtered_cons_peakview.tsv
mtb	/net/dblocal/wwwspecial/mrmatlas/mtb	Mtb_DirtyPeptides_TripleTOF_filtered_cons.sptxt.gz
mtb	/net/dblocal/wwwspecial/mrmatlas/mtb	Mtb_DirtyPeptides_TripleTOF_filtered_cons.traml
mtb	/net/dblocal/wwwspecial/mrmatlas/mtb	Mtb_DirtyPeptides_TripleTOF_filtered_cons_peakview.tsv
yeast	/regis/sbeams/bin/openswath/data/yeast	Yeast_Orbitrap_DP_final_peakview.tsv
yeast	/regis/sbeams/bin/openswath/data/yeast	Yeast_Orbitrap_DP_final.sptxt
yeast	/regis/sbeams/bin/openswath/data/yeast	Yeast_Orbitrap_DP_final.traml
yeast	/regis/sbeams/bin/openswath/data/yeast	Yeast_QTrap4000_DP_final_peakview.tsv
yeast	/regis/sbeams/bin/openswath/data/yeast	Yeast_QTrap4000_DP_final.sptxt
yeast	/regis/sbeams/bin/openswath/data/yeast	Yeast_QTrap4000_DP_final.traml
ecoli	/regis/sbeams/bin/openswath/data/ecoli	ecoli_TripleTOF_peakview.tsv
ecoli	/regis/sbeams/bin/openswath/data/ecoli	ecoli_TripleTOF.sptxt
ecoli	/regis/sbeams/bin/openswath/data/ecoli	ecoli_TripleTOF.traml
human	/regis/sbeams/bin/openswath/data/human	human_QTrap5500_peakview.tsv
human	/regis/sbeams/bin/openswath/data/human	human_QTrap5500.sptxt
human	/regis/sbeams/bin/openswath/data/human	human_QTrap5500.traml
human	/regis/sbeams/bin/openswath/data/human	human_QTOF_peakview.tsv
human	/regis/sbeams/bin/openswath/data/human	human_QTOF.sptxt
human	/regis/sbeams/bin/openswath/data/human	human_QTOF.traml