#!/usr/local/bin/perl
###############################################################################
# $Id: $
#
# SBEAMS is Copyright (C) 2000-2014 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation. It is provided
# WITHOUT ANY WARRANTY. See the full description of GPL terms in the
# LICENSE file distributed with this software.
###############################################################################
###############################################################################
# Get the script set up with everything it will need
###############################################################################
use strict;
use lib qw (../../lib/perl);
use Data::Dumper;
use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::Connection::DataTable;
use SBEAMS::Connection::TabMenu;
use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;
###############################################################################
# Global Variables
###############################################################################
my $sbeams = SBEAMS::Connection->new();
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);
my $atlas = SBEAMS::PeptideAtlas->new();
$atlas->setSBEAMS($sbeams);

# Read input parameters
my $params = process_params();
my $show_image = 0;

###############################################################################
# Main
#
# Authenticates the user, builds the page body (tab menu plus either the
# library-subset form or the library download table, depending on the 'mode'
# request parameter) and prints it between the standard page header/footer.
###############################################################################
{ # Main

  # Authenticate or exit
  my $username = $sbeams->Authenticate( allow_anonymous_access => 1 ) || exit;
  $sbeams->setSessionAttribute( key => 'PA_resource', value => 'DIAAtlas' );

  # Resolve the display mode once so that the highlighted tab and the page
  # content can never disagree.  Previously the program name was derived from
  # the raw parameter before the default was applied, so a request without a
  # mode showed the download table under the 'subset' program name.
  my $mode = $params->{mode} || 'download_libs';
  my $show_subset_form = ( $mode eq 'subset_libs' );
  my $program_name = $show_subset_form ? 'DIA_library_subset'
                                       : 'DIA_library_download';

  ## get current settings
  my $project_id = $sbeams->getCurrent_project_id();

  my $page = $sbeams->getGifSpacer( 700 ) . "\n\n";

  # Get the HTML to display the tabs
  my $tabMenu = $atlas->getTabMenu(
    parameters_ref => $params,
    program_name   => $program_name,
  );
  $page .= "$tabMenu\n";

  # Add general section
  my $load_script = "set_toggle_box( 'protein_list_table' );sortables_init()";

  # Record the default in the shared parameter hash at the same point the
  # original code did, i.e. only after the tab menu has been generated.
  $params->{mode} ||= 'download_libs';

  if ( $show_subset_form ) {
    $page .= get_subset_form();
    $load_script = '';    # the subset form has no sortable table to set up
  } else {
    $page .= get_library_table();
  }
  $page .= "\n";

  # Emit header, accumulated body and footer.
  $atlas->display_page_header( onload => $load_script, sortable => 1 );
  print $page;
  $atlas->display_page_footer();

} # end main
###############################################################################
# get_table_help
#
# Returns column/row help for one of the known summary tables.
#
# Named arguments:
#   table - which help set to return: 'build', 'batch' or 'mayu'
#   mode  - 'entries_only' returns a reference to the list of
#           { key => ..., value => ... } hashes; any other value (default
#           'section') passes the entries to $atlas->get_table_help_section
#           and returns its result.
#
# Returns '' when no table name is given, and an empty list / undef when the
# table name is not recognised.
#
# Only the descriptive text has been corrected here (spelling, and the garbled
# lFDR label); every other key is unchanged because callers may match on it.
###############################################################################
sub get_table_help {
  my %args = @_;
  my $name = $args{table};
  return '' unless $name;
  $args{mode} ||= 'section';

  my @entries;
  my $hidetext;
  my $showtext;
  my $heading;
  my $description;

  if ( $name eq 'build' ) {
    @entries = (
      { key => 'Build Name', value => 'The simple name for this build, usually contains organism, prophet cutoff, and other information. ' },
      { key => 'Build Description', value => 'More detailed information about build. ' },
      { key => 'Reference Database', value => 'Database to which peptides were mapped, generally different than search database. This mapping is done by running BLAST, and allows the peptides to be mapped to the organism\'s genomic sequence. ' },
      { key => 'Build Date', value => 'Date upon which build was finished. ' },
      { key => '# Samples', value => 'The number of individual samples which comprise this build. Each sample contains one or more LCMS/MS runs, and generally corresponds to a single scientific experiment.' },
      { key => 'Distinct Peptides', value => 'This shows the number of distinct peptide sequences that were seen in this build. Observations of the peptide in different charge states or with different modifications are coalesced.' },
      { key => 'Total Observations ', value => 'The total number of spectra that yielded identifications above the build threshold. Observations of the same base peptide sequences multiple times or in various charge states/modifications, would each contribute to the total' },
    );
    # Only the build table supplies its own show/hide captions.
    $showtext = 'show row descriptions';
    $hidetext = 'hide row descriptions';
    $heading = 'Build Overview';
    $description = 'These values pertain to the atlas build as a whole';

  } elsif ( $name eq 'batch' ) {
    @entries = (
      { key => 'ID', value => 'Database ID for this sample (search batch) ' },
      { key => 'Sample_Name', value => 'Simple name for this sample/experiment. ' },
      { key => '#_Spectra_Searched', value => 'The total number of spectra searched in the sample. ' },
      { key => "#_Spectra_ID'd", value => 'The number of spectra identified with a probability greater than the atlas threshold ' },
      { key => '#_Distinct', value => 'The number of distinct peptide sequences, seen more than once (multiobs), in this build that are seen in this sample. ' },
      { key => '#_Unique', value => "The number of distinct, multiobs peptides that are seen only in this sample (unique contribution). This discriminates against smaller samples, and is less useful in atlases with a large number of samples. " },
      { key => '#_Progressive', value => 'Order-dependent unique multiobs peptides contributed by a given sample. The contribution for each sample is based on the samples that have gone before it, so later samples tend to have a lower progressive contribution. ' },
      { key => '#_Cumulative', value => 'Order-dependent cumulative number of unique multiobs peptides contributed to build by this and previous samples. ' },
      { key => '#_Proteins', value => 'The number of canonical (highly distinguishable, non-redundant) protein sequences identified from the peptides in this sample.' },
      { key => '#_Cum_Prots', value => 'Order-dependent cumulative number of canonical proteins contributed to build by this and previous samples.'
                                      . "\n"
                                      . 'Counts non-human contaminants, so final tally may be greater than Canonical Proteins count in Build Overview. ' },
#     { key => 'Sens', value => 'The sensitivity of the Peptide Prophet model at a probablility of 0.9, the percent of true positives that would be included at that threshold was used as a cutoff. ' },
      { key => 'FDR_(%)', value => 'The error rate of peptides above the threshold Peptide Prophet model at a probability of 0.9, the percent of false positives that would be included at the build threshold. ' },
      { key => 'Sample_Date', value => '' },
    );
    $heading = 'Sample Overview';
    $description = 'These values pertain to individual samples within the atlas';

  } elsif ( $name eq 'mayu' ) {
    @entries = (
      { key => 'nr_runs', value => 'Number of MS runs contributing to this build ' },
      { key => 'nr_files', value => 'Always 1 ' },
      { key => 'mFDR', value => 'Data in current row applies to all data meeting this PSM (spectrum) FDR threshold. ' },
      { key => 'target_PSM', value => 'Number of non-decoy PSMs at this mFDR (counts peptides mappable to protein reference set only)' },
      { key => 'decoy_PSM', value => 'Number of decoy PSMs at this mFDR ' },
      { key => 'FP_PSM', value => 'Number of false positive PSMs predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
      { key => 'TP_PSM', value => 'target_PSM - FP_PSM ' },
      { key => 'target_pepID', value => 'Number of non-decoy unique peptides at this mFDR (counts peptides mappable to protein reference set only) ' },
      { key => 'decoy_pepID', value => 'Number of decoy unique peptides at this mFDR ' },
      { key => 'FP_pepID', value => 'Number of false positive unique peptides predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
      { key => 'FP_pepID_stdev', value => ' ' },
      { key => 'TP_pepID', value => 'target_pepID - FP_pepID ' },
      { key => 'pepFDR', value => 'Peptide FDR (unique peptides)' },
      { key => 'target_protID', value => 'Number of non-decoy protein identifications at this mFDR. Applied to the covering set of proteins -- a set that is close to the smallest necessary to explain all the pepIDs. Includes all canonicals and some possibly_distinguished. ' },
      { key => 'decoy_protID', value => 'Number of decoy protein identifications at this mFDR. ' },
      { key => 'FP_protID', value => 'Number of false positive protein identifications predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
      { key => 'FP_protID_stdev', value => ' ' },
      { key => 'TP_protID', value => 'target_protID - FP_protID ' },
      { key => 'protFDR', value => 'Protein FDR. The largest value in this column is the protein FDR for the entire build. ' },
      # Label previously read 'lFDR10,2 lFDR5'; the description below refers
      # to the 25-step column, so the intended label is lFDR25.
      { key => 'lFDR1, lFDR5, lFDR10, lFDR25', value => 'Local protein FDR, computed over the previous step (i.e. between the previous row in the table and the current row), the previous 5 steps, the previous 10 steps, and the previous 25 steps.'
                                      . "\n"
                                      . 'Often there are fewer than 25 rows in the table, in which case column lFDR25 is uninformative. ' },
      { key => 'target_protIDs, decoy_protIDs, etc.', value => 'Same as above, except for singleton proteins (those identified by only one PSM) only. ' },
      { key => 'target_protIDns, decoy_protIDns, etc.', value => 'Same as above, except for multiply-observed proteins only. ' },
    );
    $heading = 'Mayu';
    $description = 'Reiter L, Claassen M, et al., Protein identification false discovery rates for very large proteomics data sets generated by tandem mass spectrometry, Mol Cell Proteomics. 2009 Nov;8(11):2405-17 ';
  }

  # Unknown table name: nothing to describe.
  return unless @entries;

  # Callers that only want the raw key/value pairs skip the rendering step.
  return \@entries if $args{mode} eq 'entries_only';

  my $help = $atlas->get_table_help_section( name => $name,
                                             description => $description,
                                             heading => $heading,
                                             entries => \@entries,
                                             showtext => $showtext,
                                             hidetext => $hidetext );
  return $help;

} # end get_table_help
# General list information
###############################################################################
# get_list_overview
#
# Builds the 'Protein List Overview' section for the protein list named by
# the request parameter protein_list_id: list name, contributors,
# description, protein count and original file, followed by the optional
# image/caption and abstract.
#
# The record is only read if it is not marked deleted and belongs to a
# project returned by $sbeams->getAccessibleProjects().
#
# Arguments: one positional value is accepted but not used.
# Returns:   the section markup as a string, or '' when protein_list_id is
#            missing or is not a plain integer.
###############################################################################
sub get_list_overview {
  my $build_id = shift;    # accepted for interface compatibility; unused

  # protein_list_id arrives with the request and is interpolated into the
  # SQL below, so only a plain integer is allowed through.
  my $list_id;
  if ( defined $params->{protein_list_id}
       && $params->{protein_list_id} =~ /\A\s*(\d+)\s*\z/ ) {
    $list_id = $1;
  } else {
    $log->warn( "get_list_overview: missing or non-numeric protein_list_id" );
    return '';
  }

  # Get a list of accessible project_ids
  my @project_ids = $sbeams->getAccessibleProjects();
  my $project_ids = join( ",", @project_ids ) || '0';

  my $sql = <<"BUILD";
  SELECT title, description, n_proteins, original_file, protein_list_id,
  contributors, url, image_path, abstract, image_caption
  FROM $TBAT_DOMAIN_PROTEIN_LIST
  WHERE protein_list_id = $list_id
  AND record_status <> 'D'
  AND project_id IN ( $project_ids );
BUILD

  # No matching row: carry on with an empty record so the section still
  # renders, just without values.
  my $info = $sbeams->selectrow_hashref( $sql ) || {};

  my $table = "\n\n";

  my ( $tr, $link ) = $sbeams->make_table_toggle( name => 'build_overview',
                                                  visible => 1,
                                                  tooltip => 'Show/Hide Section',
                                                  imglink => 1,
                                                  sticky => 1 );

  $table .= $atlas->encodeSectionHeader(
    text => ' Protein List Overview',
    span => 4,
    link => $link
  );

  # Add a lower-cased alias for every column so lookups below do not depend
  # on the case the database driver reports column names in.
  for my $key ( keys( %{$info} ) ) {
    $info->{lc($key)} = $info->{$key};
  }

  # NOTE(review): $file_url (and $web_path below) are computed but not
  # referenced by the markup visible in this function - confirm whether the
  # link/image markup that used them is still intended.
  my $file_url = "$CGI_BASE_DIR/PeptideAtlas/ManageTable.cgi/$info->{original_file}?TABLE_NAME=AT_Domain_Protein_list;ForceDownload=1;protein_list_id=" . $info->{protein_list_id} . ";GetFile=original_file";
  my $file_link = "$info->{original_file}";

  my $spc = $sbeams->getGifSpacer(500);
  my $tab = $sbeams->getGifSpacer(25);

  my $img = '';
  my $caption = '';
  if ( $info->{image_path} ) {
    # Only the file name of the stored path is used.
    my @path = split( /\//, $info->{image_path} );
    my $web_path = "/devDC/sbeams/images/$path[-1]";
    $img = "$tab\n";
    $caption = $info->{image_caption} || '';
  }

  my $abstract = '';
  if ( $info->{abstract} ) {
    $abstract = $info->{abstract};
    # Strip every carriage return (previously only the first was removed).
    $abstract =~ s/\r//g;
    $abstract =~ s/\n/\n/g;
  }

  # Key/value rows of the overview, in display order.
  my @items = (
    [ 'List Name',          $info->{title} . $spc ],
    [ 'Contributors',       $info->{contributors} ],
    [ 'Description',        $info->{description} ],
    [ 'Number of Proteins', $info->{n_proteins} ],
    [ 'Original File',      $file_link ],
  );
  for my $item ( @items ) {
    my ( $label, $value ) = @{$item};
    $table .= $atlas->encodeSectionItem( key => $label, tr_info => $tr,
                                         value => $value, vspan => 3 ) . "\n";
  }

  $table .= "$img | $caption |\n";
  $table .= "$abstract |\n";
  $table .= "\n\n";

  return $table;
}
# Table of downloadable DIA libraries (organism, library, instrument, download links)
sub get_library_table {
my $build_id = shift;
my $table = "\n";
my %libs;
my %org2species = ( mtb => 'Mycobacterium tuberculosis',
human => 'Homo sapiens',
ecoli => 'Eschiera coli',
yeast => 'Saccharomyces cerevisie' );
while ( my $line = ) {
next if $line =~ /^\s*$/;
chomp $line;
my @row = split( /\t/, $line );
$libs{$row[0]} ||= {};
my @names = split( /\./, $row[2] );
my $basename = $names[0];
$basename =~ s/_peakview//;
$libs{$row[0]}->{$basename} ||= {};
$row[1] = "/devDC/sbeams/tmp/download/PeptideAtlas/GetDIALibs/$row[0]/";
if ( $row[2] =~ /sptxt/i ) {
$libs{$row[0]}->{$basename}->{sptxt} = $row[1] . '/' . $row[2];
} elsif ( $row[2] =~ /traml/i ) {
$libs{$row[0]}->{$basename}->{traml} = $row[1] . '/' . $row[2];
} elsif ( $row[2] =~ /peakview/i ) {
$libs{$row[0]}->{$basename}->{peakview} = $row[1] . '/' . $row[2];
}
if ( $row[2] =~ /Orbi/i ) {
$libs{$row[0]}->{$basename}->{instrument} = 'Orbitrap';
} elsif ( $row[2] =~ /QTrap4000/i ) {
$libs{$row[0]}->{$basename}->{instrument} = 'QTrap4000';
} elsif ( $row[2] =~ /QTrap5500/i ) {
$libs{$row[0]}->{$basename}->{instrument} = 'QTrap5500';
} elsif ( $row[2] =~ /TripleTOF/i ) {
$libs{$row[0]}->{$basename}->{instrument} = 'TripleTOF';
} elsif ( $row[2] =~ /QTOF/i ) {
$libs{$row[0]}->{$basename}->{instrument} = 'QTOF';
} else {
$libs{$row[0]}->{$basename}->{instrument} = 'Other';
}
}
my @samples;
for my $org ( sort( keys( %libs ) ) ) {
for my $base ( sort( keys( %{$libs{$org}} ) ) ) {
$log->info( "ORG $org and base $base" );
my $downloads;
my $data;
for my $type ( qw( sptxt peakview traml ) ) {
my $link = ' ' x length( $type );
my $con = ( $data ) ? ', ' : '';
if ( $libs{$org}->{$base}->{$type} ) {
$link = "{$base}->{$type}>$type";
}
$data = $data . $con . $link;
}
my @row = ( '', $org2species{$org}, $base, $libs{$org}->{$base}->{instrument}, $data );
push @samples, \@row;
}
}
my $dag = '†';
my @headings = ( 'spc' => '',
Organism => 'Organism',
Library => 'Library',
Instrument => 'Instrument',
Downloads => 'Downloads'
);
# for my $h ( @headings ) {
# $log->info( $h );
# }
my $headings_ref = $atlas->make_sort_headings( headings => \@headings, default => 'Organism', asc => 1 );
# $table .= $atlas->encodeSectionHeader(
# text => 'List Proteins',
# width => 920
# );
my $table = SBEAMS::Connection::DataTable->new( class => 'scrolltable',
id => 'protein_list_table',
'__use_thead' => 1 );
$table->addRow( $headings_ref );
$table->setRowAttr( ROWS => [1], BGCOLOR => '#0000A0', CLASS => 'sortheader' );
my $rnum = 2;
for my $row ( @samples ) {
$table->addRow( $row );
$table->setRowAttr( ROWS => [$rnum], BGCOLOR => '#EAEAEA' );
$rnum++;
}
# $atlas->encodeSectionTable( rows => [ $headings_ref, @samples ],
# header => 1,
# nowrap => [1..scalar(@headings)],
# table_id => 'protein_list_table',
# class => 'scrolltable',
# align => [ qw(center left right right right right right right right right right center) ],
# bg_color => '#EAEAEA',
# sortable => 1 );
# $table =~ s/(