#!/usr/local/bin/perl
###############################################################################
# $Id: $
#
# SBEAMS is Copyright (C) 2000-2014 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation. It is provided
# WITHOUT ANY WARRANTY. See the full description of GPL terms in the
# LICENSE file distributed with this software.
###############################################################################
###############################################################################
# Get the script set up with everything it will need
###############################################################################
use strict;
use lib qw (../../lib/perl);
use Data::Dumper;
use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::Connection::DataTable;
use SBEAMS::Connection::TabMenu;
use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;
###############################################################################
# Global Variables
###############################################################################
# Shared connection and atlas objects used by every sub in this script.
# Constructed with direct method-call syntax: the indirect-object form
# ("new Class") is parsed heuristically by perl and is discouraged in perlobj.
my $sbeams = SBEAMS::Connection->new();
$sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR);
my $atlas = SBEAMS::PeptideAtlas->new();
$atlas->setSBEAMS($sbeams);
# Read input parameters
my $params = process_params();
# File-level flag, initialised to off; not referenced by the code visible in this section.
my $show_image = 0;
{ # Main
    # Authenticate the visitor (anonymous access is allowed); stop if that fails
    my $username = $sbeams->Authenticate( allow_anonymous_access => 1 );
    exit unless $username;

    # Mark the session with the resource being viewed; cleared again at the end
    $sbeams->setSessionAttribute( key => 'PA_resource', value => 'DIAAtlas' );

    ## get current settings
    my $project_id = $sbeams->getCurrent_project_id();

    # Collect the page body piece by piece, in display order
    my @chunks;
    my $spacer = $sbeams->getGifSpacer( 700 );
    push @chunks, $spacer . " \n";

    # Tab menu for this program
    my $tabMenu = $atlas->getTabMenu(
        parameters_ref => $params,
        program_name   => 'DIA_processing',
    );
    push @chunks, "$tabMenu\n";

    # General section: the form (currently just its help text)
    my $form = get_form();
    push @chunks, " ", $form;

    my $page = join( '', @chunks );

    # Emit header, body and footer
    $atlas->display_page_header( onload => "set_toggle_box( 'protein_list_table' );sortables_init()", sortable => 1 );
    print $page;
    $atlas->display_page_footer();

    # Reset the session marker set above
    $sbeams->setSessionAttribute( key => 'PA_resource', value => '' );
} # end main
# get_form
#
# Returns the text shown in the general section of the page.  At present this
# is only the "under construction" help text: the early return below hands
# back $help, so none of the form-building code after it is executed.
#
# Arguments: none.
# Returns:   the help text string.
sub get_form {
my $help = qq~
Custom Library Download
Currently under construction, this form will allow you to run an online analysis of set of SWATH runs with your own custom SWATH library in peakview or TraML format.
~;
# Early return: everything below this line is unreachable.
return $help;
# NOTE(review): the remainder of this sub is dead code because of the return
# above; it looks like work-in-progress form construction -- confirm before
# removing or re-enabling it.
# Short organism code => species name.  Not referenced again in this sub.
my %org2species = ( mtb => 'Mycobacterium tuberculosis',
human => 'Homo sapiens',
ecoli => 'Eschiera coli',
yeast => 'Saccharomyces cerevisie' );
# %orgs: first column => list of third-column values; %inst: tally per instrument token
my %orgs;
my %inst;
# NOTE(review): the filehandle in the readline below is missing in this copy
# of the source (presumably a DATA section) -- confirm against the original.
while ( my $line = ) {
# Skip blank lines; remaining lines are split on tabs
next if $line =~ /^\s*$/;
chomp $line;
my @row = split( /\t/, $line );
# Group the third column under the value of the first column
$orgs{$row[0]} ||= [];
push ( @{$orgs{$row[0]}}, $row[2] );
# The instrument token is the second underscore-separated field of the third
# column, or the third field when the second one matches /Dirty/
my @name = split( /_/, $row[2] );
my $inst = $name[1];
if ( $name[1] =~ /Dirty/ ) {
$inst = $name[2];
}
# Drop the first ".word" suffix (presumably a file extension -- TODO confirm)
$inst =~ s/\.\w+//;
$inst{$inst}++;
}
my $org_select = "";
my $inst_select = "";
# NOTE(review): this loop appends only a newline per instrument and never
# interpolates $inst; the option markup appears to be missing -- confirm.
for my $inst ( sort( keys( %inst ) ) ) {
$inst_select .= "\n";
}
$inst_select .= "";
# Protein lists joined to their owner contact, ordered by title
my $sql =<<" END";
SELECT title, first_name, last_name, protein_list_id
FROM $TBAT_DOMAIN_PROTEIN_LIST DPL
JOIN $TB_CONTACT C ON DPL.owner_contact_id = C.contact_id
ORDER BY title
END
my $list_select = "\n";
my $sth = $sbeams->get_statement_handle( $sql );
# NOTE(review): the fetched @row is not used; one newline is appended per row -- confirm.
while( my @row = $sth->fetchrow_array() ) {
$list_select .= "\n";
}
$list_select .= "\n";
# Record the assembled list text in the log
$log->info( $list_select );
my $radio = qq~
Upload protein list
HPP B/D list
~;
my $upload = "";
# NOTE(review): the lines between the two bare '#' markers below are not valid
# Perl as they stand; they look like remnants of a commented-out block --
# confirm against the original file.
#
HPP B/D list:
$list_select
#
$radio
# Final form text; in this copy it contains only the help text
my $form = qq~
$help
~;
return $form;
}
# get_table_help
#
# Returns the help (column/row descriptions) for one of the known tables.
#
# Named arguments:
#   table - which help set to return: 'build', 'batch' or 'mayu' (required)
#   mode  - 'section' (default) to get the rendered help section, or
#           'entries_only' to get a reference to the raw list of
#           { key => ..., value => ... } entries
#
# Returns:
#   ''                  when no table name is given
#   nothing (bare return) when the table name is not one of the known ones
#   array reference     in 'entries_only' mode
#   the value of $atlas->get_table_help_section(...) otherwise
sub get_table_help {
    my %args = @_;
    my $name = $args{table};
    return '' unless $name;
    $args{mode} ||= 'section';

    my @entries;
    my $hidetext;
    my $showtext;
    my $heading;
    my $description;

    if ( $name eq 'build' ) {
        @entries = (
            { key => 'Build Name', value => 'The simple name for this build, usually contains organism, prophet cutoff, and other information. ' },
            { key => 'Build Description', value => 'More detailed information about build. ' },
            { key => 'Reference Database', value => 'Database to which peptides were mapped, generally different than search database. This mapping is done by running BLAST, and allows the peptides to be mapped to the organism\'s genomic sequence. ' },
            { key => 'Build Date', value => 'Date upon which build was finished. ' },
            { key => '# Samples', value => 'The number of individual samples which comprise this build. Each sample contains one or more LCMS/MS runs, and generally corresponds to a single scientific experiment.' },
            { key => 'Distinct Peptides', value => 'This shows the number of distinct peptide sequences that were seen in this build. Observations of the peptide in different charge states or with different modifications are coalesced.' },
            { key => 'Total Observations ', value => 'The total number of spectra that yielded identifications above the build threshold. Observations of the same base peptide sequences multiple times or in various charge states/modifications, would each contribute to the total' },
        );
        # Only the build table supplies its own show/hide link text
        $showtext    = 'show row descriptions';
        $hidetext    = 'hide row descriptions';
        $heading     = 'Build Overview';
        $description = 'These values pertain to the atlas build as a whole';
    }
    elsif ( $name eq 'batch' ) {
        @entries = (
            { key => 'ID', value => 'Database ID for this sample (search batch) ' },
            { key => 'Sample_Name', value => 'Simple name for this sample/experiment. ' },
            { key => '#_Spectra_Searched', value => 'The total number of spectra searched in the sample. ' },
            { key => "#_Spectra_ID'd", value => 'The number of spectra identified with a probability greater than the atlas threshold ' },
            { key => '#_Distinct', value => 'The number of distinct peptide sequences, seen more than once (multiobs), in this build that are seen in this sample. ' },
            { key => '#_Unique', value => "The number of distinct, multiobs peptides that are seen only in this sample (unique contribution). This discriminates against smaller samples, and is less useful in atlases with a large number of samples. " },
            { key => '#_Progressive', value => 'Order-dependent unique multiobs peptides contributed by a given sample. The contribution for each sample is based on the samples that have gone before it, so later samples tend to have a lower progressive contribution. ' },
            { key => '#_Cumulative', value => 'Order-dependent cumulative number of unique multiobs peptides contributed to build by this and previous samples. ' },
            { key => '#_Proteins', value => 'The number of canonical (highly distinguishable, non-redundant) protein sequences identified from the peptides in this sample.' },
            { key => '#_Cum_Prots', value => 'Order-dependent cumulative number of canonical proteins contributed to build by this and previous samples. Counts non-human contaminants, so final tally may be greater than Canonical Proteins count in Build Overview. ' },
            # { key => 'Sens', value => 'The sensitivity of the Peptide Prophet model at a probability of 0.9, the percent of true positives that would be included at that threshold was used as a cutoff. ' },
            { key => 'FDR_(%)', value => 'The error rate of peptides above the threshold Peptide Prophet model at a probability of 0.9, the percent of false positives that would be included at the build threshold. ' },
            { key => 'Sample_Date', value => '' },
        );
        $heading     = 'Sample Overview';
        $description = 'These values pertain to individual samples within the atlas';
    }
    elsif ( $name eq 'mayu' ) {
        @entries = (
            { key => 'nr_runs', value => 'Number of MS runs contributing to this build ' },
            { key => 'nr_files', value => 'Always 1 ' },
            { key => 'mFDR', value => 'Data in current row applies to all data meeting this PSM (spectrum) FDR threshold. ' },
            { key => 'target_PSM', value => 'Number of non-decoy PSMs at this mFDR (counts peptides mappable to protein reference set only)' },
            { key => 'decoy_PSM', value => 'Number of decoy PSMs at this mFDR ' },
            { key => 'FP_PSM', value => 'Number of false positive PSMs predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
            { key => 'TP_PSM', value => 'target_PSM - FP_PSM ' },
            { key => 'target_pepID', value => 'Number of non-decoy unique peptides at this mFDR (counts peptides mappable to protein reference set only) ' },
            { key => 'decoy_pepID', value => 'Number of decoy unique peptides at this mFDR ' },
            { key => 'FP_pepID', value => 'Number of false positive unique peptides predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
            { key => 'FP_pepID_stdev', value => ' ' },
            { key => 'TP_pepID', value => 'target_pepID - FP_pepID ' },
            { key => 'pepFDR', value => 'Peptide FDR (unique peptides)' },
            { key => 'target_protID', value => 'Number of non-decoy protein identifications at this mFDR. Applied to the covering set of proteins -- a set that is close to the smallest necessary to explain all the pepIDs. Includes all canonicals and some possibly_distinguished. ' },
            { key => 'decoy_protID', value => 'Number of decoy protein identifications at this mFDR. ' },
            { key => 'FP_protID', value => 'Number of false positive protein identifications predicted by Mayu for this mFDR. Usually near, but not exactly the same as, the number of decoys. ' },
            { key => 'FP_protID_stdev', value => ' ' },
            { key => 'TP_protID', value => 'target_protID - FP_protID ' },
            { key => 'protFDR', value => 'Protein FDR. The largest value in this column is the protein FDR for the entire build. ' },
            # Key lists the four local-FDR columns the description talks about
            # (1, 5, 10 and 25 steps); it previously read "lFDR10,2 lFDR5".
            { key => 'lFDR1, lFDR5, lFDR10, lFDR25', value => 'Local protein FDR, computed over the previous step (i.e. between the previous row in the table and the current row), the previous 5 steps, the previous 10 steps, and the previous 25 steps. Often there are fewer than 25 rows in the table, in which case column lFDR25 is uninformative. ' },
            { key => 'target_protIDs, decoy_protIDs, etc.', value => 'Same as above, except for singleton proteins (those identified by only one PSM) only. ' },
            { key => 'target_protIDns, decoy_protIDns, etc.', value => 'Same as above, except for multiply-observed proteins only. ' },
        );
        $heading     = 'Mayu';
        $description = 'Reiter L, Claassen M, et al., Protein identification false discovery rates for very large proteomics data sets generated by tandem mass spectrometry, Mol Cell Proteomics. 2009 Nov;8(11):2405-17 ';
    }

    # Unknown table name: nothing to describe
    return unless @entries;

    # Callers that render the help themselves only want the raw entries
    return \@entries if $args{mode} eq 'entries_only';

    my $help = $atlas->get_table_help_section(
        name        => $name,
        description => $description,
        heading     => $heading,
        entries     => \@entries,
        showtext    => $showtext,
        hidetext    => $hidetext,
    );
    return $help;
} # end get_table_help
# General list information
#
# get_list_overview
#
# Builds the "Protein List Overview" section for the protein list named by the
# protein_list_id request parameter, restricted to projects the current user
# can access and to records whose record_status is not 'D'.
#
# Arguments: any argument passed (historically a build id) is ignored.
# Returns:   the section text, or '' when protein_list_id is missing or is not
#            a plain non-negative integer.
sub get_list_overview {

    # protein_list_id comes from the request and is placed into the SQL text
    # below, so accept digits only.
    my $list_id = $params->{protein_list_id};
    unless ( defined $list_id && $list_id =~ /\A\d+\z/ ) {
        $log->info( "get_list_overview: missing or non-numeric protein_list_id, skipping overview" );
        return '';
    }

    # Get a list of accessible project_ids
    my @project_ids = $sbeams->getAccessibleProjects();
    my $project_ids = join( ",", @project_ids ) || '0';

    my $sql = qq~
      SELECT title, description, n_proteins, original_file, protein_list_id,
             contributors, url, image_path, abstract, image_caption
      FROM $TBAT_DOMAIN_PROTEIN_LIST
      WHERE protein_list_id = $list_id
      AND record_status <> 'D'
      AND project_id IN ( $project_ids );
    ~;
    my $info = $sbeams->selectrow_hashref( $sql );

    # No matching row: carry on with an empty record so the section still renders
    $info ||= {};

    my $table = "\n\n";

    my ( $tr, $link ) = $sbeams->make_table_toggle(
        name    => 'build_overview',
        visible => 1,
        tooltip => 'Show/Hide Section',
        imglink => 1,
        sticky  => 1,
    );

    $table .= $atlas->encodeSectionHeader(
        text => ' Protein List Overview',
        span => 4,
        link => $link,
    );

    # Make every column reachable under its lower-case name as well
    for my $key ( keys( %{$info} ) ) {
        $info->{ lc($key) } = $info->{$key};
    }

    # NOTE(review): $file_url is built here but not referenced further down in
    # this copy of the source -- confirm whether the link text should use it.
    my $file_url = "$CGI_BASE_DIR/PeptideAtlas/ManageTable.cgi/$info->{original_file}?TABLE_NAME=AT_Domain_Protein_list;ForceDownload=1;protein_list_id=" . $info->{protein_list_id} . ";GetFile=original_file";
    my $file_link = "$info->{original_file}";

    my $spc = $sbeams->getGifSpacer(500);
    my $tab = $sbeams->getGifSpacer(25);

    my $img     = '';
    my $caption = '';
    if ( $info->{image_path} ) {
        # Only the file name (last path component) of the stored image path is used
        my @path = split( /\//, $info->{image_path} );
        # NOTE(review): hard-coded /devDC/ location, and $web_path is not
        # referenced below in this copy of the source -- confirm.
        my $web_path = "/devDC/sbeams/images/$path[$#path]";
        $img     = "$tab\n";
        $caption = $info->{image_caption} || '';
    }

    my $abstract = '';
    if ( $info->{abstract} ) {
        $abstract = $info->{abstract};
        # Strip every carriage return (not just the first one), then turn
        # newlines into spaces
        $abstract =~ s/\r//g;
        $abstract =~ s/\n/ /g;
    }

    $table .= $atlas->encodeSectionItem( key => 'List Name', tr_info => $tr,
        value => $info->{title} . $spc, vspan => 3 ) . "\n";
    $table .= $atlas->encodeSectionItem( key => 'Contributors', tr_info => $tr,
        value => $info->{contributors}, vspan => 3 ) . "\n";
    $table .= $atlas->encodeSectionItem( key => 'Description', tr_info => $tr,
        value => $info->{description}, vspan => 3 ) . "\n";
    $table .= $atlas->encodeSectionItem( key => 'Number of Proteins', tr_info => $tr,
        value => $info->{n_proteins}, vspan => 3 ) . "\n";
    $table .= $atlas->encodeSectionItem( key => 'Original File', tr_info => $tr,
        value => $file_link, vspan => 3 ) . "\n";

    $table .= "\n$img\n$caption\n";
    $table .= "\n$abstract\n";
    $table .= "\n\n";

    return $table;
}
# Table of the proteins that belong to the selected protein list
sub get_list_table {
my $build_id = shift;
# Get a list of accessible project_ids
my @project_ids = $sbeams->getAccessibleProjects();
my $project_ids = join( ",", @project_ids ) || '0';
my $table = "
\n";
my $sql =<<" END";
SELECT list_protein_id, uniprot_accession, protein_symbol, original_name, protein_full_name, gene_symbol, comment, priority
FROM $TBAT_DOMAIN_LIST_PROTEIN
WHERE protein_list_id = $params->{protein_list_id}
ORDER BY uniprot_accession ASC
END
my @samples;
my $sth = $sbeams->get_statement_handle( $sql );
while( my @row = $sth->fetchrow_array() ) {
$row[0] = "";
$row[3] ||= $row[1];
$row[7] = $sbeams->makeInfoText( "n/a" );
$row[4] = $sbeams->truncateStringWithMouseover( string => $row[4], len => 60, nowrap => 1 );
$row[6] = $sbeams->truncateStringWithMouseover( string => $row[6], len => 60 );
push @samples, \@row;
}
my $dag = '†';
my @headings = ( "" => 'list_protein_id',
UniProt => 'uniprot_accession',
ProteinSymbol => 'protein_symbol',
OriginalName => 'original_name',
ProteinFullName => 'protein_full_name',
GeneSymbol => 'gene_symbol',
Comment => 'comment',
Priority => 'priority'
);
# for my $h ( @headings ) {
# $log->info( $h );
# }
my $headings_ref = $atlas->make_sort_headings( headings => \@headings, default => 'UniProt', asc => 1 );
# $table .= $atlas->encodeSectionHeader(
# text => 'List Proteins',
# width => 920
# );
my $table = SBEAMS::Connection::DataTable->new( class => 'scrolltable',
id => 'protein_list_table',
'__use_thead' => 1 );
$table->addRow( $headings_ref );
$table->setRowAttr( ROWS => [1], BGCOLOR => '#0000A0', CLASS => 'sortheader' );
my $rnum = 2;
for my $row ( @samples ) {
$table->addRow( $row );
$table->setRowAttr( ROWS => [$rnum], BGCOLOR => '#EAEAEA' );
$rnum++;
}
# $atlas->encodeSectionTable( rows => [ $headings_ref, @samples ],
# header => 1,
# nowrap => [1..scalar(@headings)],
# table_id => 'protein_list_table',
# class => 'scrolltable',
# align => [ qw(center left right right right right right right right right right center) ],
# bg_color => '#EAEAEA',
# sortable => 1 );
# $table =~ s/(
)/$1<\/THEAD>/m;
# die Dumper( $table );
my $btxt = get_btxt( qw( Selected All Uncheck Check Search ) );
# die Dumper( $btxt );
my $fbox_help = '
' . $sbeams->makeInfoText( "Text entered into the box will filter the protein list (all fields)" ) . '
';
my $cbox_help = '
' . $sbeams->makeInfoText( "Use these buttons to check or uncheck the visible proteins" ) . '
';
my $show_help = '
' . $sbeams->makeInfoText( "Use these buttons to show all your selected proteins or the entire list (resets filter box)" ) . '
';
my $submit_help = '
' . $sbeams->makeInfoText( "This will submit your list of selected proteins to the SRM Atlas for a transitions query" ) . '
';
my $spc = ' ' x 3;
my $fbox = "Filter List: $spc";
my $atlas_select = "";
my $submit = "$atlas_select $spc";
my $show = "Show: $spc";
my $build_id = ( $sbeams->isGuestUser() ) ? 146 : 146;
my $form = qq~