#!/usr/local/bin/perl -w
###############################################################################
# Program showPathways
# $Id: $
#
# Description : Show protein information overlayed on KEGG pathway maps.
#
# SBEAMS is Copyright (C) 2000-2021 Institute for Systems Biology
# This program is governed by the terms of the GNU General Public License (GPL)
# version 2 as published by the Free Software Foundation. It is provided
# WITHOUT ANY WARRANTY. See the full description of GPL terms in the
# LICENSE file distributed with this software.
#
###############################################################################
use strict;
use lib qw (../../lib/perl);
use File::Basename;
use Benchmark;
use SBEAMS::Connection qw($q $log);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::TabMenu;
use SBEAMS::Connection::DataTable;
use SBEAMS::BioLink::KeggMaps;
use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;
use SBEAMS::PeptideAtlas::BestPeptideSelector;
use Data::Dumper;
## Globals ##
my $sbeams = new SBEAMS::Connection;
$sbeams->setSBEAMS_SUBDIR( 'PeptideAtlas' );
my $atlas = new SBEAMS::PeptideAtlas;
$atlas->setSBEAMS($sbeams);
my $best_peptide = new SBEAMS::PeptideAtlas::BestPeptideSelector;
$best_peptide->setSBEAMS($sbeams);
my $program = basename( $0 );
my $keggmap = SBEAMS::BioLink::KeggMaps->new();
my $atlas_build_id;
my $kegg_organism;
my $verbose = 1;
my $script_name = basename( $0 );
# Don't buffer output
$|++;
{ # Main
# Authenticate user.
my $current_username = $sbeams->Authenticate( allow_anonymous_access => 1 ) || die "Authentication failed";
# Process cgi parameters
my $params = process_params();
# Print cgi headers. The hideTimer function won't be in all pages,
# but this failure is not apparent to user.
$atlas->printPageHeader( onload => 'hideTimerInfo()' );
#### Get the current atlas_build_id based on parameters or session
$atlas_build_id = $atlas->getCurrentAtlasBuildID( parameters_ref => $params );
my $paramstr = '';
my $organism = $atlas->getCurrentAtlasOrganism( atlas_build_id => $atlas_build_id,
parameters_ref => $params,
type => 'organism_id',
);
$kegg_organism = $keggmap->getOrganismCode( organism_id => $organism );
$params->{apply_action} ||= 'list_pathways';
my $content = '';
#### Get the HTML to display the tabs
my $tabMenu = $atlas->getTabMenu(
parameters_ref => $params,
program_name => $script_name,
);
if ( $sbeams->output_mode() eq 'html' ) {
$content .= "\n";
$content .= $tabMenu->asHTML();
}
# Decision block, what type of page are we going to display?
if ( $params->{apply_action} eq 'list_pathways' ) {
if ( $kegg_organism ) {
$content .= ' ' . add_build_selector( $atlas_build_id );
$content .= list_pathways( $params );
} else {
my $txt = $sbeams->makeInfoText("No KEGG map information available for the current atlas build, please select another to continue.");
$content .= " $txt
\n";
$content .= add_build_selector( $atlas_build_id );
}
} elsif ( $params->{apply_action} eq 'pathway_details' ) {
# Until caching is available, will print out as we go.
print $content;
$content = pathway_details( $params );
$atlas->printPageFooter( close_tables=>'NO');
exit;
} else {
$content .= list_pathways( $params );
}
# Don't think I really need this, but...
# $sbeams->printUserContext();
print $content;
$atlas->printPageFooter( close_tables=>'NO');
} # end Main
sub add_build_selector {
my $build_id = shift;
my $selector = ' Select Atlas Build:';
$selector .= $atlas->getBuildSelector( atlas_build_id => $atlas_build_id, inline => 1 );
$selector .= '';
return $selector;
}
#+
# Routine to list all available pathways for current organism
#-
sub list_pathways {
my $params = shift;
my $url = $q->url( -full=>1 );
my $page = ''; #$sbeams->getGifSpacer(900);
if ( !$atlas->has_search_key_data( parameters_ref => $params ) ) {
$sbeams->set_page_message( type => 'Error', msg => <<" END" );
Current build does not have search index built, KEGG map will show no hits.
Follow PeptideAtlas Home link and choose a default atlas to make this work.
END
}
my $t0 = new Benchmark;
my $pathways = $keggmap->getKeggPathways( organism => $kegg_organism );
my $table = SBEAMS::Connection::DataTable->new();
$table->addRow( ['Pathway ID', 'View Data','Pathway Definition' ] );
$table->setRowAttr( ROWS => [1], BGCOLOR => '#002664', ALIGN=>'CENTER', NOWRAP => 0, HEIGHT => 35 );
$table->setHeaderAttr(WHITE_TEXT=>1,BOLD=>1); # this actually has effect via sub formatHeader
$page .= "
";
my $pa_img = "";
my $srm_img = "";
for my $path ( @{$pathways} ) {
my $desc = $q->escape( $path->{definition} );
my $links = "{entry_id};path_def=$desc;searchdb=pa TARGET=_pa_pathdetails TITLE='View data from pathway proteins in PeptideAtlas'>$pa_img ";
$links .= " {entry_id};path_def=$desc;searchdb=srm TARGET=_srm_pathdetails TITLE='View data from pathway proteins in SRMAtlas'>$srm_img ";
$table->addRow( [$path->{entry_id},$links,$path->{definition} ] );
}
$table->alternateColors(PERIOD => 1, FIRSTROW=> 2, DEF_BGCOLOR => '#f3f1e4', BGCOLOR => '#d3d1c4');
my $nrows = $table->getRowNum();
$table->setColAttr( COLS => [2], ROWS => [2..$nrows], ALIGN => 'CENTER' );
$page .= "$table \n";
my $t1 = new Benchmark;
$log->debug( parseTime( $t1, $t0, "seconds to fetch pathways for $kegg_organism" ) );
return $page;
}
#+
# Routine to list all available pathways for current organism
#-
sub pathway_details {
my $params = shift;
my $url = $q->self_url();
my @url = split( /\?/, $url );
$url = $url[0];
# Due to performance issues, we will print this out as we go along until
# feature is better implemented.
print $sbeams->getGifSpacer(1200) . " \n";
# Turned off
# my $addpath = ( $params->{path_def} =~ /pathway/i ) ? '' : 'Pathway ';
# print "
The image below shows the pathway for $params->{path_def} from KEGG, annotated with information from the
$src_atlas. As shown in the legend, proteins for which there are peptides in the $src_atlas are colored green, while proteins for
which there is no information are colored yellow. Some nodes in the network may have no color, this is because there is no information
on them from KEGG. Each colored node is clickable, linking to a page showing more detailed information. The link below will load a
query with all known proteins in the pathway, you can modify options as desired and submit.
END
print "Looking up genes in $params->{path_def} ($params->{path_id}) at KEGG - ";
my $t2 = new Benchmark;
$keggmap->setPathway( pathway => $params->{path_id} );
# my $relationships = $keggmap->getPathwayRelationships();
$params->{searchdb} ||= 'pa';
my $pabst_build_id = $best_peptide->get_pabst_build( $params );
$params->{pabst_build_id} = $pabst_build_id;
# Get info from pathway XML
my $processed = $keggmap->parsePathwayXML();
my $organism = keggOrg2Std( $kegg_organism );
my $protstr = '';
if ( $kegg_organism =~ /mmu|hsa/ ) {
$protstr = join "%3B", @{$processed->{uniprot}};
} else {
$protstr = join "%3B", @{$processed->{allgenes}};
}
my $path_link = '';
if ( $params->{searchdb} eq 'srm' ) {
$path_link = "Search for all pathway proteins";
} else {
$path_link = "Search for all pathway proteins";
}
# Some page-specific javascript/css. Allows loading info to get hidden,
# draws box around legend
print <<" END";
END
my %proteins;
my @links;
my @text;
if ( $params->{searchdb} eq 'srm' ) {
my %entry2prots = ( $kegg_organism =~ /mmu|hsa/ ) ? %{$processed->{entry2uniprot}} : %{$processed->{entry2genes}};
for my $en ( @{$processed->{entries}} ) {
if ( $entry2prots{$en} ) {
my @egenes = @{$entry2prots{$en}};
my $protstr = join( '%3B', @egenes );
push @links, "GetTransitions?pabst_build_id=$pabst_build_id;organism_name=$organism;protein_name_constraint=$protstr;action=QUERY;C160=on;y_ions=on;b_ions=on;SwissProt=checked;speclinks=on";
push @text, "Gene ID(s) " . join( ", ", @egenes );
for my $prot ( @egenes ) {
$proteins{$prot}++;
}
} else {
log->warn( "No entry for $en" );
}
}
} else {
my %entry2genes = %{$processed->{entry2genes}};
for my $en ( @{$processed->{entries}} ) {
if ( $entry2genes{$en} ) {
my @genes;
if ($kegg_organism !~ /bbu/){
@genes = map( /$kegg_organism:(.*)/, @{$entry2genes{$en}} );
}else{
@genes = @{$entry2genes{$en}};
}
my $gene = join( '%3B', @genes ); # @{$entry2genes{$en}} );
push @links, "Search?build_type_name=$organism;search_key=$gene;action=GO";
push @text, "Gene ID(s) " . join( ", ", @genes );
for my $prot ( @genes ) {
$proteins{$prot}++;
}
} else {
log->warn( "No entry for $en" );
}
}
}
my $t3 = new Benchmark;
$log->debug( parseTime( $t3, $t2, "seconds to get KGML" ) );
# my $gene_list = $keggmap->getPathwayGenes( source => 'xml' );
my $gene_list = [ keys( %proteins ) ];
my $tot = scalar( @{$gene_list} );
my $t4 = new Benchmark;
$log->debug( parseTime( $t4, $t3, "seconds to fetch genes for $params->{path_def}" ) );
print parseTime( $t4, $t3, '' ) . " seconds \n";
print "Found $tot genes in pathway, looking up data in Atlas - ";
# Fetches expression info from db, returns arrays of bg/fg colors
my ( $seen, $atlas_hits, $cnt ) = getExpressionValues( gene_list => $gene_list, %{$params} );
my $t5 = new Benchmark;
$log->debug( parseTime( $t5, $t4, "seconds to get Atlas data for $params->{path_def}" ) );
print parseTime( $t5, $t4, '' ) . " seconds \n";
print "Found Atlas data for $cnt of $tot ";
my $table = SBEAMS::Connection::DataTable->new( BORDER => 0 );
$table->addRow( [ ' ', 'Protein', 'Peptides Observed' ] );
$table->setHeaderAttr( BOLD => 1 );
my $acc_string ='';
my $sep = '';
for my $gene ( @{$gene_list} ) {
my $cnt = 0;
if ( $atlas_hits->{$gene} ) {
$cnt += $atlas_hits->{$gene};
}
my $chk = ( $cnt ) ? 'checked' : '';
my $chk_box = "";
$table->addRow( [ $chk_box, $gene, $cnt ] );
$acc_string .= $sep . $gene;
$sep ||= ',';
}
$table->alternateColors( FIRSTROW => 2, PERIOD => 1, BGCOLOR => '#DDDDDD', DEF_BGCOLOR => '#FFFFFF' );
$table->setColAttr( ALIGN => 'RIGHT', COLS => [3], ROWS => [2..$table->getRowNum()] );
if ($kegg_organism =~ /bbu/){
my @genes = map( /$kegg_organism:(.*)/, @{$gene_list} );
$gene_list = \@genes;
}
my $organism = $atlas->getCurrentAtlasOrganism( parameters_ref => {} );
my $gaggle_info = $sbeams->getGaggleMicroformat(
name => "Proteins in $params->{path_def}",
organism => $organism,
data => $gene_list,
object => 'namelist',
type => 'direct'
);
my $t6 = new Benchmark;
print parseTime( $t6, $t5, '' ) . " seconds \n";
print "Getting colored map from KEGG - ";
# Send gene/color info to kegg for a map
my $build_id = ( $params->{searchdb} eq 'srm' ) ? $atlas_build_id : $pabst_build_id;
my $url = $keggmap->getColoredPathway( seen => $seen,
build_id => $build_id,
searchdb => $params->{searchdb},
genes => $gene_list,
);
my $t7 = new Benchmark;
$log->debug( parseTime( $t7, $t6, "seconds to get color pathway" ) );
print parseTime( $t7, $t6, '' ) . " seconds \n";
print <<" END";
$gaggle_info
$table
END
print "\n
\n";
# Get hashref of pathway info (mostly for image URL);
# Get image map for links to peptide atlas
my $image_map = $keggmap->get_image_map( coords => $processed->{coords},
name => 'kegg_map',
links => \@links,
colors => $seen,
searchdb => $params->{search_db},
text => \@text,
img_src => $url );
my $legend = getExpressionLegend();
print "$legend \n";
# Put pathway link here...
print " $image_map \n";
my @coordinates = @{$processed->{coords}};
my $t8 = new Benchmark;
$log->debug( parseTime( $t5, $t4, "seconds to get finish" ) );
# return $page;
return '';
}
sub getExpressionValues {
my %args = @_;
return '' unless $args{gene_list} && scalar( @{$args{gene_list}} );
my $gene_string = "'" . join( "','", @{$args{gene_list}} ) . "'";
# list of hits, either from Peptide or SRM Atlas
my %atlas_hits;
if ( $args{searchdb} eq 'srm' ) {
my $sql = qq~
SELECT DISTINCT biosequence_name, COUNT(*)
FROM $TBAT_PABST_PEPTIDE PP
JOIN $TBAT_PABST_PEPTIDE_MAPPING PM
ON PM.pabst_peptide_id = PP.pabst_peptide_id
JOIN $TBAT_BIOSEQUENCE BS
ON BS.biosequence_id = PM.biosequence_id
WHERE pabst_build_id = $args{pabst_build_id}
AND biosequence_name IN ( $gene_string )
GROUP BY biosequence_name
~;
my $sth = $sbeams->get_statement_handle( $sql );
while ( my @row = $sth->fetchrow_array() ) {
$atlas_hits{$row[0]} = $row[1];
}
} else { # Fetch from PeptideAtlas, original
my $search_type = ( $kegg_organism =~ /hsa|mmu|ssc|bta|rno|ecb|ptr/ ) ? 'Entrez GeneID' : 'ORF NAME';
$search_type = 'KEGG' if ( $kegg_organism =~ /bbu/);
my $sk_sql = qq~
SELECT DISTINCT resource_name, search_key_name
FROM $TBAT_SEARCH_KEY_ENTITY
WHERE search_key_name IN ( $gene_string )
AND search_key_type = '$search_type'
~;
my $sth = $sbeams->get_statement_handle( $sk_sql );
my %kegg2other;
my %other2kegg;
while ( my @row = $sth->fetchrow_array() ) {
$kegg2other{$row[1]} ||= {};
$kegg2other{$row[1]}->{$row[0]}++;
$other2kegg{$row[0]} ||= {};
$other2kegg{$row[0]}->{$row[1]}++;
}
my $protstr = "'" . join( "','", keys( %other2kegg ) ) . "'\n";
my $sql =<<" END";
SELECT DISTINCT biosequence_name, SUM(PI.n_observations), SUM(n_protein_mappings)
FROM $TBAT_PEPTIDE_INSTANCE PI
LEFT JOIN $TBAT_PEPTIDE_MAPPING PM
ON ( PI.peptide_instance_id = PM.peptide_instance_id )
LEFT JOIN $TBAT_BIOSEQUENCE BS
ON ( PM.matched_biosequence_id = BS.biosequence_id )
LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS
ON ( BS.biosequence_set_id = BSS.biosequence_set_id )
WHERE biosequence_name IN ( $protstr )
AND atlas_build_id = $atlas_build_id
AND biosequence_name NOT LIKE 'ENS%'
GROUP BY biosequence_name
ORDER BY biosequence_name
END
$sth = $sbeams->get_statement_handle( $sql );
if ( $gene_string ) {
while ( my @row = $sth->fetchrow_array() ) {
for my $prot ( keys( %{$other2kegg{$row[0]}} ) ) {
$atlas_hits{$prot} += $row[1];
}
}
} else {
}
}
# define colors
my $seen = 'green';
my @bg;
my @fg;
my $gene_cnt = 0;
my @seen;
my $see;
for my $gene ( @{$args{gene_list}} ) {
# Has quotes if it is a string
$gene =~ s/\'//g;
# Assume it's neutral
my $color = '#E0FFFF';
if ( $atlas_hits{$gene} ) {
$gene_cnt++;
$color = $seen;
push @seen, 1;
$see .= "Saw $gene\n";
} else {
push @seen, 0;
$see .= "No saw $gene\n";
}
push @bg, $color;
push @fg, 'black';
}
return ( \@seen, \%atlas_hits, $gene_cnt );
}
sub getExpressionLegend {
my $cell = $sbeams->getGifSpacer(20); # . " \n";
my $table =<<" END";
Peptides Observed
$cell
No Peptides observed
$cell
N/A
$cell
END
return $table;
#
#
No peptides observed
#
$cell
#
}
#+
# Read/process CGI parameters
#-
sub process_params {
my $params = { null => 'filler' };
# Standard SBEAMS processing
$sbeams->parse_input_parameters( parameters_ref => $params, q => $q );
#for ( keys( %$params ) ){ print "$_ = $params->{$_} " }
# Process "state" parameters
$sbeams->processStandardParameters( parameters_ref => $params );
return $params;
}
#+
# Extract wallclock seconds from time diff, append message, and return
#-
sub parseTime {
my @args = @_;
my $time = timestr(timediff( $args[0], $args[1] ));
$time =~ /(\d+) wallclock.*/;
return "$1 $args[2]";
}
#+
# Translate kegg organism to Standard Atlas
#-
sub keggOrg2Std {
my $korg = shift;
return '' unless $korg;
my %k2s = ( hsa => 'Human',
dme => 'Drosophila',
sce => 'Yeast',
bbu => 'BBurgdorferi_B31',
mmu => 'Mouse',
ssc => 'Pig',
eco => 'E Coli',
rno => 'Rat',
bta => 'Cow',
ecb => 'Horse',
ptr => 'Chimpanzee',
hal => 'Halobacterium',
mtu => 'Mybacterium tuberculosis'
);
return $k2s{$korg} || '';
}
__DATA__
sub error_redirect {
my $msg = shift || '';
my $type = shift || 'Error';
$sbeams->set_page_message( msg => $msg, type => $type );
print $q->redirct( "treatmentList.cgi" );
exit;
}