#!/usr/local/bin/perl ############################################################################### # Program : GetPTPPeptides # Author : Zhi Sun # $Id: GetPTPPeptides 6576 2011-09-21 17:07:59Z zsun $ # # Description : This program that allows users to # get PTP peptides from the PeptideAtlas based on various criteria. # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; $sbeams = new SBEAMS::Connection; $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); use constant MIN_PEP_LEN => 6; use constant MAX_PEP_LEN => 50; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. 
###############################################################################
# main
#
# Authenticate against SBEAMS (anonymous access allowed), read the generic
# input parameters, then render header, request body and footer.
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
    #permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin'],
    # connect_read_only=>1,
    allow_anonymous_access=>1,
  ));

  #### Read in the default input parameters
  my %parameters;
  $parameters{uploaded_file_not_saved} = 1;
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);

  #### Process generic "state" parameters before we start
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);

  #### Decide what action to take based on information so far
  if ($parameters{action} eq "???") {
    # Some action
  } else {
    $sbeamsMOD->display_page_header( use_tabbed_panes=> '1', );
    handle_request(ref_parameters=>\%parameters);
    $sbeamsMOD->display_page_footer( use_tabbed_panes=> '1', );
  }

} # end main


###############################################################################
# Handle Request
#
# Renders the query form, turns the submitted constraints into SQL clauses,
# runs (or recalls) the query and displays the resultset.
#
# Arguments (named):
#   ref_parameters - reference to the hash of input parameters (required;
#                    dies if it is not passed)
#
# Returns nothing useful. Returns early when parseConstraint2SQL rejects a
# constraint (it signals that with the string '-1'), and exits when the
# organism id is missing or not an integer.
###############################################################################
sub handle_request {
  my %args = @_;

  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};

  #### Show current user context information
  # NOTE(review): this string (and a few others below) looks like it lost
  # HTML markup somewhere upstream; only the visible characters are kept.
  print "\n\n" if ($sbeams->output_mode() eq 'html');
  #$sbeams->printUserContext();

  #### Get the HTML to display the tabs
  my $tabMenu = $sbeamsMOD->getTabMenu(
    parameters_ref => \%parameters,
    program_name => $PROG_NAME,
  );
  print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html');
  #my @accessibleBuilds = $sbeamsMOD->getAccessibleBuilds();

  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);

  #### Read in the standard form values
  my $apply_action = $parameters{'action'} || $parameters{'apply_action'};
  my $TABLE_NAME = $parameters{'QUERY_NAME'};

  #### Set some specific settings for this program
  my $CATEGORY="Get PTP Peptides";
  $TABLE_NAME="AT_GetPTPPeptides" unless ($TABLE_NAME);
  ($PROGRAM_FILE_NAME) =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"PROGRAM_FILE_NAME");
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";

  #### Get the columns and input types for this table/query
  my @columns = $sbeamsMOD->returnTableInfo($TABLE_NAME,"ordered_columns");
  my %input_types =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"input_types");

  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);
  #$sbeams->printDebuggingInfo($q);

  #### Set some reasonable defaults if no parameters supplied
  unless ($n_params_found) {
    $parameters{input_form_format} = "minimum_detail";
  }
  if( not defined $parameters{organism_id}){
    $parameters{organism_id} = 2;
  }

  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  if ($apply_action eq "VIEWRESULTSET" || $apply_action eq "VIEWPLOT") {
    $sbeams->readResultSet(
      resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters
    );
    $n_params_found = 99;
  }

  #### Display the user-interaction input form
  $sbeams->display_input_form(
    TABLE_NAME=>$TABLE_NAME,
    CATEGORY=>$CATEGORY,
    apply_action=>$apply_action,
    PROGRAM_FILE_NAME=>$PROG_NAME,
    parameters_ref=>\%parameters,
    input_types_ref=>\%input_types,
    mask_user_context=> '1',
    use_tabbed_panes=> '1',
  );

  #### Display the form action buttons
  $sbeams->display_form_buttons( TABLE_NAME=>$TABLE_NAME,
                                 use_tabbed_panes=> '1',);

  ### validate form input
  # NOTE(review): only a single blank is emitted here; the original markup
  # for form validation appears to be missing - confirm against the
  # repository copy.
  if ($sbeams->output_mode() eq 'html'){
    print qq~ ~;
  }

  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
  $sbeams->display_page_footer(
    close_tables=>'NO',
    separator_bar=>'NO',
    display_footer=>'NO',
    use_tabbed_panes=> '1',
  );

  #########################################################################
  #### Process all the constraints

  ### check required field -- need to remove the empty selection from the
  ### optionlist.
  if(! $parameters{organism_id}){
    print "ERROR: Organism Name is required\n";
    exit;
  }
  my ($organism_id) = $parameters{organism_id};

  # organism_id is interpolated unquoted into SQL below, so accept nothing
  # but a plain integer.
  unless ($organism_id =~ /\A\d+\z/) {
    print "ERROR: Invalid organism_id\n";
    exit;
  }

  ## get latest biosequence_set_ids that have xspecies mapping
  my $sql = qq~
    SELECT DISTINCT BIOSEQUENCE_SET_IDS
    FROM $TBAT_PROTEOTYPIC_PEPTIDE_XSPECIES_MAPPING
  ~;
  my @rows = $sbeams->selectOneColumn($sql);
  my @tmp = sort {$b cmp $a} @rows;
  my $biosequence_set_ids = $tmp[0];
  my $organism_name = getOrganismName(organism_id => $organism_id);

  ## get biosequence set id for current organism
  $sql = qq~
    SELECT BSS.BIOSEQUENCE_SET_ID
    FROM $TBAT_BIOSEQUENCE_SET BSS
    where BSS.biosequence_set_id in ($biosequence_set_ids)
    and BSS.organism_id = $organism_id
  ~;
  @rows = $sbeams->selectOneColumn($sql);
  my $biosequence_set_id = $rows[0];

  #### Build BIOSEQUENCE_NAME constraint
  my $biosequence_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_name",
    constraint_type=>"plain_text",
    constraint_name=>"BioSequence Name",
    constraint_value=>$parameters{biosequence_name_constraint} );
  return if ($biosequence_name_clause eq '-1');

  #### Build PEPTIDE_SEQUENCE constraint
  my $peptide_sequence_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PTP.peptide_sequence",
    constraint_type=>"plain_text",
    constraint_name=>"Peptide Sequence",
    constraint_value=>$parameters{peptide_sequence_constraint} );
  return if ($peptide_sequence_clause eq '-1');

  #### Build PEPTIDE_LENGTH constraint
  my $peptide_length_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"LEN(PTP.peptide_sequence)",
    constraint_type=>"flexible_int",
    constraint_name=>"Peptide Length",
    constraint_value=>$parameters{peptide_length_constraint} );
  return if ($peptide_length_clause eq '-1');

  #### Build the score (float) and mapping-count (int) constraints.
  #### Parameter names starting with "n" are the mapping counts.
  my %constraints = (
    combined_score_constraint => 'PTP.combined_predictor_score',
    ps_score_constraint => 'PTP.peptidesieve_score',
    espp_score_constraint => 'PTP.espp_score',
    dp_score_constraint => 'PTP.detectabilitypredictor_score',
    stepp_score_constraint => 'PTP.stepp_score',
    apex_score_constraint => 'PTP.apex_score',
    n_sp_mapping_constraint => 'PTPM.n_sp_mapping',
    n_spvar_mapping_constraint => 'PTPM.n_spvar_mapping',
    n_spsnp_mapping_constraint => 'PTPM.n_spsnp_mapping',
    n_ensp_mapping_constraint => 'PTPM.n_ensp_mapping',
    n_ensg_mapping_constraint => 'PTPM.n_ensg_mapping',
    n_ipi_mapping_constraint => 'PTPM.n_ipi_mapping',
    n_sgd_mapping_constraint => 'PTPM.n_sgd_mapping'
  );

  my @score_names   = grep { $_ !~ /^n/ } sort keys %constraints;
  my @mapping_names = grep { $_ =~ /^n/ } sort keys %constraints;

  my $score_clause = _parse_constraint_group(
    constraints_ref      => \%constraints,
    constraint_names_ref => \@score_names,
    constraint_type      => "flexible_float",
    parameters_ref       => \%parameters,
  );
  return unless (defined $score_clause);

  my $mapping_clause = _parse_constraint_group(
    constraints_ref      => \%constraints,
    constraint_names_ref => \@mapping_names,
    constraint_type      => "flexible_int",
    parameters_ref       => \%parameters,
  );
  return unless (defined $mapping_clause);

  #### Add an AND clause for the proteins listed in $parameters{upload_file}
  my $biosequence_names_clause = '';
  if ( $parameters{upload_file} ) {

    ## upload the file to a file handle
    my $fh = $q->upload('upload_file');
    if (!$fh && $q->cgi_error) {
      print $q->header(-status=>$q->cgi_error)
        if ($sbeams->output_mode() eq 'html');
    }

    # Without a handle there is nothing to read; an empty name list yields
    # no clause at all rather than an invalid "IN (  )".
    my @protein_names = $fh ? _read_uploaded_protein_names($fh) : ();
    $biosequence_names_clause = _biosequence_names_clause(@protein_names);

  } # if upload file

  #### Build ROWCOUNT constraint
  # The limit lands unquoted in "TOP n", so it must be a plain integer in
  # range; anything else falls back to the default.
  my $row_limit = $parameters{row_limit};
  $row_limit = '' unless (defined $row_limit);
  $row_limit =~ s/\s+//g;
  unless ($row_limit =~ /\A\d+\z/ && $row_limit > 0 && $row_limit <= 1000000) {
    $row_limit = 50000;
  }
  $parameters{row_limit} = $row_limit;
  my $limit_clause = "TOP $parameters{row_limit}";

  #### Disable row limits
  #$limit_clause = "";

  #### Define some variables needed to build the query
  # Set up base column defs: [ column key, SQL expression, display title ]
  my @column_array = (
    ["biosequence_name", "BS.biosequence_name", "Biosequence Name"],
    ["peptide_accession","P.peptide_accession","Peptide Accession"],
    ["preceding_residue","PTP.preceding_residue","Pre AA"],
    ["peptide_sequence","PTP.peptide_sequence","Sequence"],
    ["following_residue","PTP.following_residue","Fol AA"],
    ["peptide_length", "LEN(PTP.peptide_sequence)", "Peptide Length"],
    ["combined_predictor_score","PTP.combined_predictor_score","Combined Predictor Score"],
    ["peptidesieve_score","PTP.peptidesieve_score","PSieve"],
    ["espp_score", "PTP.espp_score","ESPP"],
    ["detectabilitypredictor_score","PTP.detectabilitypredictor_score","DPred"],
    ["apex_score", "PTP.apex_score", "APEX"],
    ["stepp_score", "PTP.stepp_score", "STEPP"],
    ["n_sp_mapping", "PTPM.n_sp_mapping", "N SP Mapping"],
    ["n_spvar_mapping","PTPM.n_spvar_mapping", "N SP-varsplic Mapping"],
    ["n_spsnp_mapping", "PTPM.n_spsnp_mapping", "N SP-nsSNP Mapping"],
    ["n_ensp_mapping", "PTPM.n_ensp_mapping", "N ENSP Mapping"],
    ["n_ensg_mapping", "PTPM.n_ensg_mapping", "N ENSG Mapping"],
    ["n_ipi_mapping", "PTPM.n_ipi_mapping", "N IPI Mapping"],
    ["n_sgd_mapping", "PTPM.n_sgd_mapping", "N SGD Mapping"],
    ["n_human_mapping", "PTPXM.n_human_mapping", "N Human Mapping"],
    ["n_mouse_mapping", "PTPXM.n_mouse_mapping", "N Mouse Mapping"],
    ["n_yeast_mapping", "PTPXM.n_yeast_mapping", "N Yeast Mapping"],
  );

  #### Set flag to display SQL statement if user selected
  if ( $parameters{display_options} =~ /ShowSQL/ ) {
    $show_sql = 1;
  }

  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();

  # A typed-in biosequence name constraint takes the place of the clause
  # built from the uploaded file.
  if($biosequence_name_clause){
    $biosequence_names_clause = $biosequence_name_clause;
  }

  ## Sends @column_array_ref to build_SQL_columns_list, which
  ## (1) appends the 2nd element in array to $columns_clause
  ## (2) fills %colnameidx_ref as a hash with key = 1st element
  ## and value = 3rd element, and (3) fills @column_titles_ref
  ## array with the 3rd element
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );

  #### Define the SQL statement
  # Each optional clause sits on its own line because the LIKE form of the
  # names clause ends in a "--end" SQL comment.
  $sql = qq~
    SELECT $limit_clause $columns_clause
    FROM $TBAT_PROTEOTYPIC_PEPTIDE PTP
    LEFT JOIN $TBAT_PROTEOTYPIC_PEPTIDE_MAPPING PTPM
      ON ( PTP.PROTEOTYPIC_PEPTIDE_ID = PTPM.PROTEOTYPIC_PEPTIDE_ID )
    LEFT JOIN $TBAT_PEPTIDE P
      ON ( PTP.MATCHED_PEPTIDE_ID = P.PEPTIDE_ID )
    LEFT JOIN $TBAT_BIOSEQUENCE BS
      ON ( PTPM.SOURCE_BIOSEQUENCE_ID = BS.BIOSEQUENCE_ID )
    LEFT JOIN $TBAT_PROTEOTYPIC_PEPTIDE_XSPECIES_MAPPING PTPXM
      ON (PTPXM.PROTEOTYPIC_PEPTIDE_MAPPING_ID = PTPM.PROTEOTYPIC_PEPTIDE_MAPPING_ID )
    WHERE 1 = 1
    AND BS.Biosequence_set_id = $biosequence_set_id
    $biosequence_names_clause
    $peptide_sequence_clause
    $score_clause
    $peptide_length_clause
    $mapping_clause
    AND BS.BIOSEQUENCE_NAME NOT LIKE 'DECOY%'
    ORDER BY BS.BIOSEQUENCE_NAME, PTP.COMBINED_PREDICTOR_SCORE DESC
  ~;
  #$sbeams->display_sql(sql=>$sql);

  #### Define columns that should be hidden in the output table
  if($organism_name =~ /Yeast/i){
    %hidden_cols = (
      'N IPI Mapping' => 1,
      #'N Yeast Mapping' => 1,
    );
  }elsif($organism_name =~ /Human/i){
    %hidden_cols = (
      'N SGD Mapping' => 1,
      #'N Human Mapping' => 1,
    );
  }else{
    %hidden_cols = (
      'N SGD Mapping' => 1,
      #'N Mouse Mapping' => 1,
    );
  }

  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /(QUERY|VIEWRESULTSET|VIEWPLOT)/i ){

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /QUERY/i) {

      #### Show the SQL that will be or was executed
      $sbeams->display_sql(sql=>$sql,use_tabbed_panes=> '1',)
        if ($show_sql);

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(
        sql_query=>$sql,
        resultset_ref=>$resultset_ref,
      );

      if($parameters{n_peptide_per_protein_constraint}){
        postProcessResultset(
          rs_params_ref=>\%rs_params,
          resultset_ref=>$resultset_ref,
          query_parameters_ref=>\%parameters,
          column_titles_ref=>\@column_titles,
          sql => $sql
        );
      }

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(
        resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        resultset_params_ref=>\%rs_params,
        query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME",
      );
    }

    my @headings = (
      'Biosequence Name',
      'Peptide Accession',
      'Pre AA',
      'Sequence',
      'Fol AA',
      'Peptide Length',
      'Combined Predictor Score',
      'PSieve',
      'ESPP',
      'DPred',
      'APEX',
      'STEPP',
      'N SP Mapping',
      'N SP-varsplic Mapping',
      'N SP-nsSNP Mapping',
      'N ENSP Mapping',
      'N ENSG Mapping',
      'N IPI Mapping',
      'N Human Mapping',
      'N Mouse Mapping',
      'N Yeast Mapping',
    );

    my $col_info = $sbeamsMOD->get_column_defs( labels => \@headings );
    my $help_text = $sbeamsMOD->make_table_help( entries => $col_info,);

    # NOTE(review): the first pattern is empty as it stands. Perl treats an
    # empty pattern as "reuse the last successfully matched regex", which is
    # almost certainly not what was meant; the intended pattern appears to
    # have been lost. Kept unchanged until it can be restored from the
    # repository copy.
    $help_text =~ s///g;
    $help_text =~ s/\n//g;

    my $use_tabbed_panes = 1;
    if ( $help_text ) {
      my $prefix = $sbeams->addTabbedPane(label => "Result");
      print "$prefix$help_text\n" if ($sbeams->output_mode() eq 'html');
      $use_tabbed_panes = 0;
    }

    #### Display the resultset
    $sbeams->displayResultSet(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,
      hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
      use_tabbed_panes=>$use_tabbed_panes,
    );

    #### Display the resultset controls
    $sbeams->displayResultSetControls(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      base_url=>$base_url,
      use_tabbed_panes=>$use_tabbed_panes,
    );

    if(! $use_tabbed_panes ){
      print $sbeams->closeTabbedPane() , "\n"
        if ($sbeams->output_mode() eq 'html');
    }
    #print "$parameters{organism_id}";

    #### Display a plot of data from the resultset
    $sbeams->displayResultSetPlot(
      rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
      use_tabbed_panes=>'1',
      apply_action=>$apply_action,
    );

  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "\n\nSelect parameters above and press QUERY\n\n\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }

} # end handle_request


###############################################################################
# _parse_constraint_group
#
# Runs parseConstraint2SQL over a list of constraint parameters that share
# one constraint type and concatenates the resulting SQL fragments.
#
# Arguments (named):
#   constraints_ref      - hash ref: parameter name => SQL column
#   constraint_names_ref - array ref of the parameter names to process
#   constraint_type      - type string handed to parseConstraint2SQL
#   parameters_ref       - hash ref of the input parameters
#
# Returns the combined clause ('' when nothing was constrained), or undef as
# soon as one constraint is rejected (parseConstraint2SQL returned '-1').
###############################################################################
sub _parse_constraint_group {
  my %args = @_;

  my $constraints_ref = $args{constraints_ref};
  my $parameters_ref = $args{parameters_ref};

  my $combined_clause = '';
  foreach my $constraint (@{ $args{constraint_names_ref} }) {
    my $clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"$constraints_ref->{$constraint}",
      constraint_type=>$args{constraint_type},
      constraint_name=>"$constraint",
      constraint_value=>$parameters_ref->{$constraint} );

    # Test the value just returned, not the accumulated clause.
    return if (defined $clause && $clause eq '-1');

    $combined_clause .= $clause if ($clause);
  }

  return $combined_clause;
}


###############################################################################
# _read_uploaded_protein_names
#
# Reads protein names, one per line, from an uploaded file handle. The file
# is ignored unless it passes the -T (text) test and is smaller than
# 1,000,000 bytes (about 1 MB). Reading stops once more than 30000 non-empty
# lines have been seen. Trailing whitespace is stripped from each line.
#
# Returns the sorted list of distinct names (empty list if nothing usable).
###############################################################################
sub _read_uploaded_protein_names {
  my ($fh) = @_;

  my $max_bytes = 1000000;
  my $max_names = 30000;

  return () unless ( (-T $fh) && ((-s $fh) < $max_bytes) );

  my %protein_hash;
  my $count = 0;
  while (my $prt = <$fh>) {
    chomp($prt);
    $prt =~ s/\s+$//;
    if ($prt) {
      $protein_hash{$prt} = $prt;
      $count = $count + 1;
    }
    last if ($count > $max_names);
  }

  return sort keys %protein_hash;
}


###############################################################################
# _biosequence_names_clause
#
# Builds the " AND ..." SQL fragment restricting BS.biosequence_name to the
# given names. If any name contains '%', every name is matched with LIKE and
# the alternatives are OR-ed together; otherwise a single IN list is used.
# Single quotes inside names are doubled so a name cannot terminate the SQL
# string literal.
#
# Returns '' for an empty list.
###############################################################################
sub _biosequence_names_clause {
  my @names = @_;

  return '' unless (@names);

  my @quoted_names;
  foreach my $name (@names) {
    my $escaped = $name;
    $escaped =~ s/'/''/g;
    push @quoted_names, "'$escaped'";
  }

  if (grep { /%/ } @names) {
    my @alternatives = map { "BS.biosequence_name LIKE $_" } @quoted_names;
    return " AND ( " . join(" OR ", @alternatives) . " ) --end";
  }

  return " AND BS.biosequence_name IN ( " . join(',', @quoted_names) . " )";
}


###############################################################################
# postProcessResultset
#
# Perform some additional processing on the resultset that would otherwise
# be very awkward to do in SQL: keep at most N rows per biosequence name,
# where N is $parameters{n_peptide_per_protein_constraint}. Rows are kept in
# their existing order.
#
# Arguments (named):
#   resultset_ref        - hash ref with data_ref (array of row array refs)
#                          and column_hash_ref (column name => index)
#   query_parameters_ref - hash ref of the query parameters
#   (other named arguments are accepted and ignored)
#
# Returns -1 (after printing a message) when the constraint contains a
# non-digit; otherwise the resulting data_ref. A constraint that is empty
# once whitespace is removed leaves the resultset untouched.
###############################################################################
sub postProcessResultset {
  my %args = @_;

  #### Process the arguments list
  my $resultset_ref = $args{'resultset_ref'};
  my $query_parameters_ref = $args{'query_parameters_ref'} || {};

  my $n_peptide_per_protein =
    $query_parameters_ref->{n_peptide_per_protein_constraint};
  $n_peptide_per_protein = '' unless (defined $n_peptide_per_protein);
  $n_peptide_per_protein =~ s/\s+//g;

  if($n_peptide_per_protein =~ /(\D)/){
    print "\n\nonly interger number allowed in Number of Peptides Per Protein Constraint.\n\n";
    return -1;
  }

  # An empty value would compare as 0 and throw away every row, so treat it
  # as "no limit requested".
  return $resultset_ref->{data_ref} if ($n_peptide_per_protein eq '');

  my $name_index = $resultset_ref->{column_hash_ref}->{biosequence_name};
  my @processed_result = ();
  my %counts = ();

  foreach my $row (@{ $resultset_ref->{data_ref} }) {
    my $protein_accession = $row->[$name_index];
    if (($counts{$protein_accession} || 0) < $n_peptide_per_protein){
      push @processed_result, $row;
    }
    $counts{$protein_accession}++;
  }

  $resultset_ref->{data_ref} = \@processed_result;
  return $resultset_ref->{data_ref};
}


###############################################################################
# evalSQL
#
# Callback for translating Perl variables into their values,
# especially the global table variables to table names
#
# NOTE(review): this is a string eval of its argument. It must only ever be
# given SQL templates written by the application, never text that contains
# user input.
###############################################################################
sub evalSQL {
  my $sql = shift;

  return eval "\"$sql\"";

} # end evalSQL


#######################################################################
# getOrganismName
#
# Looks up the organism_name for an organism id (records with status 'D'
# are excluded) and returns it with spaces replaced by underscores.
#
# Arguments (named):
#   organism_id - integer id; anything that is not a plain integer is
#                 rejected without querying
#
# Returns the name, or undef when the id is invalid or no row matches.
#######################################################################
sub getOrganismName {
  my %args = @_;

  my $organism_id = $args{organism_id};

  # The id is placed inside the SQL text, so only digits are accepted.
  return unless (defined $organism_id && $organism_id =~ /\A\d+\z/);

  my $sql = qq~
    SELECT organism_name
    FROM $TB_ORGANISM
    WHERE organism_id = '$organism_id'
    AND record_status != 'D'
  ~;

  my ($name) = $sbeams->selectOneColumn($sql);

  ## replace spaces with _
  $name =~ s/ /\_/g if (defined $name);

  return $name;
}