#!/usr/local/bin/perl ############################################################################### # Program : GetPeptides # Author : Eric Deutsch # $Id$ # # Description : This program that allows users to # get peptides from the PeptideAtlas based on various criteria. # # SBEAMS is Copyright (C) 2000-2021 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use Data::Dumper; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. 
###############################################################################
# main
#
# Authenticates the user through SBEAMS (anonymous access allowed), reads
# the CGI input parameters, and renders the page: header, the query form
# and results produced by handle_request(), then the footer.
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
    #permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin'],
    # connect_read_only=>1,
    allow_anonymous_access=>1,
  ));

  #### Read in the default input parameters
  my %parameters;
  $parameters{uploaded_file_not_saved} = 1;

  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);

  #### Process generic "state" parameters before we start
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);

  #### Decide what action to take based on information so far
  if ($parameters{action} eq "???") {
    # Some action
  } else {
    my $project_id = $sbeamsMOD->getProjectID(
      atlas_build_id => $parameters{atlas_build_id}
    );
    $sbeamsMOD->display_page_header(
      project_id => $project_id,
      use_tabbed_panes => 1
    );
    handle_request(ref_parameters=>\%parameters);
    $sbeamsMOD->display_page_footer( use_tabbed_panes => 1 );
  }

} # end main


###############################################################################
# Handle Request
#
# Displays the query form, translates the submitted constraints into SQL
# clauses, assembles and runs the peptide query, and displays the resultset
# together with its controls and plot.
#
# Named arguments:
#   ref_parameters - reference to the hash of parsed input parameters
#                    (required; dies if absent)
#
# Returns early, without running a query, when no atlas build is selected
# or when parseConstraint2SQL() returns '-1' for any constraint.
###############################################################################
sub handle_request {
  my %args = @_;

  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};

  #### Show current user context information
  #$sbeams->printUserContext();

  #### Get the HTML to display the tabs
  my $tabMenu = $sbeamsMOD->getTabMenu(
    parameters_ref => \%parameters,
    program_name => $PROG_NAME,
  );
  if ($sbeams->output_mode() eq 'html') {
    print $tabMenu->asHTML();
    print "";
  }

  #### Get the current atlas_build_id based on parameters or session
  my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID(
    parameters_ref => \%parameters,
  );
  if (defined($atlas_build_id) && $atlas_build_id < 0) {
    return;
  }
  $parameters{atlas_build_id} = $atlas_build_id;

  #### Define some generic variables
  my ($key,$value,$sql);

  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);

  #### Read in the standard form values
  my $apply_action = $parameters{'action'} || $parameters{'apply_action'};
  my $TABLE_NAME = $parameters{'QUERY_NAME'};

  #### Set some specific settings for this program
  my $CATEGORY="Get Peptides";
  $TABLE_NAME="AT_GetPeptides" unless ($TABLE_NAME);
  ($PROGRAM_FILE_NAME) =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"PROGRAM_FILE_NAME");
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";

  #### Get the columns and input types for this table/query
  my @columns = $sbeamsMOD->returnTableInfo($TABLE_NAME,"ordered_columns");
  my %input_types =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"input_types");

  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);
  #$sbeams->printDebuggingInfo($q);

  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  if ($apply_action =~ /VIEWRESULTSET|VIEWPLOT/ ) {
    $sbeams->readResultSet(
      resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters
    );
    $n_params_found = 99;
  }

  #### Set some reasonable defaults if no parameters supplied
  unless ($n_params_found) {
    $parameters{input_form_format} = "minimum_detail";
  }

  #### Apply any parameter adjustment logic
  # None

  #### Display the user-interaction input form
  $sbeams->display_input_form(
    TABLE_NAME=>$TABLE_NAME,
    CATEGORY=>$CATEGORY,
    apply_action=>$apply_action,
    PROGRAM_FILE_NAME=>$PROG_NAME,
    parameters_ref=>\%parameters,
    input_types_ref=>\%input_types,
    use_tabbed_panes => 1,
    mask_user_context=> '1',
  );

  #### Display the form action buttons
  $sbeams->display_form_buttons(
    TABLE_NAME=>$TABLE_NAME,
    use_tabbed_panes => 1,
  );

  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
  $sbeams->display_page_footer(
    close_tables=>'NO',
    use_tabbed_panes => 1,
    separator_bar=>'NO',
    display_footer=>'NO');

  #########################################################################
  #### Process all the constraints

  #### If no atlas_build_id was selected, stop here
  unless ($parameters{atlas_build_id}) {
    $sbeams->reportException(
      state => 'ERROR',
      type => 'INSUFFICIENT CONSTRAINTS',
      message => 'You must select at least one Atlas Build',
    );
    return;
  }

  #### Build ATLAS_BUILD constraint
  my $atlas_build_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"AB.atlas_build_id",
    constraint_type=>"int_list",
    constraint_name=>"Atlas Build",
    constraint_value=>$parameters{atlas_build_id} );
  return if ($atlas_build_clause eq '-1');

  #### Build BIOSEQUENCE_NAME constraint
  my $biosequence_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_name",
    constraint_type=>"plain_text",
    constraint_name=>"BioSequence Name",
    constraint_value=>$parameters{biosequence_name_constraint} );
  return if ($biosequence_name_clause eq '-1');

  $parameters{biosequence_name_constraint} =~ s/^\s+//;
  $parameters{biosequence_name_constraint} =~ s/\s+$//;
  if ($parameters{biosequence_name_constraint} =~/null/i){
    $biosequence_name_clause = 'AND BS.biosequence_name is null';
  }

  #### Build BIOSEQUENCE_GENE_NAME constraint
  my $biosequence_gene_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_gene_name",
    constraint_type=>"plain_text",
    constraint_name=>"BioSequence Gene Name",
    constraint_value=>$parameters{biosequence_gene_name_constraint} );
  return if ($biosequence_gene_name_clause eq '-1');

  #### Build BIOSEQUENCE_DESC constraint
  my $biosequence_desc_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_desc",
    constraint_type=>"plain_text",
    constraint_name=>"BioSequence Description",
    constraint_value=>$parameters{biosequence_desc_constraint} );
  return if ($biosequence_desc_clause eq '-1');

  #### Build PEPTIDE_NAME constraint
  my $peptide_name_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"P.peptide_accession",
    constraint_type=>"plain_text",
    constraint_name=>"Peptide Name",
    constraint_value=>$parameters{peptide_name_constraint} );
  return if ($peptide_name_clause eq '-1');

  #### Build PEPTIDE_SEQUENCE constraint
  my $peptide_sequence_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"P.peptide_sequence",
    constraint_type=>"plain_text",
    constraint_name=>"Peptide Sequence",
    constraint_value=>$parameters{peptide_sequence_constraint} );
  return if ($peptide_sequence_clause eq '-1');

  #### Build BEST_PROBABILITY constraint
  my $best_probability_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.best_probability",
    constraint_type=>"flexible_float",
    constraint_name=>"Best Probability",
    constraint_value=>$parameters{best_probability_constraint} );
  return if ($best_probability_clause eq '-1');

  #### Build modified_peptide_sequence_constraint
  my $modified_peptide_sequence_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"MPI.modified_peptide_sequence",
    constraint_type=>"plain_text",
    constraint_name=>"Modified Sequence",
    constraint_value=>$parameters{modified_peptide_sequence_constraint} );
  return if ($modified_peptide_sequence_clause eq '-1');

  #### Build SAMPLE_ID constraint
  #### Build SAMPLE_CATEGORY_ID constraint
  # These are applied through a sub-select on peptide_instance_id rather
  # than as joins in the main query.
  my $peptide_instance_in = '';
  my $samples_clause = '';
  my $sample_category_clause = '';
  if ($parameters{sample_ids} || $parameters{sample_category_id} ){
    $samples_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"S.sample_id",
      constraint_type=>"int_list",
      constraint_name=>"Samples ",
      constraint_value=>$parameters{sample_ids} );
    return if ($samples_clause eq '-1');

    $sample_category_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"S.sample_category_id",
      constraint_type=>"int_list",
      constraint_name=>"Sample Category",
      constraint_value=>$parameters{sample_category_id} );
    return if ($sample_category_clause eq '-1');

    $peptide_instance_in = qq~
      SELECT PI2.PEPTIDE_INSTANCE_ID
      FROM $TBAT_PEPTIDE_INSTANCE PI2
      JOIN $TBAT_PEPTIDE_INSTANCE_SAMPLE PIS
        ON ( PIS.PEPTIDE_INSTANCE_ID = PI2.PEPTIDE_INSTANCE_ID )
      JOIN $TBAT_SAMPLE S ON (PIS.SAMPLE_ID = S.SAMPLE_ID)
      where 1=1
      and PI2.atlas_build_id = $atlas_build_id
      $samples_clause
      $sample_category_clause
    ~;
    # The sub-select already restricts the build, so drop the outer clause
    $atlas_build_clause = '';
  }

  #### Build N_OBSERVATIONS constraint
  my $n_observations_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.n_observations",
    constraint_type=>"flexible_int",
    constraint_name=>"Number of Observations",
    constraint_value=>$parameters{n_observations_constraint} );
  return if ($n_observations_clause eq '-1');

  #### Build N_SAMPLES constraint
  my $n_samples_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.n_samples",
    constraint_type=>"flexible_int",
    constraint_name=>"Number of Samples",
    constraint_value=>$parameters{n_samples_constraint} );
  return if ($n_samples_clause eq '-1');

  #### Build EMPIRICAL_PROTEOTYPIC_SCORE constraint
  my $empirical_proteotypic_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.empirical_proteotypic_score",
    constraint_type=>"flexible_float",
    constraint_name=>"Empirical Proteotypic Score",
    constraint_value=>$parameters{empirical_proteotypic_constraint} );
  return if ($empirical_proteotypic_clause eq '-1');

  #### Build n_protein_mappings constraint
  my $n_protein_mappings_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.n_protein_mappings",
    constraint_type=>"flexible_int",
    constraint_name=>"n_protein_mappings",
    constraint_value=>$parameters{n_protein_mappings_constraint} );
  return if ($n_protein_mappings_clause eq '-1');

  #### Build n_genome_locations constraint
  my $n_genome_locations_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.n_genome_locations",
    constraint_type=>"flexible_int",
    constraint_name=>"n_genome_locations",
    constraint_value=>$parameters{n_genome_locations_constraint} );
  return if ($n_genome_locations_clause eq '-1');

  #### Build is_exon_spanning constraint
  my $is_exon_spanning_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.is_exon_spanning",
    constraint_type=>"plain_text",
    constraint_name=>"is_exon_spanning",
    constraint_value=>$parameters{is_exon_spanning_constraint} );
  return if ($is_exon_spanning_clause eq '-1');

  #### Build allow_missed_cleavages constraint
  # When missed cleavages are disallowed, the filter is folded into the
  # peptide_instance_id sub-select (extending the sample sub-select if
  # one was built above).
  if ( $parameters{allow_missed_cleavages} &&
       $parameters{allow_missed_cleavages} =~ /N/ ) {
    my $allow_missed_cleavages_clause =
      "AND P2.peptide_sequence NOT LIKE '%[KR][^P]%'";
    if ($peptide_instance_in ne ''){
      $peptide_instance_in = qq~
        SELECT PI2.PEPTIDE_INSTANCE_ID
        FROM $TBAT_PEPTIDE P2
        JOIN $TBAT_PEPTIDE_INSTANCE PI2 ON (P2.PEPTIDE_ID = PI2.PEPTIDE_ID)
        JOIN $TBAT_PEPTIDE_INSTANCE_SAMPLE PIS
          ON ( PIS.PEPTIDE_INSTANCE_ID = PI2.PEPTIDE_INSTANCE_ID )
        JOIN $TBAT_SAMPLE S ON (PIS.SAMPLE_ID = S.SAMPLE_ID)
        WHERE 1=1
        $allow_missed_cleavages_clause
        and PI2.atlas_build_id = $atlas_build_id
        $samples_clause
        $sample_category_clause
      ~;
    }else{
      $peptide_instance_in = qq~
        SELECT PI2.PEPTIDE_INSTANCE_ID
        FROM $TBAT_PEPTIDE P2
        JOIN $TBAT_PEPTIDE_INSTANCE PI2 ON (P2.PEPTIDE_ID = PI2.PEPTIDE_ID)
        WHERE 1=1
        $allow_missed_cleavages_clause
      ~;
    }
  }

  ## get organism_id to pass on to url_cols
  my $tsql = qq~
    SELECT BS.organism_id
    FROM $TBAT_BIOSEQUENCE_SET BS
    JOIN $TBAT_ATLAS_BUILD AB
      ON (AB.biosequence_set_id = BS.biosequence_set_id)
    where AB.atlas_build_id ='$parameters{atlas_build_id}'
  ~;

  my ($organism_id) = $sbeams->selectOneColumn($tsql) or
    die "\nERROR: Unable to find the organism_id" .
        " with $tsql\n\n";
  $parameters{organism_id} = $organism_id;

  #### Build allow_low_ntt constraint
  my $allow_low_ntt_clause = '';
  if ( $parameters{allow_low_ntt} && $parameters{allow_low_ntt} =~ /N/ ) {
    if ($atlas_build_id >= 448 && $organism_id == 2 ){
      $allow_low_ntt_clause = qq~
        AND PI.highest_n_enzymatic_termini = 2
      ~;
    }else{
      $allow_low_ntt_clause = qq~
        AND PI.preceding_residue like '[-KR]'
        AND ( following_residue = '-' OR P.peptide_sequence LIKE '%[KR]' )
      ~;
    }
  }

  #### Build CHROMOSOME constraint
  my $chromosome_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PM.chromosome",
    constraint_type=>"plain_text",
    constraint_name=>"Chromosome",
    constraint_value=>$parameters{chromosome_constraint} );
  return if ($chromosome_clause eq '-1');

  #### Build START_IN_CHROMOSOME constraint
  my $start_in_chromosome_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PM.start_in_chromosome",
    constraint_type=>"flexible_int",
    constraint_name=>"Start in Chromosome",
    constraint_value=>$parameters{start_in_chromosome_constraint} );
  return if ($start_in_chromosome_clause eq '-1');

  #### Build END_IN_CHROMOSOME constraint
  # NOTE(review): every sibling parameter key is lowercase; the mixed-case
  # spelling is kept as a fallback in case the form really submits it.
  my $end_in_chromosome_value =
    defined $parameters{end_in_chromosome_constraint}
      ? $parameters{end_in_chromosome_constraint}
      : $parameters{End_in_chromosome_constraint};
  my $end_in_chromosome_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PM.end_in_chromosome",
    constraint_type=>"flexible_int",
    constraint_name=>"end in Chromosome",
    constraint_value=>$end_in_chromosome_value );
  return if ($end_in_chromosome_clause eq '-1');

  #### Build STRAND constraint
  my $strand_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PM.strand",
    constraint_type=>"plain_text",
    constraint_name=>"Strand",
    constraint_value=>$parameters{strand_constraint} );
  return if ($strand_clause eq '-1');

  #### Build peptide_ids constraint
  my $peptide_ids_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"PI.peptide_id",
    constraint_type=>"int_list",
    constraint_name=>"is_subpeptide_of",
    constraint_value=>$parameters{peptide_ids_constraint} );
  return if ($peptide_ids_clause eq '-1');

  ## handle file upload and clause for sql for $parameters{upload_file}
  my $biosequence_names_clause = '';
  if ( $parameters{upload_file} ) {

    ## upload the file to a file handler
    my $fh = $q->upload('upload_file');
    if (!$fh && $q->cgi_error) {
      print $q->header(-status=>$q->cgi_error);
    }

    ## Only read text uploads smaller than 1,000,000 bytes, and stop once
    ## more than 30000 names have been collected
    my %protein_hash;
    if ( $fh && (-T $fh) && ((-s $fh) < 1000000) ) {
      my $count = 0;
      while (my $prt = <$fh>) {
        chomp($prt);
        $prt =~ s/\s+$//;
        if ($prt) {
          $protein_hash{$prt} = $prt;
          $count = $count + 1;
        }
        last if ($count > 30000);
      }
    }

    ## Sorted so that the same upload always yields the same SQL text
    $biosequence_names_clause =
      _build_biosequence_names_clause(sort keys %protein_hash);
  } # if upload file

  #### Build ROWCOUNT constraint
  # row_limit is user input that ends up in the SQL text, so only a plain
  # positive integer is accepted; -1 disables the limit and anything else
  # falls back to the default.
  my $MAX_ROW_NUMBER;
  my $row_limit = defined $parameters{row_limit} ? $parameters{row_limit} : '';
  if ($row_limit =~ /^\s*-1\s*$/) {
    $MAX_ROW_NUMBER = 0;
  } else {
    $parameters{row_limit} =
      ($row_limit =~ /^\s*(\d+)\s*$/ && $1 > 0) ? $1 : 1500000;
    $MAX_ROW_NUMBER = $parameters{row_limit};
  }
  #$parameters{row_limit} = 100000 if ($parameters{row_limit} eq '');
  my $limit_clause = $MAX_ROW_NUMBER ? "TOP $MAX_ROW_NUMBER" : '';
  #### Disable row limits

  #### Define some variables needed to build the query
  my @column_array;
  my $mapping_join = qq~
    LEFT JOIN $TBAT_PEPTIDE_MAPPING PM
      ON ( PI.peptide_instance_id = PM.peptide_instance_id )
    LEFT JOIN $TBAT_BIOSEQUENCE BS
      ON ( PM.matched_biosequence_id = BS.biosequence_id )
    LEFT JOIN $TB_DBXREF DBX
      ON ( BS.dbxref_id = DBX.dbxref_id )
  ~;
  my $mapping_constraints = qq~
    $chromosome_clause
    $start_in_chromosome_clause
    $end_in_chromosome_clause
    $strand_clause
  ~;
  my $mapping_sort =
    'ORDER BY P.peptide_accession, PM.chromosome, PM.start_in_chromosome';

  #### Define the desired columns in the query
  #### [friendly name used in url_cols,SQL,displayed column title]

  #### If the DASFormat view was selected (excludes other options)
  my $modification_join = '';
  if ($parameters{display_options} =~ /DASFormat/) {
    @column_array = (
      ["label","'Similarity'","Label"],
      ["peptide_accession","P.peptide_accession","Peptide Accession"],
      ["n_observations","'Nobs='+LTRIM(STR(PI.n_observations,6,0))","N Obs"],
      ["peptide_sequence","P.peptide_sequence","Peptide Sequence"],
      ["chromosome","PM.chromosome","Chrom"],
      ["start_in_chromosome","PM.start_in_chromosome","Start Position"],
      ["end_in_chromosome","PM.end_in_chromosome","End Position"],
      ["strand","PM.strand","Strand"],
      ["n_protein_mappings","PI.n_protein_mappings","N Protein Mappings"],
      ["best_probability","STR(PI.best_probability,7,3)","Best Prob"],
      ["n_genome_locations","PI.n_genome_locations","N Unique Locations on Genome"],
    );

  } else {

    # Set up base column defs
    @column_array = (
      ["peptide_accession","P.peptide_accession","Peptide Accession"],
      ["peptide_sequence","P.peptide_sequence","Peptide Sequence"],
      ["best_probability","LTRIM(STR(PI.best_probability,7,3))","Best Prob"],
      ["n_observations","PI.n_observations","N Obs"],
      ["empirical_proteotypic_score","LTRIM(STR(PI.empirical_proteotypic_score,7,2))","Empirical Proteotypic Score"],
      ["SSRCalc_relative_hydrophobicity","LTRIM(STR(P.SSRCalc_relative_hydrophobicity,7,2))","SSRCalc Relative Hydrophob"],
      ["n_samples","PI.n_samples","N Experiments"],
      ["n_protein_mappings","PI.n_protein_mappings","N Protein Mappings"],
      ["n_genome_locations","PI.n_genome_locations","N Unique Locations on Genome"],
      ["is_exon_spanning","PI.is_exon_spanning","Spans exons? (Y,N)"],
      ["is_subpeptide_of","PI.is_subpeptide_of","Peptides that include this sequence"],
      ["protease_ids", "PI.protease_ids", "Protease IDs"],
      ["atlas build name","AB.atlas_build_name","PeptideAtlas Name"],
      ["organism_full_name","O.full_name","Organism"],
    );

    ## If the modifications view was selected
    if ($parameters{display_options} =~ /ShowModifications/) {
      @column_array = (
        $column_array[0],
        ["modified_peptide_sequence","MPI.modified_peptide_sequence","Modified Peptide Sequence"],
        $column_array[2],
        ["n_observations","MPI.n_observations","N Obs"],
        @column_array[4..5],
        ["peptide_charge","MPI.peptide_charge","Peptide Charge"],
        ["monoisotopic_parent_mz","STR(MPI.monoisotopic_parent_mz,10,4)","Monoisotopic Parent m/z"],
        @column_array[6..$#column_array],
      );
      $modification_join = qq~
        LEFT JOIN $TBAT_MODIFIED_PEPTIDE_INSTANCE MPI
          ON ( PI.peptide_instance_id = MPI.peptide_instance_id )
      ~;
    }

    ## If the chromosomal coordinate mappings view was selected
    if ($parameters{display_options} =~ /ShowMappings/) {
      @column_array = (
        @column_array[0,1],
        ["biosequence_name","BS.biosequence_name","Biosequence Name"],
        ["biosequence_accessor","DBX.accessor","biosequence_accessor"],
        ["biosequence_accessor_suffix","DBX.accessor_suffix","biosequence_accessor_suffix"],
        ["biosequence_accession","BS.biosequence_accession","biosequence_accession"],
        @column_array[2..5],
        ["chromosome","PM.chromosome","Chrom"],
        ["start_in_chromosome","PM.start_in_chromosome","Start Position"],
        ["end_in_chromosome","PM.end_in_chromosome","End Position"],
        ["strand","PM.strand","Strand"],
        @column_array[6..$#column_array],
      );
    } else {
      # Without the mappings view, the PM/BS joins are only needed when a
      # biosequence constraint refers to them.
      $mapping_sort = 'ORDER BY P.peptide_accession';
      $mapping_constraints = '';
      if ( ! ($parameters{biosequence_name_constraint} ||
              $parameters{biosequence_gene_name_constraint} ||
              $parameters{biosequence_desc_constraint} ||
              $biosequence_names_clause ne '') ){
        $mapping_join = '';
      }
    }
  }

  #### Set flag to display SQL statement if user selected
  if ( $parameters{display_options} =~ /ShowSQL/ ) {
    $show_sql = 1;
  }

  #@column_array = ( @column_array ,
  #  ["row_number" , "row_number() over (order by P.peptide_accession)", "row_number"]
  #  );

  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();

  ## Sends @column_array_ref to build_SQL_columns_list, which
  ## (1) appends the 2nd element in array to $columns_clause
  ## (2) fills %colnameidx_ref as a hash with key = 1st element
  ## and value = 3rd element, and (3) fills @column_titles_ref
  ## array with the 3rd element
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );

  my $peptide_instance_id_constraint = '';
  if ($peptide_instance_in ne ''){
    $peptide_instance_id_constraint =
      "and PI.peptide_instance_id in ( $peptide_instance_in ) ";
  }

  #### Define the SQL statement
  # $mapping_constraints is empty unless the PM mapping join is present,
  # so the chromosome/start/end/strand constraints only apply then.
  $sql = qq~
    SELECT DISTINCT $limit_clause $columns_clause
    FROM $TBAT_PEPTIDE_INSTANCE PI
    INNER JOIN $TBAT_PEPTIDE P
      ON ( PI.peptide_id = P.peptide_id )
    INNER JOIN $TBAT_ATLAS_BUILD AB
      ON ( PI.atlas_build_id = AB.atlas_build_id )
    LEFT JOIN $TBAT_BIOSEQUENCE_SET BSS
      ON ( AB.biosequence_set_id = BSS.biosequence_set_id )
    LEFT JOIN $TB_ORGANISM O
      ON ( BSS.organism_id = O.organism_id )
    $modification_join
    $mapping_join
    WHERE 1 = 1
    $peptide_instance_id_constraint
    $atlas_build_clause
    $biosequence_name_clause
    $biosequence_names_clause
    $biosequence_gene_name_clause
    $biosequence_desc_clause
    $peptide_name_clause
    $peptide_sequence_clause
    $best_probability_clause
    $n_observations_clause
    $n_samples_clause
    $empirical_proteotypic_clause
    $n_protein_mappings_clause
    $n_genome_locations_clause
    $is_exon_spanning_clause
    $allow_low_ntt_clause
    $peptide_ids_clause
    $mapping_constraints
  ~;

  if ($parameters{display_options} =~ /ShowModifications/) {
    if($modified_peptide_sequence_clause ne ''){
      # Re-open the clause's closing paren so an ESCAPE can be attached
      $modified_peptide_sequence_clause =~ s/\)$//;
      $sql .= $modified_peptide_sequence_clause ."ESCAPE '\\')";
    }
  }
  $sql .= $mapping_sort;

  #### Certain types of actions should be passed to links
  my $pass_action = "QUERY";
  $pass_action = $apply_action if ($apply_action =~ /QUERY/i);

  #### Pass nearly all of the constraints down to a child query
  # NOTE(review): $parameters_list is built here but not referenced by
  # anything else in this sub.
  my @parameters_to_pass;
  my $parameters_list = '';
  while ( ($key,$value) = each %input_types ) {
    if ($key ne 'sort_order' && $key ne 'display_options') {
      if ($parameters{$key}) {
        push(@parameters_to_pass,"$key=$parameters{$key}");
      }
    }
  }
  if (@parameters_to_pass) {
    $parameters_list = join('&',@parameters_to_pass);
  }

  ## tailor the chromosomal url link to a genome browser by the organism name:
  my $organism_full_name = getOrganismFullName(organism_id => $organism_id);
  my $chrom_url_link;
  if ($organism_full_name =~ /^Halobacterium/) {
    $chrom_url_link = "http://www.genome.jp/kegg-bin/show_genomemap?ORG=hal&ACCESSION=\%$colnameidx{biosequence_name}V";
  } else {
    ## assume it's an organism supported by Ensembl
    $chrom_url_link = "http://www.ensembl.org/$organism_full_name/contigview?c=\%$colnameidx{chromosome}V\:\%$colnameidx{start_in_chromosome}V&w=10000";
  }

  #### Define the hypertext links for columns that need them
  %url_cols = (
    'Biosequence Name' => "$CGI_BASE_DIR/PeptideAtlas/GetProtein?protein_name=\%$colnameidx{biosequence_name}V&apply_action=$pass_action",
    'Biosequence Name_ATAG' => 'TARGET="Win1" ONMOUSEOVER="window.status=\'Show more information about this protein\'; return true"',

    'Peptide Accession' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$parameters{atlas_build_id}&searchWithinThis=Peptide+Name&searchForThis=\%$colnameidx{peptide_accession}V&action=QUERY",
    'Peptide Accession_ATAG' => 'TARGET="Win1" ONMOUSEOVER="window.status=\'Show summary for this peptide\'; return true"',

    'Peptide Sequence' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$parameters{atlas_build_id}&searchWithinThis=Peptide+Sequence&searchForThis=\%$colnameidx{peptide_sequence}V&action=QUERY",
    'Peptide Sequence_ATAG' => 'TARGET="Win1" ONMOUSEOVER="window.status=\'Show summary for this peptide\'; return true"',

    'Peptides that include this sequence' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptides?_tab=2&atlas_build_id=$parameters{atlas_build_id}&peptide_ids_constraint=\%$colnameidx{is_subpeptide_of}V&action=QUERY",
    'Peptides that include this sequence_ATAG' => 'TARGET="Win1" ONMOUSEOVER="window.status=\'Show peptides with these IDs\'; return true"',

    'Experiment IDs' => "$CGI_BASE_DIR/PeptideAtlas/GetPeptide?_tab=3&atlas_build_id=$parameters{atlas_build_id}&searchWithinThis=Peptide+Name&searchForThis=\%$colnameidx{peptide_accession}V&action=QUERY",
    'Experiment IDs_ATAG' => 'TARGET="Win1" ONMOUSEOVER="window.status=\'Show experiment summaries for this peptide\'; return true"',

    'Start Position' => "$chrom_url_link",
    'Start Position_ATAG' => 'TARGET="_blank" ONMOUSEOVER="window.status=\'Show peptide in genome viewer\'; return true"',
  );

  #### Define columns that should be hidden in the output table
  %hidden_cols = (
    'biosequence_accession' => 1,
    'biosequence_accessor' => 1,
    'biosequence_accessor_suffix' => 1,
    # 'peptide_accession' => 1, not sure why this was here but it was causing "DAS Format" TSC resultsets to be bad. Deutsch
    'Organism' => 1,
    'PeptideAtlas Name' => 1,
    'row_number' => 1,
  );
  if (! $parameters{enzyme_ids}){
    $hidden_cols{'Protease IDs'} = 1;
  }

  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /QUERY|VIEWRESULTSET|VIEWPLOT/i ) {

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /QUERY/i) {

      $sbeams->display_sql(
        sql => $sql,
        use_tabbed_panes => 1
      ) if ($show_sql);

      my $use_caching = (defined $parameters{use_caching})
        ? $parameters{use_caching}
        : 1;
      $log->info( "Caching is $use_caching" );

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(
        sql_query=>$sql,
        resultset_ref=>$resultset_ref,
        use_caching => $use_caching
      );

      #if (scalar @{$resultset_ref->{data_ref}} < $MAX_ROW_NUMBER){
      #  $parameters{row_limit} = '';
      #}

      #### Post process the resultset
      if ( $resultset_ref->{from_cache} ) {
        $log->info( "Skipping post-processing with cached RS" );
      } else {
        postProcessResultset(
          rs_params_ref=>\%rs_params,
          resultset_ref=>$resultset_ref,
          query_parameters_ref=>\%parameters,
          column_titles_ref=>\@column_titles,
        ) if ($parameters{display_options} =~ /DASFormat/i);
      }

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      my %write_params = (
        rs_table => $TBAT_ATLAS_BUILD,
        key_field => 'atlas_build_id',
        key_value => $parameters{atlas_build_id}
      );
      $sbeams->writeResultSet(
        resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        resultset_params_ref=>\%rs_params,
        query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME",
        column_titles_ref=>\@column_titles,
        %write_params
      );
    }

    #### Construct table help
    my $obs_help = get_table_help( 'peptides' );

    $resultset_ref->{types_list_ref}->[$colnameidx{best_probability}] = 'varchar';

    #### Display the resultset
    $sbeams->displayResultSet(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,
      hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,
      use_tabbed_panes => 1,
      column_titles_ref=>\@column_titles,
      column_help=>$obs_help,
      base_url=>$base_url,
    );

    #### Display the resultset controls
    $sbeams->displayResultSetControls(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      use_tabbed_panes => 1,
      base_url=>$base_url,
    );

    #### Display a plot of data from the resultset
    $sbeams->displayResultSetPlot_plotly(
      rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      column_titles_ref=>\@column_titles,
      use_tabbed_panes => 1,
      mouseover_column => 'peptide_sequence',
      mouseover_url => $url_cols{'Peptide Sequence'},
      mouseover_tag => '%1V',
      base_url=>$base_url,
    );

  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      # NOTE(review): this message appears to have lost its HTML markup;
      # confirm the intended tags against the upstream file.
      print "\n\nSelect parameters above and press QUERY\n\n\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }

} # end handle_request


###############################################################################
# _build_biosequence_names_clause
#
# Builds the SQL fragment that restricts BS.biosequence_name to a list of
# protein names (as read from an uploaded file).
#
# Arguments: the list of names.
# Returns:   '' for an empty list; an "IN ( ... )" clause when no name
#            contains '%'; otherwise an OR-ed group of LIKE tests.
# Single quotes inside names are doubled so a name cannot terminate the
# SQL string literal.
###############################################################################
sub _build_biosequence_names_clause {
  my @names = @_;
  return '' unless @names;

  my @quoted;
  foreach my $name (@names) {
    (my $escaped = $name) =~ s/'/''/g;
    push(@quoted, "'$escaped'");
  }

  if ( grep { /%/ } @names ) {
    return ' AND ( '
      . join(' OR ', map { "BS.biosequence_name LIKE $_" } @quoted)
      . ' )';
  }
  return ' AND BS.biosequence_name IN ( ' . join(',', @quoted) . ' )';

} # end _build_biosequence_names_clause


###############################################################################
# evalSQL
#
# Callback for translating Perl variables into their values,
# especially the global table variables to table names
###############################################################################
sub evalSQL {
  my $sql = shift;

  # NOTE(review): string eval interpolates (and can execute) whatever is in
  # $sql; callers must never pass externally supplied text.
  return eval "\"$sql\"";

} # end evalSQL


###############################################################################
# postProcessResultset
#
# Perform some additional processing on the resultset that would otherwise
# be very awkward to do in SQL.
#
# For every row whose n_protein_mappings is greater than 1, appends
# "(degen)" to the peptide accession, or "(degen_NLoc=N)" when the peptide
# also has more than one genome location.  Dies on a row that has more than
# one genome location but at most one protein mapping.
#
# Named arguments: resultset_ref (modified in place); rs_params_ref,
# query_parameters_ref and column_titles_ref are accepted but not used.
# Returns 1.
###############################################################################
sub postProcessResultset {
  my %args = @_;

  #### Process the arguments list
  my $resultset_ref = $args{'resultset_ref'};

  my $rows = $resultset_ref->{data_ref};
  my $cols = $resultset_ref->{column_hash_ref};

  foreach my $row (@{$rows}) {
    my $peptide_accession = $row->[$cols->{peptide_accession}];
    my $nprot = $row->[$cols->{n_protein_mappings}];
    my $nloc = $row->[$cols->{n_genome_locations}];

    if ($nprot > 1) {
      if ($nloc > 1) {
        $peptide_accession .= "(degen_NLoc=$nloc)";
      } else {
        $peptide_accession .= "(degen)";
      }
      $row->[$cols->{peptide_accession}] = $peptide_accession;

    } elsif ($nloc > 1) {
      die("ERROR: nprot ($nprot) <= 1 but nloc ($nloc) > 1. This is wrong. Please report this error.");
    }
  }

  return 1;

} # end postProcessResult


#######################################################################
# getOrganismFullName
#
# Looks up the full_name of a non-deleted organism record by organism_id
# and returns it with spaces replaced by underscores (undef when no row
# is found).
#######################################################################
sub getOrganismFullName {
  my %args = @_;

  my $organism_id = $args{organism_id};

  my $sql = qq~
    SELECT full_name
    FROM $TB_ORGANISM
    WHERE organism_id = '$organism_id'
    AND record_status != 'D'
  ~;

  my ($full_name) = $sbeams->selectOneColumn($sql);

  ## replace spaces with _
  $full_name =~ s/ /\_/g if (defined $full_name);

  return $full_name;
}


#######################################################################
# get_table_help
#
# Returns the column-description help section for a named table, as
# produced by $sbeamsMOD->get_table_help_section.  Only 'peptides' is
# known; an empty name returns '' and an unknown name returns nothing.
#######################################################################
sub get_table_help {
  my $name = shift;
  return '' unless $name;
  my @entries;
  my $hidetext;
  my $showtext;
  my $heading;
  my $description;

  if ( $name eq 'peptides' ) {
    @entries = (
      { key => 'Peptide Accession', value => 'Accession within PeptideAtlas. Click for details on peptide.' },
      { key => 'Peptide Sequence', value => 'Sequence excluding modifications. Click for details on peptide' },
      { key => 'Best Prob', value => 'PeptideAtlas probability for best observation of this peptide in this atlas build' },
      { key => 'N Obs', value => 'Number of spectra identified to this peptide in this atlas build' },
      { key => 'Empirical Proteotypic Score', value => 'In PeptideAtlas, the fraction of observations of this protein that are supported by at least one observation of this peptide. Values range from 0 to 1; close to 1 means that this peptide is likely to be observed whenever this protein is observed.' },
      { key => 'SSRCalc Relative Hydrophob', value => 'A measure of the hydrophobicity of this peptide' },
      { key => 'N Experiments', value => 'Number of experiments this peptide was observed in for this atlas build' },
      { key => 'N Protein Mappings', value => 'Number of protein identifiers that this peptide maps to for the biosequence set associated with this atlas build' },
      { key => 'N Unique Locations on Genome', value => 'Number of genome locations that this peptide maps to' },
      { key => 'Spans exons?', value => 'Does the peptide span multiple exons?' },
      { key => 'Peptides that include this sequence', value => 'Click for peptides that have this peptide as a subsequence' },
    );
    $showtext = 'show column descriptions';
    $hidetext = 'hide column descriptions';
    $heading = 'Identified Peptides';
    $description= 'Peptides identifications supported by the data';
  }

  return unless @entries;
  my $help = $sbeamsMOD->get_table_help_section(
    name => $name,
    description => $description,
    heading => $heading,
    entries => \@entries,
    showtext => $showtext,
    hidetext => $hidetext
  );
  return $help;

} # end get_table_help