#!/usr/local/bin/perl ############################################################################### # Program : BrowseProteinSummary # Author : Eric Deutsch # $Id$ # # Description : This program allows users to # browse through a summary of proteins from experiments. # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Proteomics; use SBEAMS::Proteomics::Settings; use SBEAMS::Proteomics::Tables; $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::Proteomics; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); #use CGI; #$q = new CGI; ############################################################################### # Set program name and usage banner for command line use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. 
###############################################################################
# main
#
# Authenticates the user (exiting if no username is returned), parses the
# input parameters, processes the standard "state" parameters and then
# renders the page header, the request itself and the page footer.
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
    permitted_work_groups_ref=>['Proteomics_user','Proteomics_admin',
      'Proteomics_readonly'],
    #connect_read_only=>1,
    #allow_anonymous_access=>1,
  ));

  #### Read in the default input parameters
  my %parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);

  #### Process generic "state" parameters before we start
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);

  #### Decide what action to take based on information so far
  if (defined($parameters{action}) && $parameters{action} eq "???") {
    # Some action
  } else {
    $sbeamsMOD->display_page_header();
    handle_request(ref_parameters=>\%parameters);
    $sbeamsMOD->display_page_footer();
  }

} # end main


###############################################################################
# Handle Request
#
# Displays the query form, converts the form parameters into SQL constraint
# clauses, builds the protein summary query and, when the action is a QUERY
# or VIEWRESULTSET, fetches/recalls the resultset and displays it.
#
# Arguments (named):
#   ref_parameters - reference to the hash of input parameters (required;
#                    dies if missing).  The hash is copied, so the caller's
#                    hash is not modified here.
#
# Returns early (after any message printed by the constraint parser or by
# this routine) if a constraint cannot be parsed, if no search batch was
# selected, or if the sort order looks suspicious.
###############################################################################
sub handle_request {
  my %args = @_;

  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};

  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);

  #### Read in the standard form values
  my $apply_action = $parameters{'action'} || $parameters{'apply_action'} || '';
  my $TABLE_NAME = $parameters{'QUERY_NAME'};

  #### Set some specific settings for this program
  my $CATEGORY = "Browse Protein Summary";
  $TABLE_NAME = "PR_BrowseProteinSummary" unless ($TABLE_NAME);
  ($PROGRAM_FILE_NAME) =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"PROGRAM_FILE_NAME");
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";

  #### Get the columns and input types for this table/query
  my @columns = $sbeamsMOD->returnTableInfo($TABLE_NAME,"ordered_columns");
  my %input_types = $sbeamsMOD->returnTableInfo($TABLE_NAME,"input_types");

  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);

  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  my @resultset_column_titles = ();
  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(
      resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      resultset_params_ref=>\%rs_params,
      column_titles_ref=>\@resultset_column_titles,
    );
    $n_params_found = 99;
  }

  #### Set some reasonable defaults if no parameters supplied
  unless ($n_params_found) {
    $parameters{input_form_format} = "minimum_detail";
    $parameters{protein_group_probability_constraint} = '>=0.9';
    $parameters{protein_probability_constraint} = '>=0.9';
  }

  #### Apply any parameter adjustment logic
  #none

  #### Display the user-interaction input form
  $sbeams->display_input_form(
    TABLE_NAME=>$TABLE_NAME,CATEGORY=>$CATEGORY,apply_action=>$apply_action,
    PROGRAM_FILE_NAME=>$PROGRAM_FILE_NAME,
    parameters_ref=>\%parameters,
    input_types_ref=>\%input_types,
  );

  #### Display the form action buttons
  $sbeams->display_form_buttons(TABLE_NAME=>$TABLE_NAME);

  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
  $sbeams->display_page_footer(close_tables=>'YES',
    separator_bar=>'YES',display_footer=>'NO');


  #########################################################################
  #### Process all the constraints

  #### Build SEARCH BATCH / EXPERIMENT constraint
  my $search_batch_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"SB.search_batch_id",
    constraint_type=>"int_list",
    constraint_name=>"Search Batch List",
    constraint_value=>$parameters{search_batch_id}
  );
  return if ($search_batch_clause eq '-1');
  unless (defined($parameters{search_batch_id}) &&
          $parameters{search_batch_id}) {
    print "

You must select at least one experiment to browse!

\n\n";
    return;
  }

  #### The gene annotation level defaults to 'leaf' when not supplied
  $parameters{gene_annotation_level_constraint} = 'leaf'
    unless ($parameters{gene_annotation_level_constraint});

  #### Build all remaining constraints.  Each entry is:
  #### [clause key, SQL column, constraint type, display name, parameter]
  #### The entries are processed in order and the routine returns at the
  #### first constraint for which the parser reports failure ('-1').
  #### Note that n_annotations and gene_annotation_level are parsed (and so
  #### validated) but their clauses are not placed in the WHERE clause below.
  my @constraint_specs = (
    ['protein_group_probability','PG.probability','flexible_float',
     'Probability','protein_group_probability_constraint'],
    ['protein_probability','PROT.probability','flexible_float',
     'Probability','protein_probability_constraint'],
    ['biosequence_name','BS.biosequence_name','plain_text',
     'Reference','reference_constraint'],
    ['gene_name','BS.biosequence_gene_name','plain_text',
     'Gene Name','gene_name_constraint'],
    ['description','BS.biosequence_desc','plain_text',
     'Protein Description','description_constraint'],
    ['molecular_function','MFA.annotation','plain_text',
     'Molecular Function','molecular_function_constraint'],
    ['biological_process','BPA.annotation','plain_text',
     'Biological Process','biological_process_constraint'],
    ['cellular_component','CCA.annotation','plain_text',
     'Cellular Component','cellular_component_constraint'],
    ['protein_domain','IPDA.annotation','plain_text',
     'InterPro Protein Domain','protein_domain_constraint'],
    ['fav_codon_frequency','BPS.fav_codon_frequency','flexible_float',
     'Favored Codon Frequency','fav_codon_frequency_constraint'],
    ['transmembrane_class','BPS.transmembrane_class','text_list',
     'Transmembrane Class','transmembrane_class_constraint'],
    ['n_transmembrane_regions','BPS.n_transmembrane_regions','flexible_int',
     'Number of Transmembrane regions','n_transmembrane_regions_constraint'],
    ['protein_length','DATALENGTH(BS.biosequence_seq)','flexible_int',
     'Protein Length','protein_length_constraint'],
    ['accession','BS.biosequence_accession','plain_text',
     'Accession','accession_constraint'],
    ['n_annotations','row_count','flexible_int',
     'Number of Matches','n_annotations_constraint'],
    ['quantitation','SQ.ratio_mean','flexible_float',
     'Quantitation Constraint','quantitation_constraint'],
    ['gene_annotation_level','hierarchy_level','plain_text',
     'Gene Annotation Level Constraint','gene_annotation_level_constraint'],
  );
  my %clause;
  foreach my $spec (@constraint_specs) {
    my ($clause_key,$column,$type,$name,$parameter_name) = @{$spec};
    $clause{$clause_key} = $sbeams->parseConstraint2SQL(
      constraint_column=>$column,
      constraint_type=>$type,
      constraint_name=>$name,
      constraint_value=>$parameters{$parameter_name}
    );
    return if ($clause{$clause_key} eq '-1');
  }


  #### Build SORT ORDER
  #### NOTE(review): this keyword blacklist is a weak defence; the sort
  #### order is still interpolated into the SQL statement below.
  my $user_sort_order = '';
  if ($parameters{sort_order}) {
    if ($parameters{sort_order} =~ /SELECT|TRUNCATE|DROP|DELETE|FROM|GRANT/i) {
      print "

Cannot parse Sort Order! Check syntax.

\n\n";
      return;
    }
    $user_sort_order = $parameters{sort_order};
  }


  #### Build ROWCOUNT constraint
  #### FIXME: This is fundamentally broken because of the changing of the
  #### number of rows in the resultset
  #### Only a plain run of digits within range is accepted; anything else
  #### (undefined, trailing junk, out of range) falls back to 50000
  my $row_limit = $parameters{row_limit};
  $parameters{row_limit} = 50000
    unless (defined($row_limit) && $row_limit =~ /^\s*\d+\s*$/ &&
            $row_limit > 0 && $row_limit <= 1000000);
  my $limit_clause = $sbeams->buildLimitClause(
    row_limit=>$parameters{row_limit});


  #### Define some variables needed to build the query
  my $group_by_clause = "";
  my @column_array;

  #### Work on a defined copy of the display options for pattern matching
  my $display_options = $parameters{display_options};
  $display_options = '' unless (defined($display_options));

  #### The GO tables are needed if the user opted to see the GO columns
  #### or supplied any GO constraint
  my $want_GO = 0;
  $want_GO = 1
    if ( $display_options =~ /ShowGOColumns/ ||
         $clause{molecular_function}.$clause{biological_process}.
         $clause{cellular_component}.$clause{protein_domain} );

  #### If so, add the GO columns and join in the GO tables
  my @additional_columns = ();
  my $GO_join = "";
  if ($want_GO) {
    @additional_columns = (
      ["molecular_function","MFA.annotation","Molecular Function"],
      ["molecular_function_GO","MFA.external_accession","molecular_function_GO"],
      ["biological_process","BPA.annotation","Biological Process"],
      ["biological_process_GO","BPA.external_accession","biological_process_GO"],
      ["cellular_component","CCA.annotation","Cellular Component"],
      ["cellular_component_GO","CCA.external_accession","cellular_component_GO"],
      ["interpro_protein_domain","IPDA.annotation","InterPro Protein Domain"],
      ["interpro_protein_domain_GO","IPDA.external_accession","interpro_protein_domain_GO"],
    );

    #### The level is placed inside a quoted SQL literal, so double any
    #### single quotes to keep the value from terminating the literal
    (my $annotation_level = $parameters{gene_annotation_level_constraint})
      =~ s/'/''/g;

    $GO_join = qq~
        LEFT JOIN $TBPR_BIOSEQUENCE_ANNOTATED_GENE AG
             ON ( BS.biosequence_id = AG.biosequence_id )
        LEFT JOIN $TBBL_GENE_ANNOTATION MFA
             ON ( AG.annotated_gene_id = MFA.annotated_gene_id
                   AND MFA.gene_annotation_type_id = 1 AND MFA.idx = 0
                   AND MFA.hierarchy_level = '$annotation_level' )
        LEFT JOIN $TBBL_GENE_ANNOTATION BPA
             ON ( AG.annotated_gene_id = BPA.annotated_gene_id
                   AND BPA.gene_annotation_type_id = 2 AND BPA.idx = 0
                   AND BPA.hierarchy_level = '$annotation_level' )
        LEFT JOIN $TBBL_GENE_ANNOTATION CCA
             ON ( AG.annotated_gene_id = CCA.annotated_gene_id
                   AND CCA.gene_annotation_type_id = 3 AND CCA.idx = 0
                   AND CCA.hierarchy_level = '$annotation_level' )
        LEFT JOIN $TBBL_GENE_ANNOTATION IPDA
             ON ( AG.annotated_gene_id = IPDA.annotated_gene_id
                   AND IPDA.gene_annotation_type_id = 4 AND IPDA.idx = 0
                   AND IPDA.hierarchy_level = '$annotation_level' )
    ~;
  }


  #### Add in some extra columns if the user wants to see them
  if ( $display_options =~ /ShowExtraProteinProps/ ) {
    @additional_columns = (
      ["fav_codon_frequency","STR(BPS.fav_codon_frequency,10,3)","Favored Codon Frequency"],
      ["transmembrane_class","BPS.transmembrane_class","Transmembrane Regions Class"],
      ["n_transmembrane_regions","BPS.n_transmembrane_regions","Number of Transmembrane Regions"],
      ["protein_length","DATALENGTH(BS.biosequence_seq)","Protein Length"],
      @additional_columns,
    );
  }


  #### Add in the bait protein column if the user wants to see them
  my @bait_columns = ();
  if ( $display_options =~ /ShowBaitProteins/ ) {
    @bait_columns = (
      ["pulldown_protein_name","PE.pulldown_protein_name","Pulldown Protein"],
    );
  }


  #### Define the desired columns in the query
  #### [friendly name used in url_cols,SQL,displayed column title]

  #### If the user wants to see the peptides
  my @peptide_columns = ();
  my $peptide_join = '';
  my $peptide_order_by = '';
  if ( $display_options =~ /ShowPeptides/ ) {
    @peptide_columns = (
      ["peptide_sequence","PEP.peptide_sequence","Peptide"],
      ["peptideatlas_peptide_link","NULL","PA Pep"],
      ["charge","PEP.charge","Chg"],
      ["peptide_probability","STR(PEP.nsp_adjusted_probability,7,3)","Peptide Prob"],
      ["peptide_n_instances","PEP.n_instances","Peptide N Instances"],
    );
    $peptide_order_by =
      ",PEP.nsp_adjusted_probability,PEP.peptide_sequence,PEP.charge";
    $peptide_join = qq~
        INNER JOIN $TBPR_PROTEOMICS_PEPTIDE PEP
              ON ( PROT.protein_id = PEP.protein_id )
    ~;
  }


  @column_array = (
    ["experiment_tag","experiment_tag","Exp"],
    ["search_batch_subdir","search_batch_subdir","DB"],
    ["search_batch_id","SB.search_batch_id","search_batch_id"],
    @bait_columns,
    ["group_number","PG.group_number","Group #"],
    ["pseudo_name","PG.pseudo_name","Group Name"],
    ["group_probability","STR(PG.probability,7,3)","Group Prob"],
    ["biosequence_name","BS.biosequence_name","Protein Name"],
    ["accessor","DBX.accessor","accessor"],
    ["accessor_suffix","DBX.accessor_suffix","accessor_suffix"],
    ["biosequence_accession","BS.biosequence_accession","Accession"],
    ["peptideatlas_protein_link","NULL","PA Prot"],
    ["canonical_name","BSCAN.biosequence_name","Canonical Name"],
    ["protein_probability","STR(PROT.probability,7,3)","Protein Prob"],
    @peptide_columns,
    ["protein_n_peptides","PROT.n_peptides","Total Protein Peptides"],
    ["protein_quant_mean","STR(SQ.ratio_mean,7,3)","Protein Quant Average"],
    ["protein_quant_uncert","STR(SQ.ratio_standard_dev,7,3)","Protein Quant Uncertainty"],
    ["protein_n_quant","SQ.ratio_number_peptides","Protein Quant N"],
    @additional_columns,
    ["biosequence_desc","BS.biosequence_desc","Reference Description"],
    ["organism","O.full_name","Organism"],
  );

  #### Emit exactly one ORDER BY: the user's sort order when one was
  #### supplied, otherwise the default ordering (extended by the peptide
  #### columns when peptides are shown).  Previously a user sort order was
  #### appended after the default as a second "ORDER BY", or discarded
  #### when peptides were shown.
  my $order_by_clause;
  if ($user_sort_order) {
    $order_by_clause = "ORDER BY $user_sort_order";
  } else {
    $order_by_clause = "ORDER BY PG.group_number,PROT.probability DESC,".
      "BS.biosequence_name$peptide_order_by";
  }


  #### Limit the width of the Reference column if user selected
  if ( $display_options =~ /MaxRefWidth/ ) {
    $max_widths{'Reference'} = 20;
  }

  #### Set flag to display SQL statement if user selected
  if ( $display_options =~ /ShowSQL/ ) {
    $show_sql = 1;
  }


  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );


  #### Define the SQL statement
  my $BIOLINK = $DBPREFIX{BioLink};
  my $sql = qq~
      SELECT $limit_clause->{top_clause} $columns_clause
        FROM $TBPR_PROTEIN PROT
       INNER JOIN $TBPR_PROTEIN_GROUP PG
             ON ( PROT.protein_group_id = PG.protein_group_id )
        LEFT JOIN $TBPR_SUMMARY_QUANTITATION SQ
             ON ( PROT.summary_quantitation_id = SQ.summary_quantitation_id )
       INNER JOIN $TBPR_BIOSEQUENCE BS
             ON ( PROT.biosequence_id = BS.biosequence_id )
       INNER JOIN $TBPR_BIOSEQUENCE_SET BSS
             ON ( BS.biosequence_set_id = BSS.biosequence_set_id )
       INNER JOIN $TB_ORGANISM O
             ON ( BSS.organism_id = O.organism_id )
        LEFT JOIN $TB_DBXREF DBX ON ( BS.dbxref_id = DBX.dbxref_id )
       INNER JOIN $TBPR_PROTEIN_SUMMARY PS
             ON ( PG.protein_summary_id = PS.protein_summary_id )
       INNER JOIN $TBPR_SEARCH_BATCH_PROTEIN_SUMMARY SBPS
             ON ( PS.protein_summary_id = SBPS.protein_summary_id )
       INNER JOIN $TBPR_SEARCH_BATCH SB
             ON ( SBPS.search_batch_id = SB.search_batch_id )
       INNER JOIN $TBPR_PROTEOMICS_EXPERIMENT PE
             ON ( SB.experiment_id = PE.experiment_id )
        LEFT JOIN $TBPR_BIOSEQUENCE_PROPERTY_SET BPS
             ON ( BS.biosequence_id = BPS.biosequence_id )
       $peptide_join
        LEFT JOIN ${BIOLINK}biosequence BSLINK
             ON ( BS.biosequence_gene_name = BSLINK.biosequence_name )
        LEFT JOIN ${BIOLINK}relationship R
             ON ( BSLINK.biosequence_id = R.biosequence2_id
                  AND R.relationship_type_id = 1)
        LEFT JOIN ${BIOLINK}biosequence BSCAN
             ON ( R.biosequence1_id = BSCAN.biosequence_id )
       $GO_join
       WHERE 1 = 1
      $search_batch_clause
      $clause{biosequence_name}
      $clause{description}
      $clause{gene_name}
      $clause{accession}
      $clause{molecular_function}
      $clause{biological_process}
      $clause{cellular_component}
      $clause{protein_domain}
      $clause{transmembrane_class}
      $clause{n_transmembrane_regions}
      $clause{fav_codon_frequency}
      $clause{protein_length}
      $clause{protein_group_probability}
      $clause{protein_probability}
      $clause{quantitation}
      $group_by_clause
      $order_by_clause
      $limit_clause->{trailing_limit_clause}
  ~;


  #### Ad-Hoc, and sort of dangerous way of reversing the sense of the
  #### quantitation ratio.  "d0d8" is the default and leaves the SQL alone;
  #### "d8d0" swaps the numerator and denominator by textually swapping
  #### every "d0_" and "d8_" in the statement
  if ( defined($parameters{quantitation_values}) &&
       $parameters{quantitation_values} eq "d8d0" ) {
    $sql =~ s/d0\_/dQQQ_/g;
    $sql =~ s/d8\_/d0_/g;
    $sql =~ s/dQQQ\_/d8_/g;
  }


  #### Certain types of actions should be passed to links
  my $pass_action = "QUERY";
  $pass_action = $apply_action if ($apply_action =~ /QUERY/i);


  #### Define columns that should be hidden in the output table
  %hidden_cols = ('accessor' => 1,
                  'accessor_suffix' => 1,
                  'search_batch_id' => 1,
                  'molecular_function_GO' => 1,
                  'biological_process_GO' => 1,
                  'cellular_component_GO' => 1,
                  'interpro_protein_domain_GO' => 1,
                  'Organism' => 1,
  );


  #### If there are titles from the resultset, use those
  if (@resultset_column_titles) {
    @column_titles = @resultset_column_titles;
  }


  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  if ($apply_action =~ /QUERY/i || $apply_action eq "VIEWRESULTSET") {

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /QUERY/i) {

      #### Show the SQL that will be or was executed
      $sbeams->display_sql(sql=>$sql) if ($show_sql);

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(
        sql_query=>$sql,
        resultset_ref=>$resultset_ref,
      );

      #### Post process the resultset
      if ($display_options =~ /pivot/i) {
        postProcessResultset(
          rs_params_ref=>\%rs_params,
          resultset_ref=>$resultset_ref,
          query_parameters_ref=>\%parameters,
          column_titles_ref=>\@column_titles,
        );
      }

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(
        resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        resultset_params_ref=>\%rs_params,
        query_name=>"$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME",
        column_titles_ref=>\@column_titles,
      );
    }

    #### Debugging
    if (1 == 0) {
      for (my $itmp = 0;
           $itmp < scalar(@{$resultset_ref->{types_list_ref}}); $itmp++) {
        print "column $itmp: ".$resultset_ref->{column_list_ref}->[$itmp].
          " (type ".$resultset_ref->{types_list_ref}->[$itmp].")
\n";
      }
    }

    #### Define the hypertext links for columns that need them
    %url_cols = getURLColumns(
      resultset_ref => $resultset_ref,
      query_parameters_ref => \%parameters,
      input_types_ref => \%input_types,
      pass_action => $pass_action,
    );

    #### Create Cytoscape format files if selected
    my $cytoscape = { template => 'BrowseProteinSummary' };
    if ($sbeams->output_mode() eq 'cytoscape') {
      prepareCytoscapeFiles(
        rs_params_ref=>\%rs_params,
        resultset_ref=>$resultset_ref,
        query_parameters_ref=>\%parameters,
        url_cols_ref=>\%url_cols,
        column_titles_ref=>\@column_titles,
        hidden_cols_ref=>\%hidden_cols,
        cytoscape=>$cytoscape,
      );
    }

    #### Display the resultset
    $sbeams->displayResultSet(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,
      hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
      cytoscape=>$cytoscape,
    );

    #### Display the resultset controls
    $sbeams->displayResultSetControls(
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      rs_params_ref=>\%rs_params,
      base_url=>$base_url,
      cytoscape=>$cytoscape,
    );

    #### Display a plot of data from the resultset
    $sbeams->displayResultSetPlot(
      rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
    );

    #### Create a Gaggle-compatible embedded microformat
    print $sbeams->createGaggleMicroformat(
      rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,
      query_parameters_ref=>\%parameters,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,
    );

  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "

Select parameters above and press QUERY

\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }

} # end handle_request


###############################################################################
# evalSQL
#
# Callback for translating Perl variables into their values,
# especially the global table variables to table names
#
# NOTE(review): this performs a string eval of its argument wrapped in
# double quotes, so any code embedded in the string is executed.  It must
# never be handed text that originates from user input.
###############################################################################
sub evalSQL {
  my $sql = shift;

  return eval "\"$sql\"";

} # end evalSQL


###############################################################################
# getExperimentNames: return a hash of the experiment (and possibly
# search_batch) names of the supplied list of id's
#
# Takes a comma-separated string of search_batch_ids (dies if it is missing
# or if any element is not an integer) and returns a hash keyed on
# search_batch_id.
###############################################################################
sub getExperimentNames {
  my $search_batch_ids = shift
    || die "getExperimentNames: missing search_batch_ids";

  #### Split into an array and insist that every element is an integer,
  #### because the list is interpolated directly into the SQL below
  my @search_batch_ids = split(/,/,$search_batch_ids);
  foreach my $search_batch_id (@search_batch_ids) {
    die "getExperimentNames: invalid search_batch_id '$search_batch_id'"
      unless ($search_batch_id =~ /^\s*\d+\s*$/);
    $search_batch_id =~ s/\s+//g;
  }
  my $search_batch_id_list = join(',',@search_batch_ids);

  #### Get the data for all the specified search_batch_ids
  my $sql = qq~
      SELECT search_batch_id,experiment_tag,set_tag
        FROM $TBPR_PROTEOMICS_EXPERIMENT PE
       INNER JOIN $TBPR_SEARCH_BATCH SB
             ON ( PE.experiment_id = SB.experiment_id )
       INNER JOIN $TBPR_BIOSEQUENCE_SET BSS
             ON ( SB.biosequence_set_id = BSS.biosequence_set_id )
       WHERE search_batch_id IN ( $search_batch_id_list )
  ~;
  my @rows = $sbeams->selectSeveralColumns($sql);

  #### Define some variables
  my $row;
  my %exp_tag_hash;          #### Contains just the experment tags
  my %exp_search_tag_hash;   #### Contains both exp and search tags
  my %unique_tags_hash;      #### Contains all exp tags in hash
  my $need_search_tags = 0;  #### Set if we need to do search tags

  #### Go ahead and build all the hashes. The idea here is that if all
  #### the selected search_batch_ids correspond to different experiments
  #### (common) then we just want to display the experiment names.
But, #### if two search_batch_id's correspond to two different search batches #### for the same experiment, then we need to display both experiment #### names and search library tags foreach $row (@rows) { my $search_batch_id = $row->[0]; my $experiment_tag = $row->[1]; my $set_tag = $row->[2]; $exp_tag_hash{$search_batch_id} = $experiment_tag; $exp_search_tag_hash{$search_batch_id} = "$experiment_tag($set_tag)"; if (exists($unique_tags_hash{$experiment_tag})) { $need_search_tags = 1; } $unique_tags_hash{$experiment_tag} = 1; } return %exp_search_tag_hash if ($need_search_tags); return %exp_tag_hash; } # end getExperimentNames ############################################################################### # postProcessResultset # # Perform some additional processing on the resultset that would otherwise # be very awkward to do in SQL. ############################################################################### sub postProcessResultset { my %args = @_; my ($i,$element,$key,$value,$line,$result,$sql); #### Process the arguments list my $resultset_ref = $args{'resultset_ref'}; my $rs_params_ref = $args{'rs_params_ref'}; my $query_parameters_ref = $args{'query_parameters_ref'}; my $column_titles_ref = $args{'column_titles_ref'}; my %rs_params = %{$rs_params_ref}; my %parameters = %{$query_parameters_ref}; #### Get a list of conditions to work with my %conditions = getExperimentNames($parameters{search_batch_id}); my @conditions = sort(values(%conditions)); #### Add a combined pseudo-condition push(@conditions,'combined'); #### Get some column number indexes my $search_batch_id_column_index = $resultset_ref->{column_hash_ref}->{search_batch_id}; my $biosequence_name_column_index = $resultset_ref->{column_hash_ref}->{biosequence_name}; my %col = %{$resultset_ref->{column_hash_ref}}; #### Set a flag for peptide processing my $have_peptides = 0; $have_peptides = 1 if (exists($col{peptide_sequence})); #### Loop over each row in the resultset, coalescing into a data hash 
my %data; my $n_rows = scalar(@{$resultset_ref->{data_ref}}); for (my $row=0;$row<$n_rows-1; $row++) { #### Extract a pointer to the row my $rowdata = $resultset_ref->{data_ref}->[$row]; #### Determine the search_batch_id and biosequence_name my $search_batch_id = $rowdata->[$col{search_batch_id}]; my $biosequence_name = $rowdata->[$col{biosequence_name}]; my $peptide_sequence = $rowdata->[$col{peptide_sequence} || 9999]; my $charge = $rowdata->[$col{charge} || 9999]; #my $key = substr(' ',0,10-length($group_number)).$group_number; my $key = $biosequence_name; $key .= $peptide_sequence.$charge if ($peptide_sequence); #### Store it in the new hash $data{$key}->{$conditions{$search_batch_id}} = $resultset_ref->{data_ref}->[$row]; } #### Define the columns that are to be in the new resultset #### First the initial columns before the data points my @prefix_columns = qw(biosequence_name accessor accessor_suffix search_batch_id biosequence_accession canonical_name); push(@prefix_columns,qw(peptide_sequence charge)) if ($have_peptides); my @prefix_column_types = qw(varchar varchar varchar int varchar varchar); push(@prefix_column_types,qw(varchar int)) if ($have_peptides); #### Then the data points my @data_columns = qw(protein_probability protein_n_peptides protein_quant_mean protein_quant_uncert protein_n_quant); push(@data_columns,qw(peptide_probability peptide_n_instances)) if ($have_peptides); my @data_column_types = qw(real int real real int); push(@data_column_types,qw(real int)) if ($have_peptides); if ($resultset_ref->{column_hash_ref}->{pulldown_protein_name}) { unshift(@data_columns,'pulldown_protein_name'); unshift(@data_column_types,'varchar'); } #### Finally the suffix columns after the data points my @suffix_columns = qw(fav_codon_frequency transmembrane_class n_transmembrane_regions protein_length molecular_function molecular_function_GO biological_process biological_process_GO cellular_component cellular_component_GO interpro_protein_domain 
interpro_protein_domain_GO biosequence_desc organism ); my @suffix_column_types = qw(real varchar int int varchar varchar varchar varchar varchar varchar varchar varchar varchar varchar ); my @new_column_names; my @new_column_titles; my @new_precisions; my @new_types; #### Push the pre-data columns onto the row array for (my $ielement=0; $ielement{column_hash_ref}->{$element}; if (defined($column_index)) { push(@new_column_names,$element); push(@new_column_titles,$column_titles_ref->[$column_index]); push(@new_precisions,$resultset_ref->{precisions_list_ref}-> [$column_index]); push(@new_types,$prefix_column_types[$ielement]); #print "$element ($column_titles_ref->[$column_index]) prec=". #$resultset_ref->{precisions_list_ref}->[$column_index]."
\n"; } } #### Push the data columns onto the array for (my $ielement=0; $ielement{column_hash_ref}->{$data_column}; push(@new_column_names,$condition.'__'.$data_column); push(@new_column_titles,$condition.' '.$column_titles_ref-> [$column_index]); push(@new_precisions,$resultset_ref->{precisions_list_ref}-> [$column_index]); push(@new_types,$data_column_types[$ielement]); #print $condition.'__'.$data_column." (".$condition.' '. #$column_titles_ref->[$column_index].") prec=$resultset_ref->". #"{precisions_list_ref}->[$column_index]
\n"; } } #### Push the post-data columns onto the row array for (my $ielement=0; $ielement{column_hash_ref}->{$element}; if (defined($column_index)) { push(@new_column_names,$element); push(@new_column_titles,$column_titles_ref->[$column_index]); push(@new_precisions,$resultset_ref->{precisions_list_ref}-> [$column_index]); push(@new_types,$suffix_column_types[$ielement]); #print "$element ($column_titles_ref->[$column_index]) prec=". #$resultset_ref->{precisions_list_ref}->[$column_index]."
\n"; } } #### Loop over each row_key and print the pivoted summary my @new_data_array; foreach my $row_key (sort keys(%data)) { my @row; my %summaries; my ($first_condition) = keys(%{$data{$row_key}}); #### Push the pre-data columns onto the row array foreach my $element (@prefix_columns) { my $column_index = $resultset_ref->{column_hash_ref}->{$element}; if (defined($column_index)) { push(@row,$data{$row_key}->{$first_condition}->[$column_index]); } } #### Push the data columns onto the array foreach my $data_column (@data_columns) { foreach my $condition (@conditions) { if ($condition eq 'combined') { my $combined_value = ''; #### Sum the total number of peptides if ( $data_column =~ /(protein_n_peptides|peptide_n_instances)/) { $combined_value = 0; foreach my $el (@{$summaries{$data_column}}) { $combined_value += $el; } } #### Sum the total number of quantitated peptides if ($data_column eq 'protein_n_quant') { $combined_value = 0; foreach my $el (@{$summaries{$data_column}}) { $combined_value += $el; } } #### Combine the probabilities if ($data_column eq 'protein_probability') { #### Convert the probabilities to 1 - P my @values = (); foreach my $el (@{$summaries{$data_column}}) { push(@values,1-$el) if (defined($el)); } my $n_values = scalar(@values); if ($n_values == 0) { $combined_value = undef; } elsif ($n_values == 1) { $combined_value = sprintf('%.3f',1 - $values[0]); } else { my $product = $values[0]; for (my $i=1; $i<$n_values; $i++) { $product *= $values[$i]; } $combined_value = sprintf('%.3f',1 - $product); } } #### Combine the average quantitation if ( $data_column =~ /(protein_quant_mean|protein_quant_uncert)/) { if ($data_column eq 'protein_quant_mean') { #### Store pointer to what will be updated later $summaries{protein_quant_mean_pointer} = scalar(@row); } elsif ($data_column eq 'protein_quant_uncert' && defined($summaries{protein_quant_mean}) && defined($summaries{protein_quant_uncert}) ) { my ($mean,$uncertainty) = $sbeams->average( values => 
$summaries{protein_quant_mean},
                uncertainties => $summaries{protein_quant_uncert},
              );
              $combined_value = sprintf('%.3f',$uncertainty);
              if (defined($summaries{protein_quant_mean_pointer})) {
                $row[$summaries{protein_quant_mean_pointer}] =
                  sprintf('%.3f',$mean);
              }
            }
          }

          push(@row,$combined_value);

        } else {
          my $column_index = $resultset_ref->{column_hash_ref}->{$data_column};
          my $this_value = $data{$row_key}->{$condition}->[$column_index];
          if (defined($this_value)) {
            push(@row,$this_value);
            unless (defined($summaries{$data_column})) {
              my @tmp = ();
              $summaries{$column_index} = \@tmp;
            }
            push(@{$summaries{$data_column}},$this_value);
          } else {
            push(@row,'');
          }
        }

      }
    }

    #### Push the post-data columns onto the row array
    foreach my $element (@suffix_columns) {
      my $column_index = $resultset_ref->{column_hash_ref}->{$element};
      if (defined($column_index)) {
        push(@row,$data{$row_key}->{$first_condition}->[$column_index]);
      }
    }

    push(@new_data_array,\@row);

  }

  $resultset_ref->{data_ref} = \@new_data_array;
  $resultset_ref->{column_list_ref} = \@new_column_names;
  my $n_new_columns = scalar(@new_column_names);
  $resultset_ref->{precisions_list_ref} = \@new_precisions;
  $resultset_ref->{types_list_ref} = \@new_types;
  @{$column_titles_ref} = @new_column_titles;

  return 1;

} # end postProcessResult



###############################################################################
# getURLColumns
#
# Define the URL columns based on the latest column_list
#
# Named arguments:
#   resultset_ref        - hash ref; only {column_list_ref} (array ref of
#                          column names) is read here
#   query_parameters_ref - hash ref of the current query parameters
#   input_types_ref      - hash ref whose keys name the parameters that are
#                          candidates for being passed to the child query
#   pass_action          - value appended as apply_action= to the
#                          GetSearchHits links
#
# Returns the URL column definitions as a hash (flat key/value list), keyed
# by column title.  Keys with an _ATAG, _OPTIONS or _ISNULL suffix carry
# extra settings for the same column; their meaning is defined by whatever
# renders the resultset (not visible here).
#
# The "%<n>V" sequences embedded in the URLs contain the position of the
# named column in column_list_ref; presumably the display layer replaces
# them with that column's value for each row - TODO confirm.
###############################################################################
sub getURLColumns {
  my %args = @_;

  #### Process the arguments list
  my $resultset_ref = $args{'resultset_ref'};
  my $query_parameters_ref = $args{'query_parameters_ref'};
  my $input_types_ref = $args{'input_types_ref'};
  my $pass_action = $args{'pass_action'};

  #### Tolerate missing references instead of dying on the dereference
  my %parameters = %{$query_parameters_ref || {}};
  my %input_types = %{$input_types_ref || {}};


  #### Pass nearly all of the constraints down to a child query.
  #### The keys are sorted so that the generated link is identical from
  #### one request to the next; plain hash iteration order is not stable.
  #### NOTE(review): values are interpolated as-is, without URL escaping.
  my %not_passed = map { $_ => 1 }
    ('sort_order','display_options','reference_constraint');
  my @parameters_to_pass =
    map  { "$_=$parameters{$_}" }
    grep { !$not_passed{$_} && $parameters{$_} }
    sort(keys(%input_types));
  my $parameters_list = join('&',@parameters_to_pass);


  #### Define a new colnameidx based on the latest column names
  my @column_names = @{$resultset_ref->{column_list_ref} || []};
  my %colnameidx;
  @colnameidx{@column_names} = (0 .. $#column_names);


  #### Pieces shared by every GetSearchHits link
  my $search_hits_url = "$CGI_BASE_DIR/Proteomics/GetSearchHits?QUERY_NAME=PR_GetSearchHits&search_batch_id=$parameters{search_batch_id}";
  my $search_hits_suffix = "&display_options=BSDesc,MaxRefWidth&apply_action=$pass_action";


  #### Define the hypertext links for columns that need them
  my %url_cols = (
    'Accession' => "\%$colnameidx{accessor}V\%$colnameidx{biosequence_accession}V\%$colnameidx{accessor_suffix}V",
    'Accession_ATAG' => 'TARGET="Win1" ONMOUSEOVER="window.status=\'Show more information about this protein in source database\'; return true"',

    'Reference' => "${search_hits_url}&reference_constraint=\%$colnameidx{reference}V${search_hits_suffix}",
    'Reference_ATAG' => 'TARGET="Win2" ONMOUSEOVER="window.status=\'Show all occurrences of this protein in these experiments\'; return true"',

    'Peptide' => "${search_hits_url}&peptide_constraint=\%$colnameidx{peptide}V${search_hits_suffix}",
    'Peptide_ATAG' => 'TARGET="Win2" ONMOUSEOVER="window.status=\'Show all occurrences of this peptide in these experiments\'; return true"',

    'Peptide String' => "${search_hits_url}&peptide_string_constraint=\%$colnameidx{peptide_string}V${search_hits_suffix}",
    'Peptide String_ATAG' => 'TARGET="Win2" ONMOUSEOVER="window.status=\'Show all occurrences of this exact peptide instance in these experiments\'; return true"',

    'Count' => "${search_hits_url}&reference_constraint=\%$colnameidx{reference}V&peptide_constraint=\%$colnameidx{peptide}V&peptide_string_constraint=\%$colnameidx{peptide_string}V&${parameters_list}${search_hits_suffix}",
    'Count_ATAG' => 'TARGET="Win2" ONMOUSEOVER="window.status=\'Show just this many that match all above criteria\'; return true"',

    'Molecular Function' => "http://www.ebi.ac.uk/ego/QuickGO?mode=display&entry=\%$colnameidx{molecular_function_GO}V",
    'Molecular Function_ATAG' => 'TARGET="WinExt"',
    'Molecular Function_OPTIONS' => {semicolon_separated_list=>1},

    'Biological Process' => "http://www.ebi.ac.uk/ego/QuickGO?mode=display&entry=\%$colnameidx{biological_process_GO}V",
    'Biological Process_ATAG' => 'TARGET="WinExt"',
    'Biological Process_OPTIONS' => {semicolon_separated_list=>1},

    'Cellular Component' => "http://www.ebi.ac.uk/ego/QuickGO?mode=display&entry=\%$colnameidx{cellular_component_GO}V",
    'Cellular Component_ATAG' => 'TARGET="WinExt"',
    'Cellular Component_OPTIONS' => {semicolon_separated_list=>1},

    'InterPro Protein Domain' => "http://www.ebi.ac.uk/interpro/IEntry?ac=\%$colnameidx{interpro_protein_domain_GO}V",
    'InterPro Protein Domain_ATAG' => 'TARGET="WinExt"',
    'InterPro Protein Domain_OPTIONS' => {semicolon_separated_list=>1},

    'PA Prot' => "https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/Search?search_key=\%$colnameidx{biosequence_accession}V&action=GO",
    'PA Prot_ATAG' => 'TARGET="WinPeptideAtlas"',
    'PA Prot_ISNULL' => '',
    'PA Prot_OPTIONS' => {embed_html=>1},

    'PA Pep' => "https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/Search?search_key=\%$colnameidx{peptide_sequence}V&action=GO",
    'PA Pep_ATAG' => 'TARGET="WinPeptideAtlas"',
    'PA Pep_ISNULL' => '',
    'PA Pep_OPTIONS' => {embed_html=>1},
  );

  return %url_cols;

}



###############################################################################
#
# prepareCytoscapeFiles
#
# Prepares the required noa and sif files for this dataset
#
# Named arguments:
#   resultset_ref     - hash ref; {column_list_ref} (array ref of column
#                       names), {column_hash_ref} (column name => index into
#                       a row) and {data_ref} (array ref of row array refs)
#                       are read
#   hidden_cols_ref   - hash ref keyed by column title; a column whose title
#                       has a defined entry gets no attribute file
#   column_titles_ref - array ref of titles, parallel to column_list_ref
#   cytoscape         - hash ref that is filled in: {files}->{<file name>}
#                       becomes an array ref whose first element is the
#                       header and whose remaining elements are the lines
#   rs_params_ref, query_parameters_ref, url_cols_ref
#                     - accepted for call compatibility; not used
#
# Nothing useful is returned; the result is delivered through 'cytoscape'.
#
# A column that is named in a lookup but absent from column_hash_ref yields
# no value.  (Indexing a row with an undefined index would silently read
# column 0 instead.)  Rows for which neither canonical_name nor
# biosequence_name provides a name are skipped, because every output line
# is keyed by that name.
###############################################################################
sub prepareCytoscapeFiles {
  my %args = @_;

  #### Process the arguments list
  my $resultset_ref = $args{'resultset_ref'};
  my $hidden_cols_ref = $args{'hidden_cols_ref'} || {};
  my $column_titles_ref = $args{'column_titles_ref'} || [];
  my $cytoscape = $args{'cytoscape'};


  #### Set up some data structures to hold Cytoscape data
  my %cytoscape_file_header = (
    'network.sif' => undef,
    'commonName.noa' => 'commonName',
    'organism.noa' => 'species',
    'webLookup.noa' => 'web lookup',
  );

  #### The first element of every file is its header.  network.sif has
  #### none, so its first element is undef.
  #### NOTE(review): presumably whatever writes the files skips an
  #### undefined header - confirm.
  foreach my $filename (keys(%cytoscape_file_header)) {
    $cytoscape->{files}->{$filename} = [ $cytoscape_file_header{$filename} ];
  }


  #### Also include all other columns, except the hidden ones.
  #### NOTE(review): a visible column literally named 'organism' shares
  #### 'organism.noa' with the fixed file above and replaces its header.
  my $all_columns = $resultset_ref->{column_list_ref} || [];
  my @visible_columns;
  my $i = 0;
  foreach my $column ( @{$all_columns} ) {
    my $title = $column_titles_ref->[$i++];
    next if (defined($hidden_cols_ref->{$title}));
    $cytoscape->{files}->{"$column.noa"} = [ $title || $column ];
    push(@visible_columns,$column);
  }


  my $col = $resultset_ref->{column_hash_ref} || {};

  #### Fetch the value of a named column from a row, or undef if the
  #### resultset has no such column
  my $cell = sub {
    my ($row,$column_name) = @_;
    my $index = $col->{$column_name};
    return defined($index) ? $row->[$index] : undef;
  };

  #### The web lookup attribute needs all three of these columns
  my $has_web_lookup = (defined($col->{biosequence_name}) &&
                        defined($col->{accessor}) &&
                        defined($col->{accessor_suffix}));


  foreach my $row ( @{$resultset_ref->{data_ref} || []} ) {

    #### Determine the best canonical name. We must have one
    my $canonical_name = $cell->($row,'canonical_name')
      || $cell->($row,'biosequence_name');
    next unless (defined($canonical_name) && $canonical_name ne '');

    #### If there's a bait protein named, then use it to draw an edge
    my $pulldown_protein_name = $cell->($row,'pulldown_protein_name');
    if ($pulldown_protein_name) {
      push(@{$cytoscape->{files}->{'network.sif'}},
        "$canonical_name pp $pulldown_protein_name");
    }

    #### Write an attributes file for organism
    my $organism = $cell->($row,'organism');
    push(@{$cytoscape->{files}->{'organism.noa'}},
      "$canonical_name = $organism") if ($organism);

    #### Write an attributes file for the common name
    my $biosequence_name = $cell->($row,'biosequence_name');
    push(@{$cytoscape->{files}->{'commonName.noa'}},
      "$canonical_name = $biosequence_name") if ($biosequence_name);

    #### We have no interaction information, so just list all proteins
    push(@{$cytoscape->{files}->{'network.sif'}},
      "$canonical_name");

    #### Create a protein web lookup attribute
    if ($has_web_lookup) {
      push(@{$cytoscape->{files}->{'webLookup.noa'}},
        "$canonical_name = $row->[$col->{accessor}]$biosequence_name$row->[$col->{accessor_suffix}]");
    }

    #### Write one attribute line per visible column that has a value
    foreach my $column (@visible_columns) {
      my $value = $cell->($row,$column);
      push(@{$cytoscape->{files}->{"$column.noa"}},
        "$canonical_name = $value") if ($value);
    }

  } # end foreach my $row

} # end prepareCytoscapeFiles