#!/usr/local/bin/perl ############################################################################### # Program : CompareBySpectrum # Author : Eric Deutsch # $Id$ # # Description : This program that allows users to # compare different searches of the same data by spectrum. # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Proteomics; use SBEAMS::Proteomics::Settings; use SBEAMS::Proteomics::Tables; $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::Proteomics; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); #use CGI; #$q = new CGI; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. 
###############################################################################
sub main {

  #### Authenticate against SBEAMS first.  The result is kept in the global
  #### $current_username; if nothing true comes back we stop right here.
  $current_username = $sbeams->Authenticate(
    permitted_work_groups_ref => [
      'Proteomics_user',
      'Proteomics_admin',
      'Proteomics_readonly',
    ],
    #connect_read_only=>1,
    #allow_anonymous_access=>1,
  );
  exit unless ($current_username);


  #### Gather the default input parameters into a local hash
  my %parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
    q              => $q,
    parameters_ref => \%parameters,
  );
  #$sbeams->printDebuggingInfo($q);


  #### Hand the generic "state" parameters to SBEAMS before doing anything else
  $sbeams->processStandardParameters(parameters_ref => \%parameters);


  #### Dispatch on the requested action.  The "???" branch is an empty
  #### placeholder; every other request renders the page header, the
  #### request handler output and the page footer, in that order.
  my $requested_action = $parameters{action};
  if (defined($requested_action) && $requested_action eq "???") {
    # Some action
  } else {
    $sbeamsMOD->display_page_header();
    handle_request(ref_parameters => \%parameters);
    $sbeamsMOD->display_page_footer();
  }

} # end main



###############################################################################
# Handle Request
###############################################################################
sub handle_request {
  my %args = @_;


  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};


  #### Define some generic variables
  my ($i,$element,$key,$value,$line,$result,$sql);


  #### Define some variables for a query and resultset
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);


  #### Read in the standard form values
  my $apply_action=$parameters{'action'} || $parameters{'apply_action'} || '';
  my $TABLE_NAME = $parameters{'QUERY_NAME'};


  #### Set some specific settings for this program
  my $CATEGORY="Compare By Spectrum";
  $TABLE_NAME="PR_CompareBySpectrum" unless ($TABLE_NAME);
  ($PROGRAM_FILE_NAME) =
    $sbeamsMOD->returnTableInfo($TABLE_NAME,"PROGRAM_FILE_NAME");
  my $base_url =
"$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; #### Get the columns and input types for this table/query my @columns = $sbeamsMOD->returnTableInfo($TABLE_NAME,"ordered_columns"); my %input_types = $sbeamsMOD->returnTableInfo($TABLE_NAME,"input_types"); #### Read the input parameters for each column my $n_params_found = $sbeams->parse_input_parameters( q=>$q,parameters_ref=>\%parameters, columns_ref=>\@columns,input_types_ref=>\%input_types); #### If the apply action was to recall a previous resultset, do it my %rs_params = $sbeams->parseResultSetParams(q=>$q); if ($apply_action eq "VIEWRESULTSET") { $sbeams->readResultSet( resultset_file=>$rs_params{set_name}, resultset_ref=>$resultset_ref, query_parameters_ref=>\%parameters, resultset_params_ref=>\%rs_params, ); $n_params_found = 99; } #### Set some reasonable defaults if no parameters supplied unless ($n_params_found) { $parameters{input_form_format} = "minimum_detail"; } #### Apply any parameter adjustment logic #none #### Display the user-interaction input form $sbeams->display_input_form( TABLE_NAME=>$TABLE_NAME,CATEGORY=>$CATEGORY,apply_action=>$apply_action, PROGRAM_FILE_NAME=>$PROGRAM_FILE_NAME, parameters_ref=>\%parameters, input_types_ref=>\%input_types, ); #### Display the form action buttons $sbeams->display_form_buttons(TABLE_NAME=>$TABLE_NAME); #### Finish the upper part of the page and go begin the full-width #### data portion of the page $sbeams->display_page_footer(close_tables=>'YES', separator_bar=>'YES',display_footer=>'NO'); ######################################################################### #### Process all the constraints #### Build SEARCH BATCH / EXPERIMENT constraint my $search_batch_clause = $sbeams->parseConstraint2SQL( constraint_column=>"SB.search_batch_id", constraint_type=>"int_list", constraint_name=>"Search Batch List", constraint_value=>$parameters{search_batch_id} ); return if ($search_batch_clause eq '-1'); unless (defined($parameters{search_batch_id}) && 
$parameters{search_batch_id} =~ /\,/) { print "

You must select at least two experiments to compare!

\n\n"; return; } #### Build PROBABILITY constraint my $probability_clause = $sbeams->parseConstraint2SQL( constraint_column=>"SH.probability", constraint_type=>"flexible_float", constraint_name=>"Probability", constraint_value=>$parameters{probability_constraint} ); return if ($probability_clause eq '-1'); #### For much better performance, add in a hit_index constraint if #### there is a probability contraint. This could break if #### probabilities get assigned to rows with not hit_index = 1 !!!! #### The reason seems to be that the NONCLUSTERED INDEX on #### SH.probability is a lousy index that doesn't get used or #### something, likely because most values are NULL. $probability_clause .= " AND SH.hit_index = 1"; #### Build FILE_ROOT constraint my $file_root_clause = $sbeams->parseConstraint2SQL( constraint_column=>"S.file_root", constraint_type=>"plain_text", constraint_name=>"file_root", constraint_value=>$parameters{file_root_constraint} ); return if ($file_root_clause eq '-1'); #### Build CHARGE constraint my $charge_clause = $sbeams->parseConstraint2SQL( constraint_column=>"S.assumed_charge", constraint_type=>"int_list", constraint_name=>"Charge", constraint_value=>$parameters{charge_constraint} ); return if ($charge_clause eq '-1'); #### Build PRECURSOR MASS constraint my $precursor_mass_clause = $sbeams->parseConstraint2SQL( constraint_column=>"(S.sample_mass_plus_H+(S.assumed_charge-1)*1.008)/S.assumed_charge", constraint_type=>"flexible_float", constraint_name=>"Precursor_Mass Constraint", constraint_value=>$parameters{precursor_mass_constraint} ); return if ($precursor_mass_clause eq '-1'); #### Build PERCENT BUFFER B constraint my $percent_buffer_b_clause = $sbeams->parseConstraint2SQL( constraint_column=>"MSS.calc_buffer_percent", constraint_type=>"flexible_float", constraint_name=>"Percent ACN Constraint", constraint_value=>$parameters{percent_buffer_b_constraint} ); return if ($percent_buffer_b_clause eq '-1'); #### Get the proper names of all the 
biosequence_set_tags in the query my %experiment_names = getExperimentNames($parameters{search_batch_id}); if (defined($experiment_names{ERROR})) { print "

$experiment_names{ERROR}

\n"; return; } #### Build PROBABILITY IN INDIVIDUAL EXPERIMENTS constraint my @search_batch_ids = split(/,/,$parameters{search_batch_id}); my $specific_counts_clause = ''; my $i_exp = 1; foreach my $id (@search_batch_ids) { my $tmp_constraint = $sbeams->parseConstraint2SQL( constraint_column=>"$experiment_names{$id}->{cleaned}", constraint_type=>"flexible_float", constraint_name=>"Individual Probability Constraint", constraint_value=>$parameters{"probability_in_column_${i_exp}_constraint"} ); return if ($tmp_constraint eq '-1'); if ($tmp_constraint) { unless ($specific_counts_clause) { $tmp_constraint =~ s/\s*AND//; $tmp_constraint = "AND ( $tmp_constraint"; } $specific_counts_clause .= "$tmp_constraint\n"; } $i_exp++; } $specific_counts_clause .= ')' if ($specific_counts_clause); #### Build ROWCOUNT constraint $parameters{row_limit} = 5000 unless ($parameters{row_limit} > 0 && $parameters{row_limit}<=1000000); my $limit_clause = $sbeams->buildLimitClause( row_limit=>$parameters{row_limit}); #### Define some variables needed to build the query my $group_by_clause = ""; my $final_group_by_clause = ""; my @column_array; my $peptide_column = ""; my $count_column = ""; my @experiment_names_and_ids; #### If the user opted to see the GO columns, add them in my @additional_columns = (); my $additional_grouping_columns = ""; #### If the user opted to see GO columns or provided some GO constraints, #### then join in the GO tables my $GO_join = ""; #### Define the desired columns in the query #### [friendly name used in url_cols,SQL,displayed column title] @column_array = ( ["experiment_tag","experiment_tag","Exp"], ["file_root","file_root","File Root"], ); my @search_batch_ids = split(/,/,$parameters{search_batch_id}); my $pivot_clause = ''; my $pivot_clause2 = ''; my $pivot_clause3 = ''; my @tmp_columns2; my @tmp_columns3; #### Buffers to store sequence sameness constraint SQL my $sequence_sameness_clause = ''; my $previous_id; foreach my $id (@search_batch_ids) { 
push(@column_array, ["$experiment_names{$id}->{cleaned}", "STR($experiment_names{$id}->{cleaned},7,3)", "$experiment_names{$id}->{cleaned}"] ); push(@tmp_columns2, ["$experiment_names{$id}->{cleaned}__peptide", "$experiment_names{$id}->{cleaned}__peptide", "$experiment_names{$id}->{cleaned}__peptide"] ); push(@tmp_columns3, ["$experiment_names{$id}->{cleaned}__cross_corr", "$experiment_names{$id}->{cleaned}__cross_corr", "$experiment_names{$id}->{cleaned}__cross_corr"] ); push(@experiment_names_and_ids,$experiment_names{$id},$id); $pivot_clause .= " SUM(CASE WHEN search_batch_subdir = '$experiment_names{$id}->{original}' THEN NULLIF(probability,0.0) ELSE 0.0 END) AS \"$experiment_names{$id}->{cleaned}\",\n"; $pivot_clause2 .= " MAX(CASE WHEN search_batch_subdir = '$experiment_names{$id}->{original}' THEN peptide ELSE NULL END) AS \"$experiment_names{$id}->{cleaned}__peptide\",\n"; $pivot_clause3 .= " MAX(CASE WHEN search_batch_subdir = '$experiment_names{$id}->{original}' THEN cross_corr ELSE NULL END) AS \"$experiment_names{$id}->{cleaned}__cross_corr\",\n"; if ($parameters{display_options} =~ /SameSequence/) { if ($previous_id) { $sequence_sameness_clause .= " AND $experiment_names{$previous_id}->{cleaned}__peptide = $experiment_names{$id}->{cleaned}__peptide\n"; } } elsif ($parameters{display_options} =~ /DifferentSequence/) { if ($previous_id) { $sequence_sameness_clause .= " OR ISNULL($experiment_names{$previous_id}->{cleaned}__peptide,'') != ISNULL($experiment_names{$id}->{cleaned}__peptide,'')\n"; } } $previous_id = $id; } chop($pivot_clause3); chop($pivot_clause3); $pivot_clause3 .= "\n"; push(@column_array,@tmp_columns2,@tmp_columns3); if ($parameters{display_options} =~ /DifferentSequence/) { $sequence_sameness_clause =~ s/^ OR/ AND \(/; $sequence_sameness_clause .= " )\n"; } #### Limit the width of the Reference column if user selected if ( $parameters{display_options} =~ /MaxRefWidth/ ) { $max_widths{'Reference'} = 20; } #### Set flag to display SQL 
statement if user selected if ( $parameters{display_options} =~ /ShowSQL/ ) { $show_sql = 1; } #### Build the columns part of the SQL statement my %colnameidx = (); my @column_titles = (); my $columns_clause = $sbeams->build_SQL_columns_list( column_array_ref=>\@column_array, colnameidx_ref=>\%colnameidx, column_titles_ref=>\@column_titles ); #### Define the SQL statement $sql = qq~ SELECT experiment_tag AS "experiment_tag", set_tag AS "set_tag", search_batch_subdir AS "search_batch_subdir", S.file_root AS "file_root", SH.probability AS "probability", reference AS "reference", peptide AS "peptide", cross_corr AS "cross_corr", SB.search_batch_id AS "search_batch_id", S.msms_spectrum_id AS "msms_spectrum_id" INTO #tmp1 FROM $TBPR_SEARCH_HIT SH INNER JOIN $TBPR_SEARCH S ON ( SH.search_id = S.search_id ) INNER JOIN $TBPR_SEARCH_BATCH SB ON ( S.search_batch_id = SB.search_batch_id ) INNER JOIN $TBPR_MSMS_SPECTRUM MSS ON ( S.msms_spectrum_id = MSS.msms_spectrum_id ) INNER JOIN $TBPR_FRACTION F ON ( MSS.fraction_id = F.fraction_id ) INNER JOIN $TBPR_BIOSEQUENCE_SET BSS ON ( SB.biosequence_set_id = BSS.biosequence_set_id ) INNER JOIN $TBPR_PROTEOMICS_EXPERIMENT PE ON ( F.experiment_id = PE.experiment_id ) WHERE 1 = 1 $search_batch_clause $probability_clause $file_root_clause $charge_clause $precursor_mass_clause --AND SH.cross_corr_rank = 1 Seems to slow things down? 
-- SELECT experiment_tag,file_root, $pivot_clause $pivot_clause2 $pivot_clause3 INTO #tmp2 FROM #tmp1 GROUP BY experiment_tag,file_root -- SELECT $limit_clause->{top_clause} $columns_clause FROM #tmp2 WHERE 1=1 $specific_counts_clause $sequence_sameness_clause ORDER BY 2 $limit_clause->{trailing_limit_clause} ~; #### Certain types of actions should be passed to links my $pass_action = "QUERY"; $pass_action = $apply_action if ($apply_action =~ /QUERY/i); #### Pass nearly all of the constraints down to a child query my @parameters_to_pass; my $parameters_list = ''; while ( ($key,$value) = each %input_types ) { if ($key ne 'sort_order' && $key ne 'display_options' && $key ne 'search_batch_id' && $key ne 'reference_constraint') { if ($parameters{$key}) { push(@parameters_to_pass,"$key=$parameters{$key}"); } } } if (@parameters_to_pass) { $parameters_list = join('&',@parameters_to_pass); } #### Define the hypertext links for columns that need them %url_cols = ('File Root' => "$CGI_BASE_DIR/Proteomics/GetSearchHits?QUERY_NAME=PR_GetSearchHits&file_root_constraint=\%$colnameidx{file_root}V&search_batch_id=$parameters{search_batch_id}&sort_order=experiment_tag,set_tag,S.file_root,SH.cross_corr_rank,SH.hit_index&display_options=BSDesc,MaxRefWidth&apply_action=$pass_action", 'File Root_ATAG' => 'TARGET="Win2" ONMOUSEOVER="window.status=\'Show the SEQUEST results for this spectrum for all selected search batches\'; return true"', ); #### Add entries for each experiment for ($i=0;$i