#!/usr/local/bin/perl ############################################################################### # Program : GetAnnotations # Author : Eric Deutsch # $Id$ # # Description : This CGI program that allows users to # browse through annotated proteins very simply # # SBEAMS is Copyright (C) 2000-2005 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::ProteinStructure; use SBEAMS::ProteinStructure::Settings; use SBEAMS::ProteinStructure::Tables; $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::ProteinStructure; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); #use CGI; use CGI::Carp qw(fatalsToBrowser croak); #$q = new CGI; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. 
###############################################################################
# main: Authenticate the user, read the request parameters and render the
#       page (header, request handling, footer).
###############################################################################
sub main {

  #### Do the SBEAMS authentication and exit if a username is not returned
  exit unless ($current_username = $sbeams->Authenticate(
    permitted_work_groups_ref=>['ProteinStructure_user',
      'ProteinStructure_admin','ProteinStructure_readonly','Admin'],
    #connect_read_only=>1,
    #allow_anonymous_access=>1,
  ));

  #### Read in the default input parameters
  my %parameters;
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters);
  #$sbeams->printDebuggingInfo($q);

  #### Process generic "state" parameters before we start
  $sbeams->processStandardParameters(parameters_ref=>\%parameters);

  #### Decide what action to take based on information so far
  if ($parameters{action} eq "xxxx") {
    #### Placeholder branch for a future special action; does nothing
  } else {
    $sbeamsMOD->display_page_header();
    # navigation_bar=>$parameters{navigation_bar});
    handle_request(ref_parameters=>\%parameters);
    $sbeamsMOD->display_page_footer();
  }

} # end main


###############################################################################
# Handle Request
#
# Displays the search form (html output mode only), validates the requested
# biosequence set(s) against the projects accessible to the user, builds the
# SQL query from the search scope/key and displays the resultset.
#
# Arguments (named):
#   ref_parameters   reference to the hash of request parameters (required;
#                    dies if not passed). The hash is copied, so the caller's
#                    hash is not modified.
#
# Returns nothing useful. Returns early if a constraint cannot be parsed or
# if no valid biosequence_set_id remains after the permission check.
###############################################################################
sub handle_request {
  my %args = @_;

  #### Process the arguments list
  my $ref_parameters = $args{'ref_parameters'}
    || die "ref_parameters not passed";
  my %parameters = %{$ref_parameters};

  #### Define some variables for a query and resultset
  my $sql;
  my %resultset = ();
  my $resultset_ref = \%resultset;
  my (%url_cols,%hidden_cols,%max_widths,$show_sql);

  #### Read in the standard form values
  my $apply_action = $parameters{'action'} || $parameters{'apply_action'};

  #### Set some specific settings for this program
  my $PROGRAM_FILE_NAME="GetAnnotations";
  my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME";

  #### Get the columns and input types for this table/query
  my @columns = ( 'search_scope','search_key' );
  # NOTE(review): this builds the single pair optionlist => 'text'. If
  # parse_input_parameters expects a column-name => input-type mapping, this
  # should probably be ( search_scope => 'optionlist', search_key => 'text' )
  # -- confirm against SBEAMS::Connection before changing.
  my %input_types = ( 'optionlist','text' );

  #### Read the input parameters for each column
  my $n_params_found = $sbeams->parse_input_parameters(
    q=>$q,parameters_ref=>\%parameters,
    columns_ref=>\@columns,input_types_ref=>\%input_types);

  #### If the apply action was to recall a previous resultset, do it
  my %rs_params = $sbeams->parseResultSetParams(q=>$q);
  if ($apply_action eq "VIEWRESULTSET") {
    $sbeams->readResultSet(resultset_file=>$rs_params{set_name},
      resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters);
    $n_params_found = 99;
  }

  #### Set some reasonable defaults if no parameters supplied
  unless ($n_params_found) {
    #### No defaults are currently defined
  }

  #### Apply any parameter adjustment logic
  #$parameters{display_options} = 'ShowSQL';

  #### Display the user-interaction input form
  if ($sbeams->output_mode() eq 'html') {
    my @options = ( 'All','GeneSymbol','ORFName','FullGeneName','ECNumbers' );
    my %options = (
      'GeneSymbol' => 'Gene Symbol',
      'ORFName' => 'ORF Name',
      'FullGeneName' => 'Full Gene Name',
      'ECNumbers' => 'EC Number',
      'All' => 'All Attributes',
    );

    #### Build the option list
    # NOTE(review): the option markup that used $flag and %options, and the
    # form markup that emitted $optionlist, appear to have been stripped from
    # this copy of the file. The strings below are kept exactly as found;
    # restore the original HTML from version control.
    my $optionlist = '';
    foreach my $key ( @options ) {
      my $flag = '';
      $flag = 'SELECTED' if ($parameters{search_scope} eq $key);
      $optionlist .= "\n";
    };

    # Had to wrap this its own table tags 'cause it was rendering very poorly
    # with new halo 'skin'
    print qq~
BROWSE THE PROTEOME

Search for

~;
  }

  #### Finish the upper part of the page and go begin the full-width
  #### data portion of the page
  # $sbeams->display_page_footer(close_tables=>'YES',
  #   separator_bar=>'YES',display_footer=>'NO')
  #   unless ( $parameters{display_options} =~ /SequenceFormat/ &&
  #            $apply_action =~ /HIDE/ );


  #########################################################################
  #### Process all the constraints

  #### Build BIOSEQUENCE_SET constraint
  #### This first pass runs before the raw value is interpolated into the
  #### verification query below; presumably '-1' means the value was not a
  #### valid integer list -- confirm against parseConstraint2SQL
  my $form_test = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_set_id",
    constraint_type=>"int_list",
    constraint_name=>"BioSequence Set",
    constraint_value=>$parameters{biosequence_set_id} );
  return if ($form_test eq '-1');

  #### Verify that the selected biosequence_sets are permitted
  if ($parameters{biosequence_set_id}) {
    my $sql = qq~
      SELECT biosequence_set_id,project_id
        FROM $TBPS_BIOSEQUENCE_SET
       WHERE biosequence_set_id IN ( $parameters{biosequence_set_id} )
         AND record_status != 'D'
    ~;
    my %project_ids = $sbeams->selectTwoColumnHash($sql);
    my @accessible_project_ids = $sbeams->getAccessibleProjects();
    my %accessible_project_ids;
    foreach my $id ( @accessible_project_ids ) {
      $accessible_project_ids{$id} = 1;
    }

    my @input_ids = split(',',$parameters{biosequence_set_id});
    my @verified_ids;
    foreach my $id ( @input_ids ) {

      #### If the requested biosequence_set_id doesn't exist
      if (! defined($project_ids{$id})) {
        $sbeams->reportException(
          state => 'ERROR',
          type => 'BAD CONSTRAINT',
          message => "Non-existent biosequence_set_id = $id specified",
        );

      #### If the project for this biosequence_set is not accessible
      } elsif (! defined($accessible_project_ids{$project_ids{$id}})) {
        $sbeams->reportException(
          state => 'ERROR',
          type => 'PERMISSION DENIED',
          message => "Your current privilege settings do not allow you to access biosequence_set_id = $id. 
See project owner to gain permission.",
        );

      #### Else, let it through
      } else {
        push(@verified_ids,$id);
      }

    }

    #### Set the input constraint to only allow that which is valid
    $parameters{biosequence_set_id} = join(',',@verified_ids);

  }

  #### If no valid biosequence_set_id was selected, stop here
  unless ($parameters{biosequence_set_id}) {
    $sbeams->reportException(
      state => 'ERROR',
      type => 'INSUFFICIENT CONSTRAINTS',
      message => "You must select at least one valid Biosequence Set",
    );
    return;
  }

  #### Build BIOSEQUENCE_SET constraint (now from the verified ids only)
  my $biosequence_set_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.biosequence_set_id",
    constraint_type=>"int_list",
    constraint_name=>"BioSequence Set",
    constraint_value=>$parameters{biosequence_set_id} );
  return if ($biosequence_set_clause eq '-1');

  #### Build SEARCH SCOPE constraint
  #### The clause itself is not placed in the query; the call is only used
  #### to check that the value parses
  my $search_scope_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.search_scope",
    constraint_type=>"plain_text",
    constraint_name=>"Search Scope",
    constraint_value=>$parameters{search_scope},
  );
  return if ($search_scope_clause eq '-1');

  #### Build SEARCH KEY constraint
  #### As above, only the parse result is used. The check below previously
  #### re-tested $search_scope_clause, so a bad search key was never caught
  my $search_key_clause = $sbeams->parseConstraint2SQL(
    constraint_column=>"BS.search_key",
    constraint_type=>"plain_text",
    constraint_name=>"Search Key",
    constraint_value=>$parameters{search_key},
  );
  return if ($search_key_clause eq '-1');

  #### Each of the following per-attribute clauses has its first "AND"
  #### turned into " OR" so that the clauses can be OR-ed together inside
  #### the "AND ( 0 = 1 ... )" group of the final query

  #### Build ORF NAME constraint
  my $orf_name_clause = '';
  if ($parameters{search_scope} =~ /(ORFName|All)/) {
    $orf_name_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BS.biosequence_name",
      constraint_type=>"plain_text",
      constraint_name=>"ORF Name",
      constraint_value=>$parameters{search_key},
    );
  }
  return if ($orf_name_clause eq '-1');
  $orf_name_clause =~ s/AND/ OR/;

  #### Build GENE SYMBOL constraint
  my $gene_symbol_clause = '';
  if ($parameters{search_scope} =~ /(GeneSymbol|All)/) {
    my $search_key = $parameters{search_key};
    #### Turn a key without wildcards into a prefix match
    if (defined($search_key) && $search_key gt '' &&
        $search_key !~ /[%_]/) {
      $search_key = "$search_key\%";
    };
    $gene_symbol_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BSA.gene_symbol",
      constraint_type=>"plain_text",
      constraint_name=>"Gene Symbol",
      constraint_value=>$search_key,
    );
  }
  return if ($gene_symbol_clause eq '-1');
  $gene_symbol_clause =~ s/AND/ OR/;

  #### Build EC NUMBER constraint
  my $ec_number_clause = '';
  if ($parameters{search_scope} =~ /(ECNumbers|ECNumbers_exact|All)/) {
    my $search_key = $parameters{search_key};
    #### Turn a key without wildcards into a substring match unless an
    #### exact match was requested
    if (defined($search_key) && $search_key gt '' &&
        $search_key !~ /[%_]/ &&
        $parameters{search_scope} !~ /ECNumbers_exact/) {
      $search_key = "\%$search_key\%";
    };
    $ec_number_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BSA.EC_numbers",
      constraint_type=>"plain_text",
      constraint_name=>"EC Number",
      constraint_value=>$search_key,
    );
  }
  return if ($ec_number_clause eq '-1');
  $ec_number_clause =~ s/AND/ OR/;

  #### Build FULL GENE NAME constraint
  # NOTE(review): this always searches BSA.functional_description, while the
  # displayed "Gene Name/Function" column is BSA.full_gene_name for most
  # biosequence sets (see the column definitions below) -- confirm intent.
  my $full_gene_name_clause = '';
  if ($parameters{search_scope} =~ /(FullGeneName|All)/) {
    my $search_key = $parameters{search_key};
    #### Turn a key without wildcards into a substring match
    if (defined($search_key) && $search_key gt '' &&
        $search_key !~ /[%_]/) {
      $search_key = "\%$search_key\%";
    };
    $full_gene_name_clause = $sbeams->parseConstraint2SQL(
      constraint_column=>"BSA.functional_description",
      constraint_type=>"plain_text",
      constraint_name=>"Full Gene Name",
      constraint_value=>$search_key,
    );
  }
  return if ($full_gene_name_clause eq '-1');
  $full_gene_name_clause =~ s/AND/ OR/;

  #### Special handling for scope of 'All': if the external file search
  #### finds anything, its accession list replaces every other constraint
  if ($parameters{search_scope} =~ /All/) {
    my $result = searchExternal(
      query_parameters_ref => \%parameters,
    );
    if ($result) {
      $orf_name_clause = $sbeams->parseConstraint2SQL(
        constraint_column=>"BS.biosequence_name",
        constraint_type=>"plain_text",
        constraint_name=>"ORF Name",
        constraint_value=>$result,
      );
      $gene_symbol_clause = '';
      $ec_number_clause = '';
      $full_gene_name_clause = '';
      $orf_name_clause =~ s/AND/ OR/;
    }
  }

  #### No LIMITs
  my $limit_clause = '';

  #### Define the desired columns in the query
  #### [friendly name used in url_cols references,SQL,displayed column title]
  #### Biosequence sets 2 and 5 take "Gene Name/Function" from
  #### functional_description; all others take it from full_gene_name.
  # NOTE(review): the numeric == means a list such as "2,3" compares as 2.
  my @gene_name_column =
    ($parameters{biosequence_set_id} == 2 ||
     $parameters{biosequence_set_id} == 5)
    ? ("functional_description","BSA.functional_description","Gene Name/Function")
    : ("full_gene_name","BSA.full_gene_name","Gene Name/Function");

  my @column_array = (
    ["biosequence_name","BS.biosequence_name","ORF Name"],
    ["gene_symbol","BSA.gene_symbol","Gene Symbol"],
    ["protein_EC_numbers","BSA.EC_numbers","EC Numbers"],
    [@gene_name_column],
    ["domain_hits","' [View Domain Hits] '","Links"],
    ["project_id","BSS.project_id","project_id"],
    ["biosequence_set_id","BS.biosequence_set_id","biosequence_set_id"],
    ["biosequence_id","BS.biosequence_id","biosequence_id"],
    ["biosequence_annotation_id","BSA.biosequence_annotation_id","biosequence_annotation_id"],
    ["accessor","DBX.accessor","accessor"],
    ["biosequence_accession","BS.biosequence_accession","biosequence_accession"],
  );

  #### Adjust the columns definition based on user-selected options
  if ( $parameters{display_options} =~ /ShowSQL/ ) {
    $show_sql = 1;
  }

  #### Build the columns part of the SQL statement
  my %colnameidx = ();
  my @column_titles = ();
  my $columns_clause = $sbeams->build_SQL_columns_list(
    column_array_ref=>\@column_array,
    colnameidx_ref=>\%colnameidx,
    column_titles_ref=>\@column_titles
  );

  #### Define the SQL statement
  $sql = qq~
    SELECT $limit_clause $columns_clause
      FROM $TBPS_BIOSEQUENCE BS
      LEFT JOIN $TBPS_BIOSEQUENCE_SET BSS
           ON ( BS.biosequence_set_id = BSS.biosequence_set_id )
      LEFT JOIN $TB_DBXREF DBX
           ON ( BS.dbxref_id = DBX.dbxref_id )
      LEFT JOIN $TBPS_BIOSEQUENCE_ANNOTATION BSA
           ON ( BS.biosequence_id = BSA.biosequence_id )
     WHERE 1 = 1
    $biosequence_set_clause
      AND ( 0 = 1
    $orf_name_clause
    $gene_symbol_clause
    $ec_number_clause
    $full_gene_name_clause
          )
     ORDER BY BS.biosequence_accession
  ~;

  #### Define the hypertext links for columns that need them
  %url_cols = ('ORF Name' => "$CGI_BASE_DIR/$SBEAMS_SUBDIR/BrowseBioSequence.cgi?project_id=\%$colnameidx{project_id}V&biosequence_set_id=\%$colnameidx{biosequence_set_id}V&biosequence_accession_constraint=\%V&action=QUERYHIDE",
               'ORF Name_ATAG' => 'TARGET="Win1"',

               'Gene Name/Function' => "$CGI_BASE_DIR/$SBEAMS_PART/ManageTable.cgi?TABLE_NAME=PS_biosequence_annotation&biosequence_annotation_id=\%$colnameidx{biosequence_annotation_id}V&biosequence_id=\%$colnameidx{biosequence_id}V&ShowEntryForm=1",
               'Gene Name/Function_ATAG' => 'TARGET="Win1"',

               'Gene Symbol' => "$CGI_BASE_DIR/$SBEAMS_PART/ManageTable.cgi?TABLE_NAME=PS_biosequence_annotation&biosequence_annotation_id=\%$colnameidx{biosequence_annotation_id}V",
               'Gene Symbol_ATAG' => 'TARGET="Win1"',

               'Links' => "$CGI_BASE_DIR/$SBEAMS_SUBDIR/GetDomainHits?project_id=\%$colnameidx{project_id}V&biosequence_set_id=\%$colnameidx{biosequence_set_id}V&biosequence_accession_constraint=\%$colnameidx{biosequence_accession}V&action=QUERYHIDE&display_options=ApplyChilliFilter,ShowExtraProteinProps",
               'Links_ATAG' => 'TARGET="Win1"',
               'Links_ISNULL' => ' [View Domain Hits] ',
  );

  #### Define columns that should be hidden in the output table
  %hidden_cols = ('project_id' => 1,
                  'biosequence_set_id' => 1,
                  'biosequence_id' => 1,
                  'biosequence_annotation_id' => 1,
                  'accessor' => 1,
                  'accessor_suffix' => 1,
                  'biosequence_accession' => 1,
  );


  #########################################################################
  #### If QUERY or VIEWRESULTSET was selected, display the data
  $apply_action = 'GO' unless ($apply_action);
  if ($apply_action =~ /(QUERY|GO|VIEWRESULTSET)/) {

    #### Show the SQL that will be or was executed
    $sbeams->display_sql(sql=>$sql) if ($show_sql);

    #### If the action contained QUERY, then fetch the results from
    #### the database
    if ($apply_action =~ /(QUERY|GO)/i) {

      #### Fetch the results from the database server
      $sbeams->fetchResultSet(sql_query=>$sql,
        resultset_ref=>$resultset_ref);

      #### Store the resultset and parameters to disk resultset cache
      $rs_params{set_name} = "SETME";
      $sbeams->writeResultSet(resultset_file_ref=>\$rs_params{set_name},
        resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters);
    }

    #### Display the result
    if ($sbeams->output_mode() eq 'html') {
      print qq~
NOTE: Since you searched "All Attributes", some proteins in this resultset may match the search constraint in fields not displayed. To see the complete domain search information, click on [View Domain Hits].

~ if ($parameters{search_scope} =~ /All/);
    }

    $sbeams->displayResultSet(rs_params_ref=>\%rs_params,
      url_cols_ref=>\%url_cols,hidden_cols_ref=>\%hidden_cols,
      max_widths=>\%max_widths,resultset_ref=>$resultset_ref,
      column_titles_ref=>\@column_titles,
      base_url=>$base_url,query_parameters_ref=>\%parameters,
    );

    #### Display the resultset controls
    $sbeams->displayResultSetControls(rs_params_ref=>\%rs_params,
      resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters,
      base_url=>$base_url
    );

  #### If QUERY was not selected, then tell the user to enter some parameters
  } else {
    if ($sbeams->invocation_mode() eq 'http') {
      print "

Select parameters above and press QUERY

\n";
    } else {
      print "You need to supply some parameters to contrain the query\n";
    }
  }

} # end handle_request


###############################################################################
# evalSQL: Callback for translating global table variables to names
#
# Interpolates the given string as if it were a double-quoted Perl string
# and returns the result.
#
# NOTE(review): this is a string eval; any code embedded in $sql (for
# example via @{[ ... ]}) is executed. Never pass request-derived text here.
###############################################################################
sub evalSQL {
  my $sql = shift;

  return eval "\"$sql\"";

} # end evalSQL


################################################################################
# searchExternal: A method to search an external file for any matching info
#
# Scans the <abbrev>_DomainHits.tsv and <abbrev>_Biosequences.tsv files of
# the selected dataset and collects the 'biosequence_name' value of every
# line that matches ALL whitespace-separated terms of the search key
# (case-insensitive).
#
# Arguments (named):
#   query_parameters_ref   hash ref; biosequence_set_id and search_key are read
#   data_dir               optional directory holding the .tsv files; defaults
#                          to /net/dblocal/www/html/sbeams/var/$SBEAMS_SUBDIR
#
# Returns the matching names joined with ';' (sorted, '' if none), or 0 if
# the dataset has no known external file. Dies if a file cannot be opened.
###############################################################################
sub searchExternal {
  my %args = @_;

  #### Process the arguments list
  my $query_parameters_ref = $args{'query_parameters_ref'};
  my $data_dir = $args{'data_dir'}
    || "/net/dblocal/www/html/sbeams/var/$SBEAMS_SUBDIR";

  #### Determine which external data source to search
  my %abbreviations = (
    '3' => 'Hm',         # Hm
    '2' => 'Halo',       # Halobac
  );

  my $abbrevation = $abbreviations{$query_parameters_ref->{biosequence_set_id}};
  unless ($abbrevation) {
    print "ERROR: Unable to find a abbrevation for this dataset
\n";
    return 0;
  }

  #### Compile the search terms once, outside the file loops. Dots are
  #### taken literally; everything else is still treated as a regular
  #### expression. A term that is not a valid pattern falls back to a
  #### literal match instead of killing the request.
  my $search_spec = $query_parameters_ref->{search_key};
  $search_spec = '' unless (defined($search_spec));
  my @patterns;
  foreach my $term (split(/\s+/,$search_spec)) {
    #### An empty term (leading whitespace) constrains nothing
    next if ($term eq '');
    my $spec = $term;
    $spec =~ s/\./\\./g;
    my $pattern = eval { qr/$spec/i } || qr/\Q$term\E/i;
    push(@patterns,$pattern);
  }

  #### Search both the DomainHits and Biosequences files
  my %biosequence_accessions;
  foreach my $filetype ( qw (DomainHits Biosequences) ) {

    #### Open the file
    my $file = "${abbrevation}_$filetype.tsv";
    my $fullfile = "$data_dir/$file";
    open(my $infile,'<',$fullfile)
      || die("ERROR: Unable to open $fullfile: $!");

    #### Parse header line
    my $line = <$infile>;
    $line = '' unless (defined($line));
    $line =~ s/[\r\n]//g;
    my @column_list = split("\t",$line);

    #### Convert the array into a hash of names to column numbers
    my $i = 0;
    my %column_hash;
    foreach my $element (@column_list) {
      $column_hash{$element} = $i;
      $i++;
    }

    #### Column 0 is a valid position, so test for definedness, not truth
    my $col = $column_hash{'biosequence_name'};
    unless (defined($col)) {
      print "ERROR: Could not find column 'biosequence_name'
";
      $col = 0;
    }

    #### Search through the file looking for matches
    while (defined($line = <$infile>)) {
      $line =~ s/[\r\n]//g;
      my $match = 1;
      foreach my $pattern (@patterns) {
        if ($line !~ $pattern) {
          $match = 0;
          last;
        }
      }

      #### If there was a match, save this accession
      if ($match) {
        my @fields = split("\t",$line);
        $biosequence_accessions{$fields[$col]}++;
      }
    }

    close($infile);

  }

  #print join(";",keys(%biosequence_accessions)),"\n";

  return join(";",sort(keys(%biosequence_accessions)));

} # end searchExternal