#!/usr/local/bin/perl ############################################################################### # Program : GetAnnotations # Author : Eric Deutsch # $Id$ # # Description : This CGI program that allows users to # browse through annotated proteins very simply # # SBEAMS is Copyright (C) 2000-2010 Eric Deutsch # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Set up all needed modules and objects ############################################################################### use strict; use Getopt::Long; use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use vars qw ($sbeams $sbeamsMOD $q $current_contact_id $current_username $PROG_NAME $USAGE %OPTIONS $QUIET $VERBOSE $DEBUG $DATABASE $TABLE_NAME $PROGRAM_FILE_NAME $CATEGORY $DB_TABLE_NAME @MENU_OPTIONS); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::ProteinStructure; use SBEAMS::ProteinStructure::Settings; use SBEAMS::ProteinStructure::Tables; use SBEAMS::Microarray::Tables; $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::ProteinStructure; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); use CGI; use CGI::Carp qw(fatalsToBrowser croak); $q = new CGI; ############################################################################### # Set program name and usage banner for command like use ############################################################################### $PROG_NAME = $FindBin::Script; $USAGE = <Authenticate() and exit if it fails or continue if it works. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( permitted_work_groups_ref=>['ProteinStructure_user', 'ProteinStructure_admin','ProteinStructure_readonly','Admin'], #connect_read_only=>1, #allow_anonymous_access=>1, )); #### Read in the default input parameters my %parameters; my $n_params_found = $sbeams->parse_input_parameters( q=>$q,parameters_ref=>\%parameters); #$sbeams->printDebuggingInfo($q); #### Process generic "state" parameters before we start $sbeams->processStandardParameters(parameters_ref=>\%parameters); #### Decide what action to take based on information so far if ($parameters{action} eq "xxxx") { } elsif ($parameters{action} eq "download") { }else { $sbeamsMOD->display_page_header( navigation_bar=>$parameters{navigation_bar}); handle_request(ref_parameters=>\%parameters); $sbeamsMOD->display_page_footer(); } } # end main ############################################################################### # Handle Request ############################################################################### sub handle_request { my %args = @_; #### Process the arguments list my $ref_parameters = $args{'ref_parameters'} || die "ref_parameters not passed"; my %parameters = %{$ref_parameters}; #### Define some generic varibles my ($i,$element,$key,$value,$line,$result,$sql); #### Define some variables for a query and resultset my %resultset = (); my $resultset_ref = \%resultset; my (%url_cols,%hidden_cols,%max_widths,$show_sql); #### Read in the standard form values my $apply_action = $parameters{'action'} || $parameters{'apply_action'}; my $TABLE_NAME = $parameters{'QUERY_NAME'}; #### Set some specific settings for this program my $PROGRAM_FILE_NAME="GetHaloAnnotations"; my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; #### Get the columns and input types for this table/query my @columns = ( 'search_scope','search_key' ); my %input_types = ( 'optionlist','text' ); #### Read the input parameters for each column my $n_params_found = $sbeams->parse_input_parameters( q=>$q,parameters_ref=>\%parameters, columns_ref=>\@columns,input_types_ref=>\%input_types); #### If the apply action was to recall a previous resultset, do it my %rs_params = $sbeams->parseResultSetParams(q=>$q); if ($apply_action eq "VIEWRESULTSET") { $sbeams->readResultSet(resultset_file=>$rs_params{set_name}, resultset_ref=>$resultset_ref,query_parameters_ref=>\%parameters); $n_params_found = 99; } #### Set some reasonable defaults if no parameters supplied unless ($n_params_found) { } #### Apply any parameter adjustment logic #$parameters{display_options} = 'ShowSQL'; #### Display the user-interaction input form if ($sbeams->output_mode() eq 'html') { my @options = ( 'All','GeneSymbol','ORFName', 'FullGeneName','ECNumbers','Aliases', 'RedundantORFs' ); my %options = ( 'GeneSymbol' => 'Gene Symbol', 'ORFName' => 'ORF Name', 'FullGeneName' => 'Full Gene Name', 'ECNumbers' => 'EC Number', 'Aliases' => 'Aliases', 'RedundantORFs' => 'Redundant ORFs', 'All' => 'All Attributes', ); #### Build the option list my $optionlist = ''; foreach my $key ( @options ) { my $flag = ''; $flag = 'SELECTED' if ($parameters{search_scope} eq $key); $optionlist .= "\n"; }; unless( $parameters{table_only} ) { print qq~

Search for
$LINESEPARATOR ~; } } ######################################################################### #### Process all the constraints #### Build BIOSEQUENCE_SET constraint my $form_test = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_set_id", constraint_type=>"int_list", constraint_name=>"BioSequence Set", constraint_value=>$parameters{protein_biosequence_set_id}); return if ($form_test eq '-1'); #### Verify that the selected biosequence_sets are permitted my @protein_ids; if ($parameters{protein_biosequence_set_id}) { @protein_ids = verify_biosequence_set_ids(ids => $parameters{protein_biosequence_set_id}); } my @dna_ids; if ($parameters{dna_biosequence_set_id}) { @dna_ids = verify_biosequence_set_ids(ids => $parameters{dna_biosequence_set_id}); } $parameters{protein_biosequence_set_id} = join(',',@protein_ids); $parameters{dna_biosequence_set_id} = join(',',@dna_ids); #### If no valid biosequence_set_id was selected, stop here unless ($parameters{protein_biosequence_set_id}) { $sbeams->reportException( state => 'ERROR', type => 'INSUFFICIENT CONSTRAINTS', message => "You must select at least one valid Biosequence Set", ); return; } #### Set the input constraint to only allow that which is valid $sql = qq~ SELECT project_id FROM $TBPS_BIOSEQUENCE_SET WHERE biosequence_set_id IN ( $parameters{protein_biosequence_set_id} ) AND record_status != 'D' ~; my @project_ids = $sbeams->selectOneColumn($sql); my $project_list = join (",",@project_ids); #### Build Protein BIOSEQUENCE_SET constraint my $protein_biosequence_set_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_set_id", constraint_type=>"int_list", constraint_name=>"Protein BioSequence Set", constraint_value=>$parameters{protein_biosequence_set_id} ); return if ($protein_biosequence_set_clause eq '-1'); #### Build DNA BIOSEQUENCE_SET constraint my $dna_biosequence_set_clause; if ($parameters{dna_biosequence_set_id}) { my $result = $sbeams->parseConstraint2SQL( constraint_column=>"DBS.biosequence_set_id", constraint_type=>"int_list", constraint_name=>"DNA BioSequence Set", constraint_value=>$parameters{dna_biosequence_set_id} ); $dna_biosequence_set_clause = $result if ($result ne '-1'); } #### Build SEARCH SCOPE constraint my $search_scope_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.search_scope", constraint_type=>"plain_text", constraint_name=>"Search Scope", constraint_value=>$parameters{search_scope}, ); return if ($search_scope_clause eq '-1'); #### Build SEARCH KEY constraint my $search_key_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.search_key", constraint_type=>"plain_text", constraint_name=>"Search Key", constraint_value=>$parameters{search_key}, ); return if ($search_key_clause eq '-1'); #### Identify clauses now for repetitive constraints my $orf_name_clause = ''; my $gene_symbol_clause = ''; my $ec_number_clause = ''; my $full_gene_name_clause = ''; my $functional_description_clause = ''; my $duplicate_biosequences_clause = ''; my $alias_clause = ''; my $complete_search_keys = $parameters{search_key}; $complete_search_keys =~ s/\s*\[AND\]\s*/_AND_/g; my @search_keys = split /[\s+,;]/, $complete_search_keys; # Remove [AND]s in parameters for searchExternal $complete_search_keys =~ s/_AND_/ /g; $parameters{search_key} = $complete_search_keys; foreach my $search_key (@search_keys) { next if ($search_key=~/^\s?$/); # remove '_AND_', in the case of [AND] searches $search_key =~ s/_AND_/ /g; $search_key = "\%$search_key\%"; #### Build ORF NAME constraint my $temp_orf_name_clause = ''; if ($parameters{search_scope} =~ /(ORFName|All)/) { $temp_orf_name_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_name", constraint_type=>"plain_text", constraint_name=>"ORF Name", constraint_value=>$search_key, ); } return if ($temp_orf_name_clause eq '-1'); $temp_orf_name_clause =~ s/AND/ OR/; $orf_name_clause .= $temp_orf_name_clause; #### Build GENE SYMBOL constraint my $temp_gene_symbol_clause = ''; if ($parameters{search_scope} =~ /(GeneSymbol|All)/) { if (defined($search_key) && $search_key gt '' && $search_key !~ /[%_]/) { $search_key = "$search_key\%"; }; $temp_gene_symbol_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BSA.gene_symbol", constraint_type=>"plain_text", constraint_name=>"Gene Symbol", constraint_value=>$search_key, ); } return if ($temp_gene_symbol_clause eq '-1'); $temp_gene_symbol_clause =~ s/AND/ OR/; $gene_symbol_clause .= $temp_gene_symbol_clause; #### Build EC NUMBER constraint my $temp_ec_number_clause = ''; if ($parameters{search_scope} =~ /(ECNumbers|ECNumbers_exact|All)/) { if (defined($search_key) && $search_key gt '' && $search_key !~ /[%_]/ && $parameters{search_scope} !~ /ECNumbers_exact/) { $search_key = "$search_key\%"; }; $temp_ec_number_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BSA.EC_numbers", constraint_type=>"plain_text", constraint_name=>"EC Number", constraint_value=>$search_key, ); } return if ($temp_ec_number_clause eq '-1'); $temp_ec_number_clause =~ s/AND/ OR/; $ec_number_clause .= $temp_ec_number_clause; #### Build FULL GENE NAME constraint my $temp_full_gene_name_clause = ''; if ($parameters{search_scope} =~ /(FullGeneName|All)/) { if (defined($search_key) && $search_key gt '' && $search_key !~ /[%_]/) { $search_key = "$search_key\%"; }; $temp_full_gene_name_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BSA.full_gene_name", constraint_type=>"plain_text", constraint_name=>"Full Gene Name", constraint_value=>$search_key, ); } return if ($temp_full_gene_name_clause eq '-1'); $temp_full_gene_name_clause =~ s/AND/ OR/; $full_gene_name_clause .= $temp_full_gene_name_clause; #### Build DUPLICATE BIOSEQUENCES constraint my $temp_duplicate_biosequences_clause = ''; if ($parameters{search_scope} =~ /(RedundantORFs|All)/) { if (defined($search_key) && $search_key gt "" && $search_key !~ /[%_]/) { $search_key = "$search_key\%"; }; $temp_duplicate_biosequences_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BSPS.duplicate_biosequences", constraint_type=>"plain_text", constraint_name=>"Redundant ORFs", constraint_value=>$search_key, ); } return if ($temp_duplicate_biosequences_clause eq '-1'); $temp_duplicate_biosequences_clause =~ s/AND/ OR/; $duplicate_biosequences_clause .= $temp_duplicate_biosequences_clause; #### Build FUNCTIONAL DESCRIPTION constraint my $temp_functional_description_clause = ''; if ($parameters{search_scope} =~ /(All)/) { if (defined($search_key) && $search_key gt "" && $search_key !~ /[%_]/) { $search_key = "$search_key\%"; }; $temp_functional_description_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BSA.functional_description", constraint_type=>"plain_text", constraint_name=>"Gene Function", constraint_value=>$search_key, ); } return if ($temp_functional_description_clause eq '-1'); $temp_functional_description_clause =~ s/AND/ OR/; $functional_description_clause .= $temp_functional_description_clause; #### Build ALIASES constraint my $temp_alias_clause = ''; if ($parameters{search_scope} =~ /(Aliases|All)/) { if (defined($search_key) && $search_key gt "" && $search_key !~ /[%_]/) { $search_key = "$search_key\%"; }; $temp_alias_clause = $sbeams->parseConstraint2SQL( constraint_column=>"BSA.aliases", constraint_type=>"plain_text", constraint_name=>"Aliases", constraint_value=>$search_key, ); } return if ($temp_alias_clause eq '-1'); $temp_alias_clause =~ s/AND/ OR/; $alias_clause .= $temp_alias_clause; #### Sepcial handling for scope of 'All' if ($parameters{search_scope} =~ /All/) { my $result = searchExternal(query_parameters_ref => \%parameters,); if ($result) { $orf_name_clause .= $sbeams->parseConstraint2SQL( constraint_column=>"BS.biosequence_name", constraint_type=>"plain_text", constraint_name=>"ORF Name", constraint_value=>$result, ); $gene_symbol_clause = ''; $ec_number_clause = ''; $full_gene_name_clause = ''; $alias_clause = ''; $duplicate_biosequences_clause = ''; $functional_description_clause = ''; $orf_name_clause =~ s/AND/ OR/; } } } #### No LIMITs my $limit_clause = ''; #### Define some variables needed to build the query my $group_by_clause = ""; my $final_group_by_clause = ""; my @column_array; my $peptide_column = ""; my $count_column = ""; #### Define the desired columns in the query #### [friendly name used in url_cols references,SQL,displayed column title] my @column_array = ( ["protein_biosequence_id","BS.biosequence_id","protein_biosequence_id"], ["biosequence_annotation_id","BSA.biosequence_annotation_id","biosequence_annotation_id"], ["biosequence_name","BS.biosequence_name","ORF Name"], ["gene_symbol","BSA.gene_symbol","Gene Symbol"], ["functional_description","BSA.functional_description","Gene Function"], ["chromosome","BSPS.chromosome","Chromosome"], ["start","BSPS.start_in_chromosome","Start"], ["stop","BSPS.end_in_chromosome","Stop"], ["gene_aliases","BSA.aliases","Aliases"], ["duplicate_sequences","BSPS.duplicate_biosequences","Redundant ORFs"], ["comment","BSA.comment","Comment"], ["protein_biosequence_accession","BS.biosequence_accession","protein_biosequence_accession"], ); if ($dna_biosequence_set_clause) { push @column_array, ["dna_biosequence_id","DBS.biosequence_id","dna_biosequence_id"]; } #### Build the columns part of the SQL statement my %colnameidx = (); my @column_titles = (); my $columns_clause = $sbeams->build_SQL_columns_list( column_array_ref=>\@column_array, colnameidx_ref=>\%colnameidx, column_titles_ref=>\@column_titles ); #### Define the SQL statement $sql = qq~ SELECT $limit_clause $columns_clause FROM $TBPS_BIOSEQUENCE BS LEFT JOIN $TBPS_BIOSEQUENCE_SET BSS ON ( BS.biosequence_set_id = BSS.biosequence_set_id ) LEFT JOIN $TBPS_BIOSEQUENCE_ANNOTATION BSA ON ( BS.biosequence_id = BSA.biosequence_id ) LEFT JOIN $TBPS_BIOSEQUENCE_PROPERTY_SET BSPS ON ( BSPS.biosequence_id = BS.biosequence_id ) ~; $sql .= qq~ LEFT JOIN $TBPS_BIOSEQUENCE DBS ON ( DBS.biosequence_name = BS.biosequence_name ) ~ if ($dna_biosequence_set_clause); $sql .= qq~ WHERE 1 = 1 $protein_biosequence_set_clause ~; $sql .= $dna_biosequence_set_clause if ($dna_biosequence_set_clause); $sql .= qq~ AND ( 0 = 1 $orf_name_clause $gene_symbol_clause $ec_number_clause $full_gene_name_clause $alias_clause $functional_description_clause $duplicate_biosequences_clause ) AND BS.biosequence_seq IS NOT NULL ORDER BY BS.biosequence_name ~; #print "
SQL for selectSeveralColumns is\n $sql
"; my @rows = $sbeams->selectSeveralColumns($sql); #### Start the table my $table_html; my $chrom_color = "#FF9933"; my $pnrc100_color = "#CC66CC"; my $pnrc200_color = "#00CCCC"; my $item_count = scalar(@rows); if ($sbeams->output_mode() eq 'html') { $table_html = qq~
     Chromosome        pNRC100        pNRC200
~; $table_html .= qq~ ~ if ($sbeams->getCurrent_username() ne 'ext_halo'); $table_html .= qq~ ~; # Keep track of protein and dna biosequence IDs my @p_ids; my @d_ids; my @array_expression_genes = (); # Get accessible conditions my @accessible_project_ids = $sbeams->getAccessibleProjects(); my $project_ids_clause = join ",", @accessible_project_ids; my $halo_conditions_sql = qq~ SELECT condition_id FROM $TBMA_COMPARISON_CONDITION WHERE project_id IN ($PUBLIC_PROJECTS) ~; my @conditions = $sbeams->selectOneColumn ($halo_conditions_sql); my $condition_ids_clause = join ",", @conditions; my $counter = 0; # list to store gaggle data my @glist; my $alias_links =''; # halo_link_menus is an array of strings where each entry is a javascript # object literal holding data describing a link menu for a single row # in the table. my @halo_link_menus = (); foreach my $row (@rows) { my $protein_biosequence_id = $row->[0]; my $biosequence_annotation_id = $row->[1]; my $biosequence_name = $row->[2]; my $gene_symbol = $row->[3] || "[+]"; my $functional_description = $row->[4] || "[+]"; my $chromosome = $row->[5] || "[+]"; my $start = $row->[6] || "[+]"; my $stop = $row->[7] || "[+]"; my $aliases = $row->[8] || "[+]"; my $duplicate_biosequences = $row->[9]; my $comments = $row->[10] || "[+]"; $comments =~ s/\t/ /g; $comments =~ s/\n/\/g; my $biosequence_accession = $row->[11] || "[+]"; my $dna_biosequence_id = $row->[12] if ($row->[12]); push @p_ids, $protein_biosequence_id; push @d_ids, $dna_biosequence_id if ($dna_biosequence_id); push @glist, $biosequence_name; #### Special (fragile?) handling for halo aliases my @alias_items = split ",", $aliases; my $NCBI_PID_BASE = "http://www.ncbi.nlm.nih.gov/sutils/blink.cgi?pid="; # my $COG_BASE = "http://www.ncbi.nlm.nih.gov/COG/new/release/cow.cgi?cog="; my $COG_BASE = "http://www.ncbi.nlm.nih.gov/COG/old/palox.cgi?txt="; foreach my $item (@alias_items) { if ($item =~ /^\d+$/) { $item = "$item"; }elsif ($item =~ /^COG.*/) { $item = "$item"; }else { $item = "$item"; } } my $add_an_alias = "[+]"; push @alias_items, $add_an_alias if ($sbeams->getCurrent_username() ne 'ext_halo'); $alias_links = join ",",@alias_items; } # print "glist should only be run once and contains: @glist

"; my $glist_clean = ""; my $peptide_hit_list = ""; foreach my $glist_item (@glist){ $glist_clean .= "'$glist_item',"; } #ugh, this is pretty nasty ! chop ($glist_clean); #### glist cannot be an empty string else query below will fail unless ($glist_clean) { $glist_clean = "''"; } #print "peptide_hit_list is $peptide_hit_list"; # glist now has what we need, but need to surround the ORFs with '' for valid SQL to work! #my $good_sql = "SELECT DISTINCT PI.peptide_id, PI.original_protein_name FROM PeptideAtlas.dbo.peptide_instance PI INNER JOIN PeptideAtlas.dbo.peptide P ON ( PI.peptide_id = P.peptide_id ) INNER JOIN PeptideAtlas.dbo.atlas_build AB ON ( PI.atlas_build_id = AB.atlas_build_id ) LEFT JOIN PeptideAtlas.dbo.biosequence_set BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) LEFT JOIN sbeams.dbo.organism O ON ( BSS.organism_id = O.organism_id ) LEFT JOIN PeptideAtlas.dbo.peptide_mapping PM ON ( PI.peptide_instance_id = PM.peptide_instance_id ) LEFT JOIN PeptideAtlas.dbo.biosequence BS ON ( PM.matched_biosequence_id = BS.biosequence_id ) WHERE O.organism_name = 'Halobacterium' AND AB.atlas_build_id = '91' AND BS.biosequence_gene_name IN ($glist_clean) ORDER BY PI.original_protein_name"; # print "SQL is $good_sql
"; my $ok_sql =<<"END_SQL"; SELECT DISTINCT PI.original_protein_name, SUM(PI.n_observations) AS 'peptide_hits', COUNT( DISTINCT peptide_sequence ) AS distinct_peptides FROM PeptideAtlas.dbo.peptide_instance PI INNER JOIN PeptideAtlas.dbo.peptide P ON ( PI.peptide_id = P.peptide_id ) INNER JOIN PeptideAtlas.dbo.atlas_build AB ON ( PI.atlas_build_id = AB.atlas_build_id ) LEFT JOIN PeptideAtlas.dbo.biosequence_set BSS ON ( AB.biosequence_set_id = BSS.biosequence_set_id ) LEFT JOIN sbeams.dbo.organism O ON ( BSS.organism_id = O.organism_id ) LEFT JOIN PeptideAtlas.dbo.peptide_mapping PM ON ( PI.peptide_instance_id = PM.peptide_instance_id ) LEFT JOIN PeptideAtlas.dbo.biosequence BS ON ( PM.matched_biosequence_id = BS.biosequence_id ) WHERE O.organism_name = 'Halobacterium' -- AND AB.atlas_build_id = '112' AND AB.atlas_build_id = '130' AND BS.biosequence_gene_name IN ($glist_clean) GROUP BY PI.original_protein_name ORDER BY PI.original_protein_name END_SQL #my @atlas_hit = $sbeams->selectSeveralColumns($good_sql); #my %atlas_hit_h = $sbeams->selectTwoColumnHash($ok_sql); my $sth = $sbeams->get_statement_handle($ok_sql); my %atlas_hit_h; while ( my @row = $sth->fetchrow_array() ) { $atlas_hit_h{$row[0]} = \@row; } #while( my ($k, $v) = each %atlas_hit_h ) { # print "key: $k, value: $v.\n"; # } #set this variable so we can have the nice background-coloring of the

~; $table_html .= qq~ ~ if ($sbeams->getCurrent_username() ne 'ext_halo'); $table_html .= qq~ ~; $counter++; } # FIXME! Hard coded reference to halobacterium genus/species! my $organism = 'Halobacterium sp. NRC-1'; my $gXML = $sbeams->getGaggleXML( object => 'namelist', type => 'direct', name => "Orf names", data => \@glist, start => 1, end => 1, organism => $organism ); print "$gXML\n"; # End the table $table_html .= qq~
Links Coordinates ORF Name Gene Symbol Aliases FunctionComments
below. my $atlas_data; #if ($atlas_hit_count > 0) {$atlas_data = $atlas_hit_count} #print "

Found $atlas_hit_count hits: $atlas_hit
"; #foreach my $atlas_hits (@atlas_hit){ # $peptide_hit_list = $atlas_hits . " ," . $peptide_hit_list; #} #chop ($peptide_hit_list); foreach my $row (@rows) { my $protein_biosequence_id = $row->[0]; my $biosequence_annotation_id = $row->[1]; my $biosequence_name = $row->[2]; my $gene_symbol = $row->[3] || "[+]"; my $functional_description = $row->[4] || "[+]"; my $chromosome = $row->[5] || "[+]"; my $start = $row->[6] || "[+]"; my $stop = $row->[7] || "[+]"; my $aliases = $row->[8] || "[+]"; my $duplicate_biosequences = $row->[9]; my $comments = $row->[10] || "[+]"; $comments =~ s/\t/ /g; $comments =~ s/\n/\/g; my $biosequence_accession = $row->[11] || "[+]"; my $dna_biosequence_id = $row->[12] if ($row->[12]); push @p_ids, $protein_biosequence_id; push @d_ids, $dna_biosequence_id if ($dna_biosequence_id); # push @glist, $biosequence_name; # collect menu item data my @halo_link_menu_items = (); my $url; $table_html .= qq~

~; $url = "BrowseBioSequence.cgi?project_id=$project_list&biosequence_set_id=$parameters{dna_biosequence_set_id}&biosequence_id_constraint=$dna_biosequence_id&action=QUERYHIDE&display_options=SequenceFormat&display_mode=FASTA"; my $dna_link = getColorizedTD (tag=>'DNA Sequences', data=>$dna_biosequence_id, text=>'D', target=>'sequence', link=>$url, tooltip=>"DNA sequence in FASTA format"); # if ($dna_biosequence_id) { push(@halo_link_menu_items, getMenuItemJavascript ( tag=>'DNA Sequences', target=>'sequence', data=>$dna_biosequence_id, link=>$url, tooltip=> "DNA sequence in FASTA format")); # } $url = "BrowseBioSequence.cgi?project_id=$project_list&biosequence_set_id=$parameters{protein_biosequence_set_id}&biosequence_id_constraint=$protein_biosequence_id&action=QUERYHIDE&display_options=SequenceFormat&display_mode=FASTA"; my $prot_link = getColorizedTD (tag=>'Protein Sequences', data=>$protein_biosequence_id, text=>'P', target=>'sequence', link=>$url, tooltip=>"Protein sequence in FASTA format"); push(@halo_link_menu_items, getMenuItemJavascript ( tag=>'Protein Sequences', target=>'sequence', link=>$url, data=>$protein_biosequence_id, tooltip=> "Protein sequence in FASTA format")); $url = "ManageTable.cgi?TABLE_NAME=PS_biosequence_annotation&biosequence_annotation_id=$biosequence_annotation_id&biosequence_id=$protein_biosequence_id&ShowEntryForm=1"; my $annot_link = getColorizedTD (tag=>'Annotations', data=>$biosequence_annotation_id, text=>'A', target=>'annotation', link=>$url, tooltip=>"Edit Annotation (restricted)"); push(@halo_link_menu_items, getMenuItemJavascript ( tag=>'Annotations', target=>'annotation', link=>$url, data=>$biosequence_annotation_id, tooltip=> "Edit Annotation (restricted)")); $url = 'http://www.microbesonline.org/cgi-bin/keywordSearch.cgi?type=0&mapId=MAPID&term=1&locus=0&hit=0&disp=0&homolog=0&format=1&favorites=&taxTyping=halo&taxSelector=64091&taxId=64091&keyword='.$biosequence_name; my $microbes_online_link = getColorizedTD (tag=>'Microbes Online', text=>'O', data=>$biosequence_name, target=>'microbes_onlines', link=>$url, tooltip=>"Microbes Online Search for $biosequence_name"); push(@halo_link_menu_items, getMenuItemJavascript ( tag=>'Microbes Online', target=>'microbes_onlines', link=>$url, data=>$biosequence_name, tooltip=> "Microbes Online Search for $biosequence_name")); push @array_expression_genes,$biosequence_name; $url = "$CGI_BASE_DIR/Microarray/GetHaloExpression?canonical_name_constraint=$biosequence_name&condition_id=$condition_ids_clause&row_limit=10000&input_form_format=minimum_detail&QUERY_NAME=MA_GetExpression&action=QUERY&apply_action=QUERY"; my $array_expression_link = getColorizedTD (tag=>'Gene Expression', text=>'M', data=>$biosequence_name, target=>'array_expression', link=>$url, tooltip=>"Microarray Data Containing $biosequence_name"); push(@halo_link_menu_items, getMenuItemJavascript ( tag=>'Gene Expression', target=>'array_expression', data=>$biosequence_name, link=>$url, tooltip=> "Microarray Data Containing $biosequence_name")); ## put a space in between redundant ORFs so the tooltip will wrap the text $duplicate_biosequences =~ s/,/, /g; my $tip = "None"; if ($duplicate_biosequences) { $tip = $duplicate_biosequences; } $url = "GetHaloAnnotations?search_scope=All&search_key=$biosequence_name&action=GO&dna_biosequence_set_id=$parameters{dna_biosequence_set_id}&protein_biosequence_set_id=$parameters{protein_biosequence_set_id}&apply_action=QUERY"; my $duplicates_link = getColorizedTD (tag=>'Redundant ORFs', data=>$duplicate_biosequences, text=>'R', link=>$url, tooltip=>"Redundant ORFS: $tip"); push(@halo_link_menu_items, getMenuItemJavascript ( tag=>'Redundant ORFs', link=>$url, data=>$duplicate_biosequences, tooltip=> "Redundant ORFS: $tip")); #hard-coded atlas ID, FIX THIS !!! #my $atlas_id = '112'; my $atlas_id = '130'; ## if there are hits against the peptideAtlas, say so, otherwise, say none my $peptide_tip = "None"; my $peptide_link = "Peptide Atlas"; if ( $atlas_hit_h{$biosequence_name} ) { $peptide_tip = "Detected $atlas_hit_h{$biosequence_name}->[2] distinct peptides for $biosequence_name, $atlas_hit_h{$biosequence_name}->[1] total observations (Restricted Access)"; $peptide_link .= " (detected $atlas_hit_h{$biosequence_name}->[2] distinct peptides for $biosequence_name, $atlas_hit_h{$biosequence_name}->[1] total observations)"; $atlas_data = $atlas_hit_h{$biosequence_name}; } # print "atlas_data is $atlas_data
"; $url = "$CGI_BASE_DIR/PeptideAtlas/GetProtein?atlas_build_id=$atlas_id&protein_name=$biosequence_name&action=GO"; my $atlas_link = getColorizedTD (tag=>'Peptide Atlas', data=>$atlas_data, text=>'T', target=>'atlas', #link=>"$CGI_BASE_DIR/PeptideAtlas/GetPeptides?atlas_build_id=$atlas_id&biosequence_gene_name_constraint=$biosequence_name&query_name=AT_GetPeptides&action=QUERY&apply_action=QUERY&display_options=ShowMappings", link=>$url, tooltip=> "$peptide_tip" ); $atlas_data = ""; my $has_data = "ok"; $has_data = "" if $peptide_link eq "Peptide Atlas"; push(@halo_link_menu_items, getMenuItemJavascript ( tag=>$peptide_link, data=>$has_data, target=>'atlas', link=>$url, tooltip=> "$peptide_tip")); # VNG5000's, A, B and m suffixes not supported, so set data to a false value my $seq_name = $biosequence_name; my $data = "ok"; if ($seq_name =~ /VNG5\d\d\d\w+/ or $seq_name =~ /VNG\d\d\d\d(A|B|\wm|\wM)/ ) { $seq_name = ""; $data = ""; } $tip = "Transcript structure for: $biosequence_name"; my $svg_url = "http://networks.systemsbiology.net/projects/halo/transcript_structure_svgz/$biosequence_name.svgz"; $url = "http://networks.systemsbiology.net/projects/halo/transcript_structure/$biosequence_name.pdf"; my $transcript_structure_link = getColorizedTD (tag=>"Transcript Structure", data=>$seq_name, text=>'S', target=>'transcript_structure', link=>$url, tooltip=> "$tip"); push(@halo_link_menu_items, getMenuItemJavascript ( tag=> "Transcript Structure (PDF)", link=> $url, target=>'transcript_structure_pdf', data=>$data, tooltip=> "$tip")); push(@halo_link_menu_items, getMenuItemJavascript ( tag=> "Transcript Structure (SVG)", link=> $svg_url, target=>'transcript_structure_svg', data=> $data, tooltip=> "$tip")); # add menu for current row to list of menus for all rows push(@halo_link_menus, getMenuJavascript(@halo_link_menu_items)); # $table_html .= qq~ # $dna_link # ~if ($dna_biosequence_id); # $table_html .= qq~ # $prot_link # $annot_link # $microbes_online_link # $array_expression_link # $duplicates_link # $atlas_link # $transcript_structure_link $table_html .= qq~
$item"; }elsif ($item =~ /^COG.*/) { $item = "$item"; }else { $item = "$item"; } } my $add_an_alias = "[+]"; push @alias_items, $add_an_alias if ($sbeams->getCurrent_username() ne 'ext_halo'); $alias_links = join ",",@alias_items; $table_html .= qq~>$start..$stop $biosequence_name $gene_symbol $alias_links $functional_description $comments

~; ## PREPEND summary links print "
$counter Results Returned
"; $table_html = qq~ -Download Tab-delimited Summary
~ . $table_html; $table_html = qq~ -Legend and index for transcriptome structure plots
~ . $table_html; if (@p_ids) { my $p_list = join ",", @p_ids; $table_html = qq~ -View all Protein Entries
~ . $table_html; } if (@d_ids) { my $d_list = join ",", @d_ids; $table_html = qq~ -View all DNA Entries
~ . $table_html; } if (@array_expression_genes) { my $array_expression_ids = join "%3B", @array_expression_genes; $table_html = qq~ -View Microarray data
-Download Microarray data
~ . $table_html; } #### Print HTML print $table_html; # output a \n" } ############################################################################### # evalSQL: Callback for translating global table variables to names ############################################################################### sub evalSQL { my $sql = shift; return eval "\"$sql\""; } # end evalSQL sub verify_biosequence_set_ids { my %args = @_; my $ids = $args{'ids'} || die "biosequence_set_ids need to be passed."; my $sql = qq~ SELECT biosequence_set_id,project_id FROM $TBPS_BIOSEQUENCE_SET WHERE biosequence_set_id IN ( $ids ) AND record_status != 'D' ~; my %project_ids = $sbeams->selectTwoColumnHash($sql); my @accessible_project_ids = $sbeams->getAccessibleProjects(); my %accessible_project_ids; foreach my $id ( @accessible_project_ids ) { $accessible_project_ids{$id} = 1; } my @input_ids = split(',',$ids); my @verified_ids; foreach my $id ( @input_ids ) { ## requested bio seq set id exists: if (exists $project_ids{$id} ) { my $okay = 0; if ($project_ids{$id} eq "") { ## let it through, bioseq set has no project id $okay = 1; } ## if have a project_id but it isn't accessible, print message if ( ($okay == 0) && ! defined($accessible_project_ids{$project_ids{$id}}) ) { ## id exists, but is not accessible project $sbeams->reportException( state => 'ERROR', type => 'PERMISSION DENIED', message => "Your current privilege settings do not allow you to access biosequence_set_id = $id. See project owner to gain permission.", ); } elsif (($okay == 0) && (exists $accessible_project_ids{$project_ids{$id}})) { $okay = 1; } if ($okay == 1) { push(@verified_ids,$id); } } else { #### requested biosequence_set_id doesn't exist $sbeams->reportException( state => 'ERROR', type => 'BAD CONSTRAINT', message => "Non-existent biosequence_set_id = $id specified", ); } } return @verified_ids; } ################################################################################ # searchExternal: A method to search an external file for any matching info ############################################################################### sub searchExternal { my %args = @_; #### Process the arguments list my $query_parameters_ref = $args{'query_parameters_ref'}; #### Determine which external data source to search my %abbreviations = ( '3' => 'Hm', # Hm '2' => 'Halo', # Halobac ); my $abbreviation = $abbreviations{$query_parameters_ref->{protein_biosequence_set_id}}; unless ($abbreviation) { print "ERROR: Unable to find a file for this dataset
\n"; return 0; } #### Search both the DomainHits and Biosequences files my %biosequence_accessions; foreach my $filetype ( qw (DomainHits Biosequences) ) { #### Open the file my $file = "${abbreviation}_$filetype.tsv"; my $fullfile = "/net/dblocal/www/html/sbeams/var/$SBEAMS_SUBDIR/$file"; open(INFILE,$fullfile) || die("ERROR: Unable to open $fullfile"); #### Parse header line my $line = ; $line =~ s/[\r\n]//g; my @column_list = split("\t",$line); #### Convert the array into a hash of names to column numbers my $i = 0; my %column_hash; foreach my $element (@column_list) { $column_hash{$element} = $i; $i++; } my $col = $column_hash{'biosequence_name'}; unless ($col) { print "ERROR: Could not find column 'biosequence_accession'
"; $col = 0; } #### Get the search_spec my $search_spec = $query_parameters_ref->{search_key}; $search_spec =~ s/\./\\./g; my @specs = split(/[\s+,;]/,$search_spec); #### Search through the file looking for matches while ($line = ) { $line =~ s/[\r\n]//g; my $match = 0; foreach my $spec (@specs) { if ($line =~ /$spec/i) { $match = 1; last; } } #### If there was a match, save this accession if ($match) { my @columns = split("\t",$line); $biosequence_accessions{$columns[$col]}++; } } close(INFILE); } # print join(";",keys(%biosequence_accessions)),"\n"; return join(";",keys(%biosequence_accessions)); } # end searchExternal