#!/usr/local/bin/perl ############################################################################### # $Id$ # # SBEAMS is Copyright (C) 2000-2021 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. ############################################################################### ############################################################################### # Get the script set up with everything it will need ############################################################################### use strict; use CGI::Carp qw(fatalsToBrowser croak); use Data::Dumper; use lib qw (../../lib/perl); use vars qw ($q $sbeams $sbeamsMOD $PROG_NAME $current_contact_id $current_username $glyco_query_o); use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::DataTable; use SBEAMS::BioLink::Tables; use SBEAMS::BioLink::MSF; use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; use SBEAMS::Connection::TabMenu; ############################################################################### # Global Variables ############################################################################### # $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); $sbeams->setSBEAMS_SUBDIR($SBEAMS_SUBDIR); { # Main # Authenticate or exit $current_username = $sbeams->Authenticate(allow_anonymous_access=>1) || exit; #### Read in the default input parameters my %params; $sbeams->parse_input_parameters( q=>$q, parameters_ref=>\%params ); $sbeams->processStandardParameters(parameters_ref=>\%params); ## get project_id to send to HTMLPrinter display my $project_id = $sbeams->getCurrent_project_id(); my $page = $sbeams->getGifSpacer( 800 ) . "
\n"; my $tabMenu = $sbeamsMOD->getTabMenu( parameters_ref => \%params, program_name => 'viewOrthologs', ); $page .= "$tabMenu"; $sbeamsMOD->display_page_header(project_id => $project_id); $log->debug( "Begin page:" .time() ); # Use default if requested if ( $params{use_default} ) { $params{group_id} = 'OG5_126932'; } if ( $params{group_id} ) { $page .= get_ortholog_display( %params ); } elsif ( $params{list_groups} ) { $page .= get_ortholog_list( %params ); } else { $page .= $sbeams->makeErrorText( "Missing required parameter group_id" ); } $log->debug( "Page done:" .time() ); # Display page print $page; $sbeamsMOD->display_page_footer(); } # end main sub get_ortholog_display { my %args = @_; # Content scalar to return my $content = ''; # check params for my $arg ( qw( group_id ) ) { unless ( $args{$arg} ) { $content .= $sbeams->makeErrorText("Missing required parameter: $arg
"); return $content; } } # Highlight if available. $args{entry_accession} ||= ''; # limit by bioseq_id? my $bioseq_clause = ''; if( $args{restore} ) { $log->info( "Restoring" ); $args{bioseq_id} = ''; } elsif( $args{bioseq_id} && !$args{search_groups} ) { $bioseq_clause = "AND B.biosequence_id IN ( $args{bioseq_id} )"; } my $all_projects = join( ",", $sbeams->getAccessibleProjects() ); my $curr_bid = $sbeamsMOD->getCurrentAtlasBuildID( parameters_ref => \%args ); # my $null_clause = ( $args{show_all} ) ? 'OR search_key_type IS NULL' : ''; # SQL to fetch ortholog groups and bioseqs in them. my $sql =<<" END_SQL"; SELECT DISTINCT ortholog_group, biosequence_name, organism_name, search_key_name, CAST( biosequence_seq AS VARCHAR(4000) ), biosequence_id, B.biosequence_set_id, ORG.organism_id FROM biolink.dbo.ortholog O JOIN $TBAT_BIOSEQUENCE B ON O.entry_accession = B.biosequence_name JOIN $TBAT_ATLAS_BUILD AB ON AB.biosequence_set_id = B.biosequence_set_id JOIN $TB_ORGANISM ORG ON O.ortholog_organism_id = ORG.organism_id LEFT JOIN $TBAT_SEARCH_KEY SK ON SK.resource_name = B.biosequence_name WHERE ortholog_group = '$args{group_id}' -- AND AB.project_id IN ( $all_projects ) AND ( search_key_type = 'Full Name' OR search_key_type = 'Description' OR search_key_type LIKE '%Symbol' OR search_key_type LIKE 'Annotation ID' OR search_key_type IS NULL OR search_key_type = 'RefSeq' OR search_key_type = 'ORF Name' ) $bioseq_clause -- AND ORG.organism_id = 38 ORDER BY organism_name, B.biosequence_name ASC END_SQL # 0 ortholog_group, # 1 biosequence_name, # 2 organism_name, # 3 search_key_name, # 4 CAST( biosequence_seq AS VARCHAR(4000) ), # 5 biosequence_id # 6 biosequence_set_id # 7 organism_id $log->debug( "prepare stmt: $sql " . time() ); my $sth = $sbeams->get_statement_handle( $sql ); $log->debug( "prepare stmt done: " .time() ); # array of protein/ortholog info my @all_proteins; # hash of biosequence_ids -> seq or name my %bioseq_id2seq; my %bioseq_id2name; # hash seq <=> accession my %seq2acc; my %acc2seq; # Store (degenerate) acc -> bioseq_id my %acc2bioseq_id; # Store organism for each biosequence set my %bss2org; my %organisms; # Counter my $cnt = 0; while ( my @row = $sth->fetchrow_array() ) { $organisms{$row[7]}++; my $seq = $row[4]; $seq =~ s/[^a-zA-Z]//g; $bss2org{$row[6]} ||= $row[2]; # Check this out later for dups... $seq2acc{$seq} ||= {}; $seq2acc{$seq}->{$row[1]}++; $bioseq_id2seq{$row[5]} = $row[4]; $bioseq_id2name{$row[5]} = $row[1]; $acc2bioseq_id{$row[1]} ||= []; push @{$acc2bioseq_id{$row[1]}}, $row[5]; # Cache first seq for each accession, and push onto display unless ( $acc2seq{$row[1]} ) { $acc2seq{$row[1]} = $seq; # push row info for display push @all_proteins, \@row; } $cnt++; } $log->debug( "Iterated $cnt rows: " .time() ); # not quite working yet! my %dup_seqs; my $dup_char = 'A'; # for my $seq ( keys( %seq2acc ) ) { for my $prot ( @all_proteins ) { my $seq = $prot->[4]; # $log->debug( "SEQ is $seq" ); next unless scalar(keys(%{$seq2acc{$seq}})) > 1; for my $acc ( keys ( %{$seq2acc{$seq}} ) ) { next if $dup_seqs{$acc}; $dup_seqs{$acc} = $dup_char; } $dup_char++; } # Get stringified version for IN clause. my $bioseq_ids = join( ",", sort{ $a <=> $b } keys( %bioseq_id2name ) ); if ( !$bioseq_ids ) { return "

makeInfoText( "No matching sequences found for ortholog group $args{group_id}" ); } my $organisms = join( ',', keys( %organisms ) ); my $default_builds = get_default_builds( $all_projects, $organisms ); # We will get just one build per organism for now #Punt # my $accessible_builds = get_accessible_builds( $all_projects, $organisms ); # Fixed order list of builds - first use default builds, then any # accessible build my @build_ids = sort { $default_builds->{$a} cmp $default_builds->{$b} } keys( %{$default_builds} ); # push @build_ids, sort { $accessible_builds->{$a} cmp $accessible_builds->{$b} } keys( %{$accessible_builds} ); my %bseen; my $build_str; for my $id ( @build_ids ) { next if $bseen{$id}++; my $sep = ( $build_str ) ? ',' : ''; $build_str .= $sep . $id; } # hash of info keyed by build_id my %build_seqs; # To be passed to checkbox list routine. my %build_info; my %view_org2build; my %view_build2org; # Skip non-default builds, this var controls debug stmt my %skippy; if ( $build_str ) { # Define SQL to fetch info about builds in which a peptide has been observed my $sql =<<" ENDSQL"; SELECT distinct PI.peptide_instance_id, n_observations, PM.matched_biosequence_id, PI.atlas_build_id, atlas_build_name, peptide_sequence, AB.biosequence_set_id -- AB.biosequence_set_id, -- O.organism_name FROM $TBAT_PEPTIDE_MAPPING PM JOIN $TBAT_PEPTIDE_INSTANCE PI ON PI.peptide_instance_id = PM.peptide_instance_id JOIN $TBAT_PEPTIDE P ON ( PI.peptide_id = P.peptide_id ) JOIN $TBAT_ATLAS_BUILD AB ON ( PI.atlas_build_id = AB.atlas_build_id ) -- JOIN $TBAT_BIOSEQUENCE_SET BS ON ( AB.biosequence_set_id = BS.biosequence_set_id ) -- JOIN $TB_ORGANISM O ON ( O.organism_id = BS.organism_id ) WHERE PM.matched_biosequence_id IN ( $bioseq_ids ) AND AB.atlas_build_id IN ( $build_str ) ORDER BY PI.atlas_build_id DESC -- ORDER BY organism_name ASC, PI.atlas_build_id DESC ENDSQL $log->debug( "prepare instance stmt: " . time() ); # $log->debug( $sql ); my $sth = $sbeams->get_statement_handle( $sql ); $log->debug( "prepare instance stmt done: " .time() ); # 0 count(distinct PI.peptide_id), # 1 sum( n_observations ), # 2 PM.matched_biosequence_id, # 3 PI.atlas_build_id, # 4 atlas_build_name, # 5 peptide_sequence # 6 fizzle # 7 biosequence_set_id # 0 new PI.peptide_instance_id, # 1 new n_observations, # 2 new PM.matched_biosequence_id, # 3 new PI.atlas_build_id, # 4 new atlas_build_name, # 5 new peptide_sequence, # 6 new biosequence_set_id, $cnt = 0; # Loop to calculate coverate, per prot n_obs for each build $log->debug( "Iterate, calc coverage: " .time() ); while ( my @row = $sth->fetchrow_array() ) { # Effectively eliminate non-default builds # if ( !$default_builds->{$row[3]} ) { # $log->debug( "skipping $row[3] 'cause its not default" ) if !$skippy{$row[3]}++; # next; # } my $org = $bss2org{$row[6]}; $build_info{$row[3]} ||= {}; $build_info{$row[3]}->{name} = $row[4]; $build_info{$row[3]}->{org} = $org; $build_info{$row[3]}->{is_default} = ( $row[6] ) ? 'yes' : 'no'; $build_info{$row[3]}->{is_curr} = ( $row[3] == $curr_bid ) ? 1 : 0; $build_info{$row[3]}->{bss} = $row[7]; # Simplistic initial view mechanism if ( !$view_org2build{$org} ) { $build_info{$row[3]}->{visible}++; $build_info{$row[3]}->{view}++; $view_org2build{$org} = $row[3]; $view_build2org{$row[3]} = $org; # $log->debug( "View build for $org is $row[3] ( $row[4] )" ); } elsif ( $row[3] == $curr_bid ) { $build_info{$row[3]}->{visible}++; $build_info{$row[3]}->{view}++; # Gotta unset the build my $deprecated = $view_org2build{$org}; # $log->debug( "Replacing info from $build_info{$deprecated}->{name} with info from $build_info{$row[3]}->{name}, 'cause its current!" ); $build_info{$deprecated}->{visible} = 0; $build_info{$deprecated}->{view} = 0; delete $view_build2org{$deprecated}; $view_org2build{$org} = $row[3]; $view_build2org{$row[3]} = $org; } # error cases, shouldn't happen? next unless $row[0]; # if ( $row[0] > 1 ) { # $log->warn( "count peptide_id > 1" ); # $log->warn( join( ", ", @row ) ); # } $build_seqs{$row[3]} ||= {}; $build_seqs{$row[3]}->{$row[2]} ||= {}; $build_seqs{$row[3]}->{$row[2]}->{count}++; $build_seqs{$row[3]}->{$row[2]}->{n_obs} += $row[1]; # Calc coverage for this build/protein combination $build_seqs{$row[3]}->{$row[2]}->{coverage} ||= {}; my $posn = $sbeamsMOD->get_site_positions( pattern => $row[5], seq => $bioseq_id2seq{$row[2]} ); for my $p ( @$posn ) { for ( my $i = 0; $i < length($row[5]); $i++ ){ my $covered_posn = $p + $i; $build_seqs{$row[3]}->{$row[2]}->{coverage}->{$covered_posn}++; } } # Going to cache with accession for ease of use! $build_seqs{$row[3]}->{$bioseq_id2name{$row[2]}}->{coverage} = $build_seqs{$row[3]}->{$row[2]}->{coverage}; $build_seqs{$row[3]}->{$bioseq_id2name{$row[2]}}->{bioseq} = $row[2]; $cnt++; } $log->debug( "Iterated $cnt rows: " .time() ); } # Table headings my @headings = ( 'Accession', 'Organism' ); # Index of build col in display row, 2 already added above my $b_cnt = 2; my %build_posn; $log->debug( "Iterate, prepare display: " .time() ); # Seen builds skipped, allows us to use composite of default and accessible. my %seen; for my $build_id ( @build_ids ) { next if $seen{$build_id}++; $build_posn{$b_cnt++} = $build_id; my $dag = ''; my $display_name = $build_info{$build_id}->{name}; # $display_name =~ s/Yeast|Mouse|Human|Drosophila//; $display_name =~ s/Peptide//; $display_name =~ s/Atlas//; $display_name =~ s/P\s*>*0.\d//; $display_name =~ s/^_//; $display_name =~ s/_$//; $build_info{$build_id}->{display_name} = $display_name; if ( $view_build2org{$build_id} ) { $dag = '^†'; } push @headings, "$display_name $dag"; } push @headings, 'Protein Name/Description'; $log->debug( "Prepare display complete: " .time() ); # Set up display table my $table = SBEAMS::Connection::DataTable->new( BORDER => 0, HEADER => 1 ); $table->addRow( \@headings ); $table->setRowAttr( ROWS => [1], BGCOLOR => '#002664' ); $table->setHeaderAttr( WHITE_TEXT => 1, BOLD => 1 ); my $current; my %subject; my $bgcolor = '#f3f1e4'; my $cnt = 0; my $fasta = ''; my %bss2color; my %bgcolor = ( 'consensus' => '#FFFFFF' ); my %acc_color = ( 'consensus' => '#FFFFFF' ); $log->debug( "Iterate, build table: " .time() ); for my $row ( @all_proteins ) { #Legacy... my @row = @{$row}; my $seq = $row[4]; # make fasta file in memory $fasta .= ">$row[1]\n"; $fasta .= "$seq\n"; my $acc_link = "$row[1]"; $row[3] = $sbeams->truncateStringWithMouseover( string => $row[3], len => 50 ); if ( $dup_seqs{$row[1]} ) { $acc_link .= "^{$dup_seqs{$row[1]}}"; } # $build_seqs{$row[3]}->{$row[2]}->{n_obs} += $row[1]; # $build_seqs{$row[3]}->{$row[2]}->{count} += $row[0]; $current ||= $row[2]; if ( $current ne $row[2] ) { # New organism $bgcolor = ( $bgcolor eq '#d3d1c4' ) ? '#f3f1e4' : '#d3d1c4'; $current = $row[2]; } $bgcolor{$row[1]} = $bgcolor; $acc_color{$row[1]} = '#FFFFFF'; my @table_row = ( $acc_link, $row[2] ); # my $b_cnt = 2; for my $build_id ( @build_ids ) { $build_info{$build_id}->{bgcolor} = $bgcolor; # $log->debug( "build $build_id is getting color $bgcolor!" ); # my $build_info = '*'; my $build_info = ' '; my $name = $build_info{$build_id}->{name}; for my $synon_bioseq_id ( @{$acc2bioseq_id{$row[1]}} ) { if ( $build_seqs{$build_id}->{$synon_bioseq_id} ) { my $cnt = $build_seqs{$build_id}->{$synon_bioseq_id}->{count}; my $obs = $build_seqs{$build_id}->{$synon_bioseq_id}->{n_obs}; my $link = ""; $build_info = " $link $obs ($cnt) "; } } push @table_row, $build_info; } push @table_row, $row[3]; $table->addRow( \@table_row ); $table->setRowAttr( ROWS => [$table->getRowNum()], BGCOLOR => $bgcolor, ); if ( $row[1] eq $args{entry_accession} ) { $subject{name} = $args{entry_accession}; $table->setColAttr( ROWS => [$table->getRowNum()], COLS => [1], BGCOLOR => '#FFFACD' ); $acc_color{$row[1]} = '#FFFACD'; } $cnt++; } $log->debug( "Build table done: " .time() ); $table->setRowAttr( ROWS => [1], ALIGN => 'CENTER' ); $table->setColAttr( ROWS => [1..$table->getRowNum()], COLS => [1..$table->getColNum()], NOWRAP => 1 ); $table->setColAttr( ROWS => [1..$table->getRowNum()], COLS => [3..$table->getColNum()-1], ALIGN => 'CENTER' ); # FIXME # $build_posn{$b_cnt++} = $build_id; # $view_org2build{$org} = $row[3]; # $view_build2org{$row[3]} = $org; $log->debug( "Iterate hide cols: " .time() ); for my $posn ( sort { $a <=> $b } keys %build_posn ) { if ( $view_build2org{$build_posn{$posn}} ) { $table->setColAttr( ROWS => [1..$table->getRowNum()], COLS => [$posn + 1], CLASS => 'tbl_visible', ID => $build_posn{$posn}, NAME => $build_posn{$posn}, ); } else { $table->setColAttr( ROWS => [1..$table->getRowNum()], COLS => [$posn + 1], CLASS => 'tbl_hidden', ID => $build_posn{$posn}, NAME => $build_posn{$posn}, ); } } $log->debug( "Hide cols complete: " .time() ); # finished my $MSF = SBEAMS::BioLink::MSF->new(); $log->debug( "Run alignment: " .time() ); my $clustal_display; my $alignment_overview = qq~ This display shows the protein sequences in the $args{group_id} group aligned using the Clustal algorithm, described below (click the Show Description link if necessary). Each sequence is highlighted to show which peptide(s) have been observed for that sequence in one of the builds for the corresponding organism. The builds used to highlight the sequences for a given organism are shown in the table above with a dagger symbol (^†) in the heading, and are diplayed in the table by default. ~; my $duplicate_text = ''; if ( scalar( keys( %dup_seqs ) ) ) { $duplicate_text = " Any sequence-identical proteins are denoted by a superscript letter."; } if ( $cnt > 1000 ) { $alignment_overview = ''; $clustal_display = $sbeams->makeErrorText( "Too many sequences to run alignment, skipping" ); } else { my $clustal = $MSF->runClustalW( sequences => $fasta ); if ( ref $clustal ne 'ARRAY' ) { $clustal_display = $sbeams->makeErrorText( "Error running Clustal: $clustal" ); } else { $clustal_display = get_clustal_display( alignments => $clustal, bgcolor => \%bgcolor, acc_color => \%acc_color, highlight_builds => \%view_build2org, build_seqs => \%build_seqs, acc2bioseq => \%acc2bioseq_id ); } } $log->debug( "ViewOrtho, fasta is " . length( $fasta ) . ", result is " . length( $clustal_display ) ); my $alignment_text = get_alignment_text(); $log->debug( "Run alignment Done: " .time() ); $log->debug( "Color build seqs: " .time() ); my $add_builds = get_add_builds_section( build_info => \%build_info ); $log->debug( "Color build seqs done: " .time() ); $log->debug( "Display " .time() ); my $spc = ' ' x 50; my $search_help = qq~

To search OrthoMCL groups by name, enter group name in text box. Use 'show_all' checkbox to show information for all group members, regardless of whether they are present in a Peptide Atlas build.

~; my @toggle = $sbeams->make_toggle_section( textlink => 1, imglink => 1, hidetext => "Hide Help", showtext => "Show Help", visible => 0, sticky => 1, name => 'search_orthologs_help', content => $search_help ); # my $search_groups = get_group_search_section( %args, help => "($toggle[1])" ); # $toggle[0] # $search_groups # Removed text # You can add information about additional builds by clicking the 'Add builds' link below and selecting the desired build(s). Builds used to highlight the multiple sequence alignment below are shown with a dagger symbol (^†). $duplicate_text if ( $table->getRowNum() == 1 ) { return $sbeams->makeInfoText( "Unable to find an OrthoMCL group for this protein" ); } $subject{name} ||= 'n/a'; my $group_link = qq~ OG5_126932 ~; # $add_builds return( <<" END" );

Cross-species comparison of OrthoMCL ortholog group $args{group_id}

The table below shows information about homologous(ortholog/paralog) proteins in the $group_link ortholog group. The groups were computed by OrthoMCL version 5, and are based on BLAST homology. The numbers shown in the columns under each build are the total number of observations of peptides that can be mapped to a given protein, and the total number of distinct peptides seen: total observations ( number distinct peptides ).

$table

Homologous Sequence Alignment

$alignment_overview

$clustal_display

$alignment_text
END } sub get_default_builds { my $accessible_projects = shift; my $organism_str = shift; my $sql = qq~ SELECT DAB.atlas_build_id, organism_name FROM $TBAT_DEFAULT_ATLAS_BUILD DAB JOIN $TBAT_ATLAS_BUILD AB ON AB.atlas_build_id = DAB.atlas_build_id JOIN $TB_ORGANISM O ON O.organism_id = DAB.organism_id WHERE AB.record_status = 'N' AND AB.project_id IN ( $accessible_projects ) AND DAB.organism_id IN ( $organism_str ) ORDER BY organism_specialized_build ASC, O.organism_id ASC ~; my $sth = $sbeams->get_statement_handle( $sql ); my %mapping; my %organism; while ( my @row = $sth->fetchrow_array() ) { next if $organism{$row[1]}++; # Just one per organism $mapping{$row[0]} = $row[1]; } # die Dumper %mapping; return \%mapping; } sub get_accessible_builds { my $accessible_projects = shift; my $organism_str = shift; my $sql = qq~ SELECT AB.atlas_build_id, DAB.organism_id FROM $TBAT_DEFAULT_ATLAS_BUILD DAB JOIN $TBAT_ATLAS_BUILD AB ON AB.atlas_build_id = DAB.atlas_build_id WHERE DAB.record_status = 'N' AND DAB.organism_id IN ( $organism_str ) AND AB.project_id IN ( $accessible_projects ) ORDER BY DAB.organism_id ASC, AB.atlas_build_id DESC ~; my $max_builds = 3; my $sth = $sbeams->get_statement_handle( $sql ); my %mapping; my %organism; while ( my @row = $sth->fetchrow_array() ) { next if $organism{$row[1]}++ >= $max_builds; # Just $max_builds per organism $mapping{$row[0]} = $row[1]; } return \%mapping; } sub get_alignment_text { my $content =<<" END";

  Alignment was created by the ClustalW program, which is maintained at the Conway Institute UCD Dublin.  
  The alignment considers physical properties of the amino acids in the protein sequence, and the 
  program was invoked using the following command-line:

   clustalw -tree -align -outorder=input -infile=clustal_file

  The consensus 'sequence' uses symbols to represent the level of conservation of amino acids at any given 
  position.  The text below, adapted from the clustal documentation, describes the various symbols used.

    CONSENSUS SYMBOLS:

       An alignment will display by default the following symbols denoting the degree of conservation observed in each column:

      "*" means that the residues or nucleotides in that column are identical in all sequences in the alignment.

      ":" means that conserved substitutions have been observed

      "." means that semi-conserved substitutions are observed. 

      " " means that substitutions are not conservative.

END my @toggle = $sbeams->make_toggle_section( textlink => 1, imglink => 1, hidetext => "Hide Description", showtext => "Show ClustalW Description", visible => 1, sticky => 1, name => 'viewOrthologs_description', content => $content ); return " $toggle[1]
$toggle[0]"; } sub get_group_search_section { my %args = @_; return '' unless $args{group_id}; my $checked = ( $args{show_all} ) ? 'checked' : ''; my $help = $args{help} || ''; my $group_html = qq~

Group Name:

Show All:

$help

~; return $group_html; } sub get_add_builds_section { my %args = @_; my $checklist = $sbeamsMOD->get_atlas_checklist( %args, js_call => 'toggle_view' ); my $js = qq~ ~; my $content =<<" END"; $js

			   $checklist

END my @toggle = $sbeams->make_toggle_section( textlink => 1, imglink => 1, hidetext => "Hide Builds", showtext => "Add Builds", visible => 0, sticky => 1, name => 'viewOrthologs_addbuilds', content => $content ); return "$toggle[1]
$toggle[0]"; } sub get_clustal_display { # Passed named args # alignments => $clustal, # ref to array of arrayrefs of acc, seq # bgcolor => \%bgcolor, # ref to hash of acc => bgcolor for seq # acc_color => \%acc_color, # ref to hash of acc => bgcolor for acc # highlight_builds => $highlight # ref to hash of build_ids to color with # build_seqs => \%build_seqs # ref to hash of per build seq attrs # acc2bioseq => \%build_seqs # ref to hash of per build seq attrs # $build_seqs{$row[3]}->{$row[2]}->{coverage}->{$covered_posn}++; my %args = @_; my $display = qq~

~; for my $seq ( @{$args{alignments}} ) { # $log->debug( "After passing we have $seq->[0] and $seq->[1]!" ); my $sequence = $seq->[1]; if ( $seq->[0] eq 'consensus' ) { # $log->debug( "Consensus!@!!!!!" ); $sequence =~ s/ / /g } else { # $log->debug( "$seq->[0] isn't consensus, commence to highlighting $sequence" ); $sequence = highlight_sites( seq => $sequence, acc => $seq->[0], %args ); } my $checkbox = ''; unless ( $seq->[0] eq 'consensus' ) { my $acc_str = join( ',', sort { $a <=> $b } ( @{$args{acc2bioseq}->{$seq->[0]}} ) ); $checkbox = ""; } # $build_seqs{$row[3]}->{$bioseq_id2name{$row[2]}}->{bioseq} = $row[2]; $display .= "\n"; } my $toggle_checkbox = $sbeams->get_checkbox_toggle( controller_name => 'alignment_chk', checkbox_name => 'bioseq_id' ); my $toggle_text = $sbeams->makeInfoText( 'Toggle all checkboxes' ); $display .= "\n"; $display .= "

$checkbox	{$seq->[0]} ALIGN=right class=sequence_font>$seq->[0]:	{$seq->[0]}>$sequence
$toggle_checkbox	$toggle_text

\n"; return $display; } sub highlight_sites { # highlight_builds => $highlight # ref to hash of build_ids to color with # build_seqs => \%build_seqs # ref to hash of per build seq attrs # seq => $sequence # sequence to be processed, a la --A--AB- # acc => $accession # accession of seq to be processed my %args = @_; my $coverage; for my $build ( keys( %{$args{highlight_builds}} ) ) { if ( $args{build_seqs}->{$build} && $args{build_seqs}->{$build}->{$args{acc}} ) { $coverage = $args{build_seqs}->{$build}->{$args{acc}}->{coverage}; last; } } return $args{seq} unless $coverage; my @aa = split( '', $args{seq} ); my $return_seq = ''; my $cnt = 0; my $in_coverage = 0; my $span_closed = 1; for my $aa ( @aa ) { if ( $aa eq '-' ) { if ( $in_coverage && !$span_closed ) { $return_seq .= "$aa"; $span_closed++; } else { $return_seq .= $aa; } } else { # it is an amino acid if ( $coverage->{$cnt} ) { if ( $in_coverage ) { # already in if ( $span_closed ) { # Must have been jumping a --- gap $span_closed = 0; $return_seq .= "$aa"; } else { $return_seq .= $aa; } } else { $in_coverage++; $span_closed = 0; $return_seq .= "$aa"; } } else { # posn not covered! if ( $in_coverage ) { # were in, close now $return_seq .= "$aa"; $in_coverage = 0; $span_closed++; } else { $return_seq .= $aa; } } $cnt++; } } if ( $in_coverage && !$span_closed ) { $return_seq .= ''; } return $return_seq; } __DATA__