#!/usr/local/bin/perl ############################################################################### # Program : EvalPepEvidence # Author : Terry Farrah # $Id: main.cgi 6972 2012-02-28 06:50:02Z dcampbel $ # # Description : Interface for manual evaluation of peptide evidence. # # SBEAMS is Copyright (C) 2000-2013 Institute for Systems Biology # This program is governed by the terms of the GNU General Public License (GPL) # version 2 as published by the Free Software Foundation. It is provided # WITHOUT ANY WARRANTY. See the full description of GPL terms in the # LICENSE file distributed with this software. # ############################################################################### ############################################################################### # Get the script set up with everything it will need ############################################################################### use strict; use vars qw ($q $sbeams $sbeamsMOD $PROG_NAME $current_contact_id $current_username); use lib qw (../../lib/perl); use CGI::Carp qw(fatalsToBrowser croak); use FindBin; use lib "$FindBin::Bin/../../lib/perl"; use SBEAMS::Connection qw($q $log); use SBEAMS::Connection::DataTable; use SBEAMS::Connection::Settings; use SBEAMS::Connection::Tables; use SBEAMS::Connection::TabMenu; use SBEAMS::PeptideAtlas; use SBEAMS::PeptideAtlas::Settings; use SBEAMS::PeptideAtlas::Tables; $sbeams = new SBEAMS::Connection; $sbeamsMOD = new SBEAMS::PeptideAtlas; $sbeamsMOD->setSBEAMS($sbeams); ############################################################################### # Global Variables ############################################################################### $PROG_NAME = 'EvalPepEvidence'; main(); ############################################################################### # Main Program: # # Call $sbeams->Authentication and stop immediately if authentication # fails else continue. ############################################################################### sub main { #### Do the SBEAMS authentication and exit if a username is not returned exit unless ($current_username = $sbeams->Authenticate( permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin', 'PeptideAtlas_readonly', 'PeptideAtlas_exec'], #connect_read_only=>1, allow_anonymous_access=>1, )); #### Read in the default input parameters my %parameters; my $n_params_found = $sbeams->parse_input_parameters( q=>$q, parameters_ref=>\%parameters ); if ( $parameters{reset_id} && $parameters{reset_id} eq 'true' ) { $sbeamsMOD->clearBuildSettings(); } ## get project_id to send to HTMLPrinter display my $project_id = $sbeamsMOD->getProjectID( atlas_build_name => $parameters{atlas_build_name}, atlas_build_id => $parameters{atlas_build_id} ); #### Process generic "state" parameters before we start $sbeams->processStandardParameters(parameters_ref=>\%parameters); # $sbeams->printCGIParams($q); #### Decide what action to take based on information so far if ($parameters{action} eq "???") { # Some action } else { $sbeamsMOD->display_page_header(project_id => $project_id); handle_request(ref_parameters=>\%parameters); $sbeamsMOD->display_page_footer(); } } # end main ############################################################################### # Show the main welcome page ############################################################################### sub handle_request { my %args = @_; #### Process the arguments list my $ref_parameters = $args{'ref_parameters'} || die "ref_parameters not passed"; my %parameters = %{$ref_parameters}; #### Get the current atlas_build_id based on parameters or session my $atlas_build_id = $sbeamsMOD->getCurrentAtlasBuildID( parameters_ref => \%parameters, ); if (defined($atlas_build_id) && $atlas_build_id < 0) { #### Don't return. Let the user pick from a valid one. #return; } #### Get the HTML to display the tabs my $tabMenu = $sbeamsMOD->getTabMenu( parameters_ref => \%parameters, program_name => $PROG_NAME, ); if ($sbeams->output_mode() eq 'html') { print "
\n"; print $tabMenu->asHTML() if ($sbeams->output_mode() eq 'html'); print "
\n"; } #### Read in the standard form values my $apply_action = $parameters{'action'} || $parameters{'apply_action'}; my $TABLE_NAME = $parameters{'QUERY_NAME'}; #### Set some specific settings for this program my $PROGRAM_FILE_NAME = $PROG_NAME; my $base_url = "$CGI_BASE_DIR/$SBEAMS_SUBDIR/$PROGRAM_FILE_NAME"; my $help_url = "$CGI_BASE_DIR/help_popup.cgi"; #### Get a list of accessible project_ids my @accessible_project_ids = $sbeams->getAccessibleProjects(); my $accessible_project_ids = join( ",", @accessible_project_ids ) || '0'; my $ATLAS_BUILD_ID = 393; my $biosequence_id = $parameters{'biosequence_id'}; #### Get a list of putative peptides mapping to a putative protein my $sql = qq~ select bs.biosequence_name, p.peptide_sequence, ppep.spectrum_annotation_level_id, ppep.added_to_nextprot, ppep.added_to_nextprot, count(distinct si.spectrum_identification_id) as n_obs, count(distinct mpis.sample_id), count(distinct pia.peptide_instance_annotation_id), avg(sal.level_probability) from $TBAT_PUTATIVE_PROTEIN pprot join $TBAT_BIOSEQUENCE bs on bs.biosequence_id = pprot.biosequence_id join $TBAT_PUTATIVE_PROTEIN_PEPTIDE ppp on ppp.putative_protein_id = pprot.putative_protein_id join $TBAT_PEPTIDE p on p.peptide_id = ppp.peptide_id join $TBAT_PUTATIVE_PEPTIDE ppep on ppep.peptide_id = p.peptide_id join $TBAT_PEPTIDE_INSTANCE pi on pi.peptide_id = p.peptide_id and pi.atlas_build_id = $ATLAS_BUILD_ID left join $TBAT_PEPTIDE_INSTANCE_ANNOTATION pia on pia.peptide_instance_id = pi.peptide_instance_id and pia.record_status = 'N' left join $TBAT_SPECTRUM_ANNOTATION_LEVEL sal on sal.spectrum_annotation_level_id = pia.spectrum_annotation_level_id join $TBAT_MODIFIED_PEPTIDE_INSTANCE mpi on mpi.peptide_instance_id = pi.peptide_instance_id join $TBAT_MODIFIED_PEPTIDE_INSTANCE_SAMPLE mpis on mpis.modified_peptide_instance_id = mpi.modified_peptide_instance_id join $TBAT_SPECTRUM_IDENTIFICATION si on si.modified_peptide_instance_id = mpi.modified_peptide_instance_id where pprot.biosequence_id = '$biosequence_id' group by bs.biosequence_name, p.peptide_sequence, ppep.spectrum_annotation_level_id, ppep.added_to_nextprot order by p.peptide_sequence ~; my @results = $sbeams->selectSeveralColumns($sql); # Extract pepseqs from results and store in array & hash my %unique_peps_hash = (); my @unique_peps_array = (); foreach my $result_aref ( @results ) { my $pepseq = $result_aref->[1]; push @unique_peps_array, $pepseq; $unique_peps_hash{$pepseq} = 1; } my $n_unique_peps = scalar @unique_peps_array; ### TODO : break out subroutines eval_sample_congruence and cluster_peptides. ### issue: variable scoping. Why can't I just cut/paste the code below ### into the subroutines and have it work? e.g. $biosequence_id undefined. my %n_matching_samples; #eval_sample_congruence(); my %sample_ids_for_other_peps; my %sample_ids_by_unique_peps; # Get all pepseqs that map to this protein my $sql = qq~ select p.peptide_sequence from $TBAT_PEPTIDE_MAPPING pm join $TBAT_PEPTIDE_INSTANCE pi on pi.peptide_instance_id = pm.peptide_instance_id join $TBAT_PEPTIDE p on p.peptide_id = pi.peptide_id where pm.matched_biosequence_id = '$biosequence_id' and pi.atlas_build_id = $ATLAS_BUILD_ID ~; my @all_peps = $sbeams->selectOneColumn($sql); my $n_all_peps = scalar @all_peps; my $n_other_peps = $n_all_peps - $n_unique_peps; # For each pep that maps to this protein for my $pep (sort @all_peps) { # Get its sample_ids my $sql = qq~ select mpis.sample_id from $TBAT_PEPTIDE p join $TBAT_PEPTIDE_INSTANCE pi on pi.peptide_id = p.peptide_id and pi.atlas_build_id = $ATLAS_BUILD_ID join $TBAT_MODIFIED_PEPTIDE_INSTANCE mpi on mpi.peptide_instance_id = pi.peptide_instance_id join $TBAT_MODIFIED_PEPTIDE_INSTANCE_SAMPLE mpis on mpis.modified_peptide_instance_id = mpi.modified_peptide_instance_id where p.peptide_sequence = '$pep' ~; my @sample_ids = $sbeams->selectOneColumn($sql); # Store sample_ids according to whether pep is seen # in Swiss-Prot or is a "unique" pep for my $id (@sample_ids) { if ( defined $unique_peps_hash{$pep} ) { $sample_ids_by_unique_peps{$pep}->{$id} = 1; } else { $sample_ids_for_other_peps{$id} = 1; } } } # Print sample_ids for unique & other peps: my $n_other_samples = scalar keys %sample_ids_for_other_peps; #print "Sample congruence:
\n"; print "$n_other_samples sample IDs for $n_other_peps other (mapping to Swiss-Prot) peps:
\n"; #-------------------------------------------------- # for my $sample_id (keys %sample_ids_for_other_peps) { # print "$sample_id "; # } # print "
\n"; #-------------------------------------------------- # For each unique pepseq for my $pep (@unique_peps_array) { my $nsamples = 0; my $nmatches = 0; # For each sample for my $sample_id (keys %{$sample_ids_by_unique_peps{$pep}}) { $nsamples++; # Is this sample seen for any of the other pepseqs? if (defined $sample_ids_for_other_peps{$sample_id}) { $nmatches++; } } $n_matching_samples{$pep} = $nmatches; #print "$pep $nmatches/$nsamples
\n"; } print "
\n"; my %clustering; my @clusters; #cluster_peptides(); my %overlap; for (my $i=0; $i < $n_unique_peps; $i++) { my $pep1 = $unique_peps_array[$i]; for (my $j=$i+1; $j < $n_unique_peps; $j++) { my $pep2 = $unique_peps_array[$j]; if (pep_overlap($pep1, $pep2)) { $overlap{$pep1}->{$pep2} = 1; $overlap{$pep2}->{$pep1} = 1; # printf "Overlap:
    %d: ". # "$unique_peps_array[$i]
    %d: ". # "$unique_peps_array[$j]
\n", $i+1, $j+1; } } } my $n_clusters=0; for my $pep (@unique_peps_array) { my $cluster_number; if (defined $clustering{$pep}) { $cluster_number = $clustering{$pep}; } else { $cluster_number = $n_clusters; $clustering{$pep} = $cluster_number; $n_clusters++; } for my $pep2 (keys %{$overlap{$pep}}) { if (defined $clustering{$pep2} ) { my $cluster_number_2 = $clustering{$pep2}; # merge clusters if ($cluster_number_2 != $cluster_number) { for my $pep3 (@unique_peps_array) { if ((defined $clustering{$pep3}) && $clustering{$pep3} == $cluster_number_2) { $clustering{$pep3} = $cluster_number; } } } } else { $clustering{$pep2} = $cluster_number; } } } for my $pep (@unique_peps_array) { $clusters[$clustering{$pep}]->{$pep} = 1; } #### If the output_mode is HTML, then display the form if ($sbeams->output_mode() eq 'html') { print "Go to EvalProtEvidence page
 
\n"; my $table = SBEAMS::Connection::DataTable->new(); my $rows = $table->getRowNum(); # Can add Comments back once I change their datatype from text to # varchar in the schema! need to add back to SELECT and GROUP BY # as well. $table->addRow( [ 'Biosequence', 'PepSeq', 'Cluster', 'Status', 'in NP?', 'Comments', 'N Obs', 'Samples', 'Matching', 'N User Evals', 'Avg User Eval'] ); $table->setRowAttr( COLS => [1..11], ROWS => [1], BGCOLOR => '#bbbbbb', ALIGN=>'CENTER' ); $table->setHeaderAttr( BOLD => 1 ); foreach my $result_aref ( @results ) { my @row; my @trinfo; my $bgcolor = '#dddddd'; $row[0] =<<" END"; [0]&action=GO&apply_action=QUERY&ann=1> $result_aref->[0] END $row[1] =<<" END"; [1]&action=GO&apply_action=QUERY&ann=1> $result_aref->[1] END my $pepseq = $result_aref->[1]; my $cluster_number = $clustering{$pepseq}; my $n_members = scalar keys %{$clusters[$cluster_number]}; $row[2] = ($n_members > 1) ? $cluster_number+1 : ''; $row[3] = $result_aref->[2]; $row[4] = $result_aref->[3]; #$row[4] = $sbeams->truncateStringWithMouseover( string => $result_aref->[4], len => 50 ); $row[6] = $result_aref->[5]; $row[7] = $result_aref->[6]; $row[8] = $n_matching_samples{$pepseq}; $row[9] = $result_aref->[7]; $row[10] = sprintf "%0.2f", $result_aref->[8]; $table->addRow( \@row ); $rows = $table->getRowNum(); $table->setRowAttr( COLS => [1..11], ROWS => [$rows], BGCOLOR => $bgcolor, @trinfo ); } $table->setColAttr( COLS => [1..11], ROWS => [1..$rows], NOWRAP => 1 ); $table->setColAttr( COLS => [3], ROWS => [1..$rows], ALIGN => 'RIGHT' ); $table->setColAttr( COLS => [4,5], ROWS => [1..$rows], ALIGN => 'CENTER' ); print "$table"; print $q->hidden( "apply_action", ''); print $q->end_form; } } # end showMainPage ########################################################################### # eval_sample_congruence -- For each peptide, of all the samples it's seen # in, how many are samples in which other peptides for this protein, # mapping to Swiss-Prot, have been seen? Non-zero is good. ########################################################################### #sub eval_sample_congruence { #} ########################################################################### # cluster_peptides -- cluster peptides that share 5 contiguous residues ########################################################################### #sub cluster_peptides { #} ########################################################################### # pep_overlap # Simple-minded function to return whether two peptides overlap each other. # Take the first 5 residues and last 5 residues of each, and see if those # appear anywhere in the other. ########################################################################### sub pep_overlap { my $pep1 = shift; my $pep2 = shift; #print "pep_overlap |$pep1| |$pep2|
\n"; my $first_five_residues_1 = substr($pep1, 0, 5); my $last_five_residues_1 = substr($pep1, length($pep1)-5, 5); #print "$pep1 $first_five_residues_1 $last_five_residues_1
\n"; my $first_five_residues_2 = substr($pep2, 0, 5); my $last_five_residues_2 = substr($pep2, length($pep2)-5, 5); #print "$pep2 $first_five_residues_2 $last_five_residues_2
\n"; return (($pep1 =~ /$first_five_residues_2/ ) || ($pep1 =~ /$last_five_residues_2/ ) || ($pep2 =~ /$first_five_residues_1/ ) || ($pep2 =~ /$last_five_residues_1/ )); }