#!/usr/local/bin/perl
###############################################################################
# Program : ShowObservedSpectrum
#
# Description : This CGI program displays a single spectrum in PeptideAtlas
#
# Based upon the ShowSpectrum.cgi in the Proteomics module by
# Kerry & Eric Deutsch
#
###############################################################################
###############################################################################
# Basic SBEAMS setup
###############################################################################
use strict;
use FindBin;
use lib "$FindBin::Bin/../../lib/perl";
use vars qw ($q $sbeams $sbeamsMOD $PROG_NAME
$current_username $massCalculator $TESTONLY $VERBOSE );
use SBEAMS::Connection qw($q);
use SBEAMS::Connection::Settings;
use SBEAMS::Connection::Tables;
use SBEAMS::PeptideAtlas;
use SBEAMS::PeptideAtlas::Settings;
use SBEAMS::PeptideAtlas::Tables;
use SBEAMS::PeptideAtlas::PeptideFragmenter;
use SBEAMS::Proteomics::Tables;
use SBEAMS::Proteomics::PeptideMassCalculator;
use SBEAMS::Proteomics::SpecViewer;
use SBEAMS::PeptideAtlas::ProtInfo;
use File::Basename;
use Data::Dumper;
#use lib "/net/db/projects/Proteomics/lib";
use lib "/net/db/projects/Proteomics/devED/lib";
use Proteomics::Spectra::UniversalSpectrumIdentifier;
use Proteomics::Response qw(processParameters);
use Proteomics::Config;
use Proteomics::Spectra::Spectrum;
use Proteomics::Sequence::Peptide;
use Carp;
$SIG{__DIE__} = sub { &Carp::confess };
use CGI::Carp qw (fatalsToBrowser);
#$q = new CGI;
$sbeams = new SBEAMS::Connection;
$sbeamsMOD = new SBEAMS::PeptideAtlas;
$sbeamsMOD->setSBEAMS($sbeams);
$PROG_NAME="ShowObservedSpectrum";
my $CLASS = $PROG_NAME;
my $DEBUG = 0;
my $VERBOSE = 0; #### Set this to 1 to trace problems in spectrum finding
my $output_mode = 'HTML';
# Allowable neutral losses
my @neutral_loss_masses = ();
###############################################################################
# Define global variables if any and execute main()
###############################################################################
main();
###############################################################################
# Main Program:
#
# If $sbeams->Authenticate() succeeds, print header, process the CGI request,
# print the footer, and end.
###############################################################################
sub main {
#### Do the SBEAMS authentication and exit if a username is not returned
exit unless ($current_username = $sbeams->Authenticate(
permitted_work_groups_ref=>['PeptideAtlas_user','PeptideAtlas_admin',
'PeptideAtlas_readonly', 'PeptideAtlas_exec'],
#connect_read_only=>1,
allow_anonymous_access=>1,
));
#### Process certain actions, then print the header, figure and do what the user wants, and print footer
processDatabaseActions();
$sbeamsMOD->display_page_header();
processRequest();
$sbeamsMOD->display_page_footer();
$sbeams->display_page_footer(close_tables=>'YES',
separator_bar=>'YES',display_footer=>'NO');
} # end main
###############################################################################
# Process insert/update/delete, if any, then redirect
###############################################################################
sub processDatabaseActions {
my $redirect = 0;
my %parameters;
$sbeams->parse_input_parameters(q=>$q,parameters_ref=>\%parameters);
$sbeams->processStandardParameters(parameters_ref=>\%parameters);
my $apply_action = $q->param('apply_action');
my $prot_info = new SBEAMS::PeptideAtlas::ProtInfo;
$TESTONLY = 0;
$prot_info->setSBEAMS($sbeams);
$prot_info->setVERBOSE($VERBOSE);
$prot_info->setTESTONLY($TESTONLY);
## get original value
my $orig_spectrum_annotation_level = 0;
if($parameters{spectrum_annotation_id}){
my $spectrum_annotation_id = $parameters{spectrum_annotation_id};
my $sql = qq~
SELECT max(spectrum_annotation_level_id)
FROM $TBAT_SPECTRUM_ANNOTATION
WHERE spectrum_annotation_id = $spectrum_annotation_id
~;
my @result = $sbeams->selectOneColumn($sql);
$orig_spectrum_annotation_level = $result[0];
}
if ($apply_action eq "UPDATE ANNOTATION") {
my %rowdata = (
spectrum_annotation_level_id => $parameters{user_spectrum_annotation},
comment => $parameters{user_spectrum_commment}
);
my $PK = $sbeams->updateOrInsertRow(
update => 1,
table_name => $TBAT_SPECTRUM_ANNOTATION,
rowdata_ref => \%rowdata,
PK => 'spectrum_annotation_id',
PK_value => $parameters{spectrum_annotation_id},
return_PK => 1,
add_audit_parameters => 1
);
if ($PK) {
my $action = '';
if ($orig_spectrum_annotation_level <= 2 && $parameters{user_spectrum_annotation} > 2 ){
$action = 'remove';
}
elsif($orig_spectrum_annotation_level > 2 && $parameters{user_spectrum_annotation} <= 2 ){
$action = 'add';
}
if($action){
$prot_info -> update_protInfo (
spectrum_annotation_id => $parameters{spectrum_annotation_id},
atlas_build_id=>$parameters{atlas_build_id},
action => $action);
}
$sbeams->set_page_message( type => 'Info', msg => "Your annotation record has been updated." );
}
else {
$sbeams->set_page_message( type => 'Error', msg => "ERROR: There was a problem updating your annotation record." );
}
$redirect++;
}
elsif ($apply_action eq "ADD ANNOTATION") {
my %rowdata = (
annotator_contact_id => $sbeams->getCurrent_contact_id(),
spectrum_identification_id => $parameters{spectrum_identification_id},
spectrum_id => $parameters{spectrum_id},
identified_peptide_sequence => $parameters{peptide},
identified_peptide_charge => $parameters{assumed_charge},
spectrum_annotation_level_id => $parameters{user_spectrum_annotation},
comment => $parameters{user_spectrum_commment}
);
my $PK = $sbeams->updateOrInsertRow(
insert => 1,
table_name => $TBAT_SPECTRUM_ANNOTATION,
rowdata_ref => \%rowdata,
return_PK => 1,
add_audit_parameters => 1
);
if ($PK) {
if ($parameters{user_spectrum_annotation} > 2 ){
$prot_info -> update_protInfo (
spectrum_annotation_id => $PK,
atlas_build_id=>$parameters{atlas_build_id},
action => 'remove',
);
}
$sbeams->set_page_message( type => 'Info', msg => "Your annotation record has been added." );
}
else {
$sbeams->set_page_message( type => 'Error', msg => "ERROR: There was a problem inserting your annotation record." );
}
$redirect++;
}
elsif ($apply_action eq "DELETE ANNOTATION") {
my %rowdata = (
record_status => 'D'
);
my $PK = $sbeams->updateOrInsertRow(
update => 1,
table_name => $TBAT_SPECTRUM_ANNOTATION,
rowdata_ref => \%rowdata,
PK => 'spectrum_annotation_id',
PK_value => $parameters{spectrum_annotation_id},
return_PK => 1,
add_audit_parameters => 1
);
if ($PK) {
## annotation has low value, need to add back obs
if ($orig_spectrum_annotation_level > 2){
$prot_info -> update_protInfo (
spectrum_annotation_id => $PK,
atlas_build_id=>$parameters{atlas_build_id},
action => 'add',
);
}
$sbeams->set_page_message( type => 'Info', msg => "Your annotation record has been deleted." );
}
else {
$sbeams->set_page_message( type => 'Error', msg => "ERROR: There was a problem deleting your annotation record." );
}
$redirect++;
}
if ($redirect) {
$q->delete( 'apply_action' );
print $q->redirect( $q->self_url() );
exit;
}
}
###############################################################################
# Process Request
###############################################################################
sub processRequest {
#### Define some general variables
my ($i,$element,$key,$value,$sql);
#### Parse general parameters
my %parameters;
$sbeams->parse_input_parameters(q=>$q,parameters_ref=>\%parameters);
$sbeams->processStandardParameters(parameters_ref=>\%parameters);
my $apply_action = $q->param('apply_action');
#### Determine and set the output_mode
$output_mode = uc( $sbeams->output_mode() || scalar $q->param('output_mode') || scalar $q->param('outputMode') || '' );
my %allowed_output_modes = ( 'HTML'=>'HTML', 'JSON'=>'JSON', 'TEXT'=>'TEXT', 'TXT'=>'TEXT' );
if ( $allowed_output_modes{$output_mode} ) {
$output_mode = $allowed_output_modes{$output_mode};
} else {
$output_mode = 'HTML';
}
my $verbose = $VERBOSE;
#### If debugging level is 2 or more, print out CGI input information
if ( $verbose >= 2 ) {
print("
|
~;
# caller will close the td and tr
return $buffer;
}
###############################################################################
# getUSIInputForm
###############################################################################
sub getUSIInputForm {
my $METHOD = 'getUSIInputForm';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#### Process specific parameters
my $cgiParameters = processParameters( name=>'parameters', required=>0, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
my $usi = processParameters( name=>'usi', required=>0, allowUndef=>1, response=>$response, parameters=>\%parameters, caller=>$METHOD );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### Return if there was a problem with the required parameters
return $response if ( $response->{errorCode} =~ /MissingParameter/i );
#### If it wasn't passed, check the CGI parameters
unless ( $usi ) {
$usi = $cgiParameters->{USI} || $cgiParameters->{usi};
}
$usi = "" unless ( $usi );
my $buffer = qq~
~;
return $buffer;
}
###############################################################################
# interpretUSI
###############################################################################
sub interpretUSI {
my $METHOD = 'interpretUSI';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#### Process specific parameters
my $usiStr = processParameters( parameters=>\%parameters, caller=>$METHOD, name=>'usiStr', required=>1, allowUndef=>0, response=>$response );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### Return if there was a problem with the required parameters
return $response if ( $response->{errorCode} =~ /MissingParameter/i );
#### There may be some error messages for info messages coming, so switch to PRE mode
#print "Content-type: text/plain\n\n";
#print "\n";
$debug = $VERBOSE;
#### Clean the string a little
$usiStr =~ s/^\s+//;
$usiStr =~ s/\s+$//;
#### Parse the passed USI
my $usi = Proteomics::Spectra::UniversalSpectrumIdentifier->new( usi=>$usiStr );
#print "\n";
my $result = $usi->parse( verbose=>$verbose, debug=>$verbose, outputDestination=>'STDERR' );
#print(Data::Dumper->Dump([$result]))."\n";
#print " \n";
#### If the USI string was not successfully parsed, then just return
if ( $result->{status} ne 'OK' ) {
if ( $output_mode eq 'HTML' ) {
print "Content-type: text/plain\n\n";
print "\n";
print "Provided identifier=$usiStr\n";
$result->show();
print " \n";
}
$response->mergeResponse( sourceResponse=>$result, verbose=>0, debug=>0, outputDestination=>$outputDestination );
$response->{statusCode} = 400;
return $response;
}
my $datasetIdentifier = $usi->{datasetIdentifier};
my $msRunName = $usi->{msRunName};
my $indexFlag = $usi->{indexFlag};
my $index = $usi->{index};
my $interpretation = $usi->{interpretation};
my $peptidoform = $usi->{peptidoform};
my $charge = $usi->{charge};
#### If the peptidoform has mass mods in it, then convert it to TPP notation
#### Disabled by Eric 2024-05-08. No longer necessary
#if ( $peptidoform =~ /[\[\{\(]/ ) {
# #print "\n";
# #print "INFO: Peptidoform=$peptidoform\n";
# my $pep = Proteomics::Sequence::Peptide->new(sequenceString=>$peptidoform);
# $peptidoform = $pep->{modifiedTPP4SequenceString};
# #print " \n";
#}
#### If this is a PXL, just return the interpreted USI
if ( $datasetIdentifier =~ /PXL/ ) {
$response->{datasetIdentifier} = $datasetIdentifier;
$response->{msRunName} = $msRunName;
$response->{index} = $index;
$response->{indexFlag} = $indexFlag;
$response->{interpretation} = $interpretation;
$response->{peptidoform} = $peptidoform;
$response->{charge} = $charge;
return($response);
}
#### Otherwise it must be a PXD, try to find it
#print "datasetIdentifier=$datasetIdentifier\n";
my $sql = qq~
SELECT S.SAMPLE_ID,
SAMPLE_ACCESSION,
ASB.PROTEOMICS_SEARCH_BATCH_ID,
IS_PUBLIC,
REPOSITORY_IDENTIFIERS,
DATA_LOCATION
FROM $TBAT_SAMPLE S
JOIN $TBAT_ATLAS_SEARCH_BATCH ASB ON ( ASB.sample_id = S.sample_id)
WHERE repository_identifiers LIKE '\%$usi->{datasetIdentifier}\%'
order by ASB.PROTEOMICS_search_batch_id desc
~;
my @rows = $sbeams->selectSeveralColumns($sql);
my $nRows = scalar(@rows);
if ($nRows == 0){
my $sql = qq~
SELECT DATASET_ID,
NULL,
NULL,
'Y',
DATASET_IDENTIFIER,
LOCAL_DATA_LOCATION
FROM $TBAT_PUBLIC_DATA_REPOSITORY
WHERE DATASET_IDENTIFIER LIKE '\%$usi->{datasetIdentifier}\%'
AND LOCAL_DATA_LOCATION like '%sbeams%'
~;
@rows = $sbeams->selectSeveralColumns($sql);
$nRows = scalar(@rows);
}
my $foundFile = '';
my $nFoundFiles = 0;
my $foundFiles = [];
#### Turn quiet mode on, so that problems are not immediately spit out to STDOUT
$quiet = 1;
#### If no rows returned, then we don't have this dataset
if ( $nRows == 0 ) {
#### Make one more effort to find the dataset in a manual override file
print "INFO: found $nRows sample(s) matching dataset '$usi->{datasetIdentifier}', but trying the supplemental mapping lookup \n" if ( $debug );
if ( open(INFILE,"/regis/sbeams/archive/supplemental_PXD_mapping.tsv") ) {
while ( my $line = ) {
chomp($line);
my @columns = split("\t",$line);
if ( $columns[0] eq $datasetIdentifier ) {
@rows = ( [ 0,0,0,0,0,$columns[1] ] );
$nRows = 1;
print "INFO: Found in supplemental at $columns[1] \n" if ( $debug );
}
}
close(INFILE);
}
if ( $nRows == 0 ) {
$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'DatasetNotHere', statusCode=>404, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>"Dataset '$datasetIdentifier' is not stored here at PeptideAtlas.");
return $response;
}
}
#### Otherwise we seem to have it, so try to determine its location
if ( $nRows > 0 ) {
print "INFO: found $nRows sample(s) matching dataset '$usi->{datasetIdentifier}' \n" if ( $debug );
my $foundHere = 0;
foreach my $row ( @rows ) {
#### If we already found something in a previous iteration, then finish
last if ( $foundHere );
my ($sample_id,$sample_accession,$search_batch_id,$is_public,$repository_identifiers,$data_location) = @{$row};
print("INFO: data_location is $data_location \n") if ( $debug );
#### If this is a relative path, then try some prefixes
my @possible_prefixes = ( '/proteomics/peptideatlas/archive', '/regis/sbeams/archive' );
for my $possible_prefix ( @possible_prefixes ) {
print("INFO: data_location is a relative path, trying prefix $possible_prefix \n") if ( $debug );
if ( -d "$possible_prefix/$data_location" ) {
$data_location = "$possible_prefix/$data_location";
}
}
#### Is this a directory?
unless ( -d $data_location ) {
$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'InvalidDataLocation', statusCode=>500, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>"Although the dataset '$datasetIdentifier' was found, the internal data location appears to be invalid. This needs to be fixed. Please report this error. Looking for data location '$data_location'");
return($response)
}
#### Set some other places to look
my @locations = ( "$data_location/data", "$data_location/../data", "$data_location", "$data_location/.." );
#### If the MSRun name has a suffix of .mzML, strip that
$msRunName =~ s/\.mzML$//;
#### Loop over the various places to find it
my $iLocation = 0;
foreach my $location ( @locations ) {
#### If we already found this one, then finish
last if ( $foundHere );
#### Try to find the mzML files here
print("INFO: Looking for $location/$msRunName.*mzML* ") if ( $debug );
my @files = glob("$location/$msRunName.*mzML*");
if ( scalar(@files) > 0 ) {
print("INFO: Found here ") if ( $debug );
foreach my $file ( @files ) {
if ( $file =~ /mzML/ && $file !~ /\.mzML\.scan$/ ) {
print(" - Adding $file ") if ( $debug );
$nFoundFiles++;
$foundFile = $file;
$foundHere = 1;
push(@{$foundFiles},[$iLocation,$foundFile]);
}
}
print("INFO: nFoundfiles=$nFoundFiles ") if ( $debug );
}
$iLocation++;
}
#### If that all fails (as sometimes happens with a supplemental mapping) try a `find`
if ( $nFoundFiles == 0 ) {
print("INFO: Did not find what we are looking for. Try a find in $data_location ") if ( $debug );
my @files = `find $data_location -name '$msRunName.*mzML*'`;
foreach my $file ( @files ) {
chomp($file);
next if ( $file =~ /\.mzML\.scan$/ );
print(" - Adding $file \n") if ( $debug );
$nFoundFiles++;
$foundFile = $file;
$foundHere = 1;
push(@{$foundFiles},[$iLocation,$foundFile]);
}
}
}
}
print("INFO: nFoundfiles=$nFoundFiles ") if ( $debug );
#### If no files were found, this must be a bad MS Run name
if ( $nFoundFiles == 0 ) {
$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'MsRunNotFound', statusCode=>400, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>"Although the dataset '$datasetIdentifier' was found, no mzML file root '$msRunName' was found");
return $response;
#### If there are multiple files found, this is a challenge
} elsif ( $nFoundFiles > 1 ) {
print("INFO: We found multiple files here. How will be decide which one to use? \n") if ( $debug );
$nFoundFiles = 0;
my $nNoFragFiles = 0;
my $foundNoFragFile = '';
for my $level ( (0,1,2,3,4) ) {
foreach my $file ( @{$foundFiles} ) {
print(" - $file->[0] - $file->[1] \n") if ( $debug );
if ( $file->[0] == $level ) {
$nFoundFiles++;
$foundFile = $file->[1];
if ( $foundFile !~ /ITCID/ && $foundFile !~ /HCD/ ) {
$nNoFragFiles++;
$foundNoFragFile = $foundFile;
}
}
}
#### If we only found 1 file at this level, go with that
if ( $nFoundFiles == 1 ) {
print("INFO: We found only one file a level $level so go with that \n") if ( $debug );
last;
} elsif ( $nFoundFiles > 1 ) {
if ( $nNoFragFiles == 1 ) {
print("INFO: We several files at level $level but only one without ITCID or HCD in the path, so hope for that one. \n") if ( $debug );
$nFoundFiles = 1;
$foundFile = $foundNoFragFile;
last;
} else {
#$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'FoundMultipleMatchingMsRuns', statusCode=>500, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
# message=>"Under dataset '$datasetIdentifier', multiple mzML files with file root '$msRunName' were found. This cannot be properly handled yet");
#return $response;
print("INFO: We several files at level $level so we'll just pick one and hope for the best \n") if ( $debug );
$nFoundFiles = 1;
#$foundFile = $foundNoFragFile;
last;
}
}
}
}
#### If we found just one file, let's go with that
if ( $nFoundFiles == 1 ) {
print "INFO: Found data file for this MS run: $foundFile \n" if ( $debug );
$response->{datasetIdentifier} = $datasetIdentifier;
$response->{mzMLFilePath} = $foundFile;
$response->{msRunName} = $msRunName;
$response->{index} = $index;
$response->{indexFlag} = $indexFlag;
$response->{interpretation} = $interpretation;
$response->{peptidoform} = $peptidoform;
$response->{original_peptidoform} = $usi->{peptidoform};
$response->{charge} = $charge;
}
#print " \n";
return $response;
}
###############################################################################
# getSpectrumFromMzmlFile
###############################################################################
sub getSpectrumFromMzmlFile {
my $METHOD = 'getSpectrumFromMzmlFile';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#$verbose = 1;
#print("Forcing verbose=1 \n");
#### Process specific parameters
my $filePath = processParameters( name=>'filePath', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
my $scanNumber = processParameters( name=>'scanNumber', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### Return if there was a problem with the required parameters
return $response if ( $response->{errorCode} =~ /MissingParameter/i );
#### Check to make sure the file exists
my $eol = " \n";
unless ( -e $filePath ) {
print "Did not find the expected file '$filePath'$eol" if ($verbose);
my $found = 0;
use File::Basename;
my ($file,$path) = fileparse($filePath);
print "file=$file$eol" if ($verbose);
print "path=$path$eol" if ($verbose);
#### If not, try it with a .gz at the end
if ( ! $found ) {
my $altFilePath = "$filePath.gz";
print "Trying alternate file '$altFilePath'$eol" if ($verbose);
if ( -e "$altFilePath" ) {
$filePath = $altFilePath;
print "Found '$altFilePath'$eol" if ($verbose);
$found = 1;
}
}
#### If still not, try in a parallel data/ directory
if ( ! $found ) {
my $altFilePath = "$path/../data/$file";
print "Trying alternate file '$altFilePath'$eol" if ($verbose);
if ( -e "$altFilePath" ) {
$filePath = $altFilePath;
print "Found '$altFilePath'$eol" if ($verbose);
$found = 1;
}
}
#### If still not, try in a parallel data/ directory and with a .gz at the end
if ( ! $found ) {
my $altFilePath = "$path/../data/$file.gz";
print "Trying alternate file '$altFilePath'$eol" if ($verbose);
if ( -e "$altFilePath" ) {
$filePath = $altFilePath;
print "Found '$altFilePath'$eol" if ($verbose);
$found = 1;
}
}
#### If still not, admit defeat
if ( ! $found ) {
$response->logEvent( level=>'ERROR', errorCode=>'MsRunFileNotFound', statusCode=>400, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>"Specified filePath '$filePath' is not a file");
return $response;
}
}
#### If the scanNumber has a comma, assume it is a nativeId and try to translate it
if ( $scanNumber =~ /,/ ) {
my $mappingFile = $filePath;
$mappingFile =~ s/\.gz$//;
$mappingFile =~ s/mzML$//;
$mappingFile .= 'txt';
print("INFO: Checking for nativeId mapping file '$mappingFile' \n") if ($verbose);
if ( -e $mappingFile ) {
if ( open(INFILE,$mappingFile) ) {
print("INFO: Reading nativeId mapping file '$mappingFile' \n") if ($verbose);
my $iLine = 0;
while ( my $line = ) {
chomp($line);
my @columns = split("\t",$line);
my $compactNativeId=$columns[3];
$compactNativeId =~ s/\s+/,/g;
$compactNativeId =~ s/[A-Za-z=]+//g;
if ( $compactNativeId eq $scanNumber ) {
$scanNumber = $columns[0];
print("INFO: Found scanNumber $scanNumber for compactNativeId '$compactNativeId' \n") if ($verbose);
last;
}
#print("INFO: Reading compactNativeId '$compactNativeId' \n") if ($verbose && $columns[3] =~ /1475/);
$iLine++;
}
}
}
}
#### Create a virtual filename to fetch the spectrum
#### Note that we have to use an EL7 compiled version while the server is still on EL7
#my $READMZXML = "/proteomics/sw/tpp-6.0.0/bin/readmzXML";
# Switch temporarily to Luis's compiled version
my $READMZXML = "/proteomics/lmendoza/sw/misc/readmzXML_TPP600";
my $filename = "$READMZXML $filePath $scanNumber |";
print "Fetch command: $filename\n" if ($verbose);
#### Try to open the spectrum for reading
unless (open(DTAFILE,$filename)) {
$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'MsRunFileNotReadable', statusCode=>500, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>"Specified filePath '$filePath' exists, but cannot be read");
return $response;
}
#### Read the spectrum data
my @mz_intensities;
my $observedPrecursorMz = 0;
my $precursorScan = 0;
my $selWinLow = 0;
my $selWinHigh = 0;
my $charge = 0;
my $nativeId = '';
my $filterLine = '';
while (my $line = ) {
$line =~ s/[\r\n]//g;
print "$line \n" if ($verbose);
#### Extract the precursor mz and charge information from that line
if ( $line =~ /precursorMZ: ([\d\.]+)/ ) {
$observedPrecursorMz = $1;
next;
}
if ( $line =~ /precursorZ: ([\d]+)/ ) {
$charge = $1;
next;
}
if ( $line =~ /precursorScan: ([\d]+)/ ) {
$precursorScan = $1;
next;
}
if ( $line =~ /isolationLower: ([\d\.]+)/ ) {
$selWinLow = $1;
}
if ( $line =~ /isolationUpper: ([\d\.]+)/ ) {
$selWinHigh = $1;
}
if ( $line =~ /nativeId:\s*(.+)/ ) {
$nativeId = $1;
next;
}
if ( $line =~ /filterLine:\s*(.+)/ ) {
$filterLine = $1;
next;
}
#### Skip if this isn't a mass intensity line
next if ($line !~ /mass.*inten/);
#### Extract and store mass intensity information
$line =~ /mass\s+(\S+)\s+inten\s+(\S+)/;
my $mz = $1 * 1.0;
my $intensity = $2 * 1.0;
push(@mz_intensities,[$mz, $intensity]);
}
close(DTAFILE);
#### If there were no values, print diagnostics and return
if ( scalar @mz_intensities == 0 ) {
$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'ScanNotFound', statusCode=>404, verbose=>$verbose, debug=>$debug, quiet=>$quiet,
message=>"No peaks returned from extraction attempt of scan $scanNumber from file '$filePath'");
return $response;
}
#### Return result
$response->logEvent( level=>'INFO', minimumVerbosity=>1, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>scalar(@mz_intensities)." mass-inten pairs read") if ($verbose);
$response->{mz} = \@mz_intensities;
$response->{observedPrecursorMz} = $observedPrecursorMz;
$response->{nativeId} = $nativeId;
$response->{filterLine} = $filterLine;
$response->{scanNumber} = $scanNumber;
#### Build a prototype PROXI spectrum
my $proxiSpectrum = {};
$proxiSpectrum->{attributes} = [];
push(@{$proxiSpectrum->{attributes}}, { accession=>"MS:1008025", name=>"scan number", value=>$scanNumber } );
push(@{$proxiSpectrum->{attributes}}, { accession=>"MS:1000827", name=>"isolation window target m/z", value=>$observedPrecursorMz } );
push(@{$proxiSpectrum->{attributes}}, { accession=>"MS:1000041", name=>"charge state", value=>$charge } ) if ( $charge );
my @mzs;
my @intensities;
foreach my $peak ( @mz_intensities ) {
push(@mzs,$peak->[0]);
push(@intensities,$peak->[1]);
}
$proxiSpectrum->{mzs} = \@mzs;
$proxiSpectrum->{intensities} = \@intensities;
$response->{proxiSpectrum} = $proxiSpectrum;
# Set to defaults to avoid errors
$response->{mz1} = [];
$response->{mz1_scan} = $precursorScan;
$response->{isolation_low} = $selWinLow;
$response->{isolation_high} = $selWinHigh;
#### Try to read the precursor scan if there is one
if ( $precursorScan == 0 ) {
return $response;
}
$filename = "$READMZXML $filePath $precursorScan |";
unless (open(DTAFILE,$filename)) {
$response->logEvent( status=>'ERROR', level=>'ERROR', errorCode=>'MsRunFileNotReadable', statusCode=>500, verbose=>$verbose,
debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>"Specified filePath '$filePath' exists, but cannot be read");
return $response;
}
#### Read the spectrum data
print "*** Precursor spectrum *** \n" if ($verbose);
my @mz1_intensities;
while (my $line = ) {
$line =~ s/[\r\n]//g;
print "$line \n" if ($verbose);
#### Skip if this isn't a mass intensity line
next if ($line !~ /mass.*inten/);
#### Extract and store mass intensity information
$line =~ /mass\s+(\S+)\s+inten\s+(\S+)/;
my $mz = $1 * 1.0;
my $intensity = $2 * 1.0;
push(@mz1_intensities,[$mz, $intensity]);
}
close(DTAFILE);
$response->{mz1} = \@mz1_intensities;
$response->{mz1_scan} = $precursorScan;
$response->{isolation_low} = $selWinLow;
$response->{isolation_high} = $selWinHigh;
return $response;
} # end getSpectrumFromMzmlFile
###############################################################################
# getSpectrumFromProteomeCentral
###############################################################################
sub getSpectrumFromProteomeCentral {
my $METHOD = 'getSpectrumFromProteomeCentral';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#$verbose = 1;
#### Process specific parameters
my $usiStr = processParameters( name=>'usiStr', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### Return if there was a problem with the required parameters
return $response if ( $response->{errorCode} =~ /MissingParameter/i );
#### Try to fetch from ProteomeCentral
use HTTP::Request;
use LWP::UserAgent;
use JSON::Parse;
#### Read the spectrum data
my @mz_intensities;
my $observedPrecursorMz = 0;
my $peptideStr = "";
my $charge = 0;
my $url = "http://proteomecentral.proteomexchange.org/cgi/spectra?output_format=json&usi=$usiStr";
my $request = HTTP::Request->new( 'GET', $url );
$request->header( 'Content-Type' => 'application/json' );
my $lwp = LWP::UserAgent->new;
my $lwp_response = $lwp->request( $request );
if ( $lwp_response->is_success() ) {
my $proxiSpectrum = JSON::Parse::parse_json($lwp_response->content());
$response->{proxiSpectrum} = $proxiSpectrum;
#### Extract the precursor mz
if ( $proxiSpectrum->{'attributes'} ) {
my @attributes = @{$proxiSpectrum->{'attributes'}};
foreach my $attribute (@attributes) {
if ( $attribute->{accession} eq "MS:1000744" ) { # selected ion m/z
$observedPrecursorMz = $attribute->{value};
}
if ( $attribute->{accession} eq "MS:1000041" ) { # charge state
$charge = $attribute->{value};
}
if ( $attribute->{accession} eq "MS:1000888" ) { # unmodified peptide sequence
$peptideStr = $attribute->{value};
}
}
}
#### Extract the spectra data
if ( $proxiSpectrum->{'mzs'} ) {
my @peaks = @{$proxiSpectrum->{'mzs'}};
my $counter = 0;
foreach my $peak (@peaks) {
$mz_intensities[$counter] = [ $peak ];
$counter++;
}
}
#### Extract the spectra data
if ( $proxiSpectrum->{'intensities'} ) {
my @peaks = @{$proxiSpectrum->{'intensities'}};
my $counter = 0;
foreach my $peak (@peaks) {
$mz_intensities[$counter][1] = $peak;
$counter++;
}
}
} else {
print("ERROR: " . $lwp_response->status_line());
}
#### If there were no values, print diagnostics and return
unless (@mz_intensities) {
$response->logEvent( level=>'ERROR', errorCode=>'NoPeaksRead', statusCode=>500, verbose=>$verbose, debug=>$debug, quiet=>$quiet,
message=>"No peaks returned from USI $usiStr'");
return $response;
}
#### Return result
$response->logEvent( level=>'INFO', minimumVerbosity=>1, verbose=>$verbose, debug=>$debug, quiet=>$quiet, outputDestination=>$outputDestination,
message=>scalar(@mz_intensities)." mass-inten pairs read");
$response->{mz} = \@mz_intensities;
$response->{observedPrecursorMz} = $observedPrecursorMz;
$response->{peptideStr} = $peptideStr;
$response->{charge} = $charge;
return $response;
} # end getSpectrumFromProteomeCentral
###############################################################################
# writeSpectrum
###############################################################################
sub writeSpectrum {
my $METHOD = 'writeSpectrum';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#### Process specific parameters
my $proxiSpectrum = processParameters( name=>'proxiSpectrum', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
my $output_mode = processParameters( name=>'output_mode', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### Return if there was a problem with the required parameters
return $response if ( $response->{errorCode} =~ /MissingParameter/i );
if ( $output_mode eq 'JSON' ) {
use JSON;
print("Access-Control-Allow-Origin: *\n");
print("Content-type: application/json\n\n");
print(encode_json($proxiSpectrum));
} elsif ( $output_mode eq 'TEXT' ) {
print("Content-type: text/plain\n\n");
print(encode_json($proxiSpectrum));
} elsif ( $output_mode eq 'TSV' || $output_mode eq 'CSV' ) {
print("Content-type: text/plain\n\n");
print("Output mode $output_mode not yet supported\n");
} else {
print("Content-type: text/plain\n\n");
print("Output mode $output_mode not yet supported\n");
}
return;
} # end writeSpectrum
###############################################################################
# writeResponse
###############################################################################
sub writeResponse {
my $METHOD = 'writeResponse';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#### Process specific parameters
my $error_response = processParameters( name=>'response', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
my $output_mode = processParameters( name=>'output_mode', required=>1, allowUndef=>1, response=>$response, parameters=>\%parameters, caller=>$METHOD );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### Return if there was a problem with the required parameters
return $response if ( $response->{errorCode} =~ /MissingParameter/i );
#print("Content-type: text/plain\n\n");
#print Data::Dumper->Dump([$error_response]);
#return;
my $statusCode = 200;
$statusCode = 400 if ( $error_response->{errorCode} ne 'OK' );
$statusCode = $error_response->{statusCode} if ( $error_response->{statusCode} );
if ( $output_mode eq 'JSON' ) {
use JSON;
print("Status: $statusCode $error_response->{errorCode}\n");
print("Access-Control-Allow-Origin: *\n");
print("Content-type: application/json\n\n");
my $http_response = { "status"=> $statusCode, "title"=> $error_response->{errorCode}, "detail"=> $error_response->{fullMessage}, "type"=> "about:blank" };
print(encode_json($http_response),"\n");
} elsif ( $output_mode eq 'TEXT' ) {
print("Status: $statusCode $error_response->{errorCode}\n");
print("Access-Control-Allow-Origin: *\n");
print("Content-type: text/plain\n\n");
print($error_response->serialize());
} elsif ( $output_mode eq 'HTML' ) {
print("Status: $statusCode $error_response->{errorCode}\n");
print("Access-Control-Allow-Origin: *\n");
print($error_response->serialize());
} elsif ( $output_mode eq 'TSV' || $output_mode eq 'CSV' ) {
print("Status: $statusCode $error_response->{errorCode}\n");
print("Access-Control-Allow-Origin: *\n");
print("Content-type: text/plain\n\n");
print("Output mode $output_mode not yet supported\n");
} else {
print("Status: $statusCode $error_response->{errorCode}\n");
print("Access-Control-Allow-Origin: *\n");
print("Content-type: text/plain\n\n");
print("Output mode $output_mode not yet supported\n");
}
return;
} # end writeSpectrum
###############################################################################
# getCompactNativeId
###############################################################################
sub getCompactNativeId {
my $METHOD = 'getCompactNativeId';
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $DEBUG );
my %parameters = @_;
#### Set up a response object
my $response = Proteomics::Response->new();
#### Process standard parameters
my $debug = processParameters( name=>'debug', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$DEBUG, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $verbose = processParameters( name=>'verbose', required=>0, allowUndef=>0, default=>0, overrideIfFalse=>$VERBOSE, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $quiet = processParameters( name=>'quiet', required=>0, allowUndef=>0, default=>0, parameters=>\%parameters, caller=>$METHOD, response=>$response );
my $outputDestination = processParameters( name=>'outputDestination', required=>0, allowUndef=>0, default=>'STDERR', parameters=>\%parameters, caller=>$METHOD, response=>$response );
print "DEBUG: Entering $CLASS.$METHOD\n" if ( $debug && !$DEBUG );
#### Process specific parameters
my $nativeId = processParameters( name=>'nativeId', required=>1, allowUndef=>0, response=>$response, parameters=>\%parameters, caller=>$METHOD );
#### Die if any unexpected parameters are passed
my $unexpectedParameters = '';
foreach my $parameter ( keys(%parameters) ) { $unexpectedParameters .= "ERROR: unexpected parameter '$parameter'\n"; }
die("CALLING ERROR [$METHOD]: $unexpectedParameters") if ($unexpectedParameters);
#### If there's no useful nativeId, just return empty string
return '' unless ( $nativeId );
my $compactNativeId = '';
#### Try to parse the different nativeId types
#### For Thermo
if ( $nativeId =~ /controllerType=(\d+) controllerNumber=(\d+) scan=(\d+)/ ) {
$compactNativeId = "$1,$2,$3";
#### For SCIEX
} elsif ( $nativeId =~ /sample=(\d+) period=(\d+) cycle=(\d+) experiment=(\d+)/ ) {
$compactNativeId = "$1,$2,$3,$4";
#### For conversion from mzXML
} elsif ( $nativeId =~ /scan=(\d+)/ ) {
$compactNativeId = "$1";
#### Not yet coded or invalid
} else {
print("ERROR: unrecognized nativeId '$nativeId'\n");
}
return($compactNativeId);
} # end getCompactNativeId
|