libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spomsspectrum.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief SpecPeptidOMS Spectrum
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#include <algorithm>
33#include <unordered_set>
34#include "spomsspectrum.h"
35#include "types.h"
38
39// SpOMSSpectrum::SpOMSSpectrum(const specglob::ExperimentalSpectrum &exp_spectrum)
41 pappso::PrecisionPtr precision_ptr,
42 const pappso::AaCode &aaCode)
43 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
44 specglob::ExperimentalSpectrum(qmass_spectrum, precision_ptr)),
45 m_qualifiedMassSpectrum(qmass_spectrum),
46 m_precision_ptr(precision_ptr),
47 m_aaCode(aaCode),
49{
50 m_aapositions.reserve(m_aaCode.getSize());
51 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
52 {
53 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
54 m_aapositions.back()->reserve(this->size() - 1);
55 }
56 m_supported_peaks.reserve(this->size());
57 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
58 m_reindexed_peaks.push_back(0);
59 for(std::size_t iter = 1; iter < this->size(); iter++)
60 {
61 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
62 m_reindexed_peaks.push_back(-1);
63 }
64 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
65 this->back().peak_mz = m_qualifiedMassSpectrum.getPrecursorMass() + pappso::MHPLUS;
67}
68
82
84 double precursor_mass_error)
85 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
86 pappso::specglob::ExperimentalSpectrum(
87 other.m_qualifiedMassSpectrum, other.m_precision_ptr, precursor_mass_error)),
90 m_aaCode(other.m_aaCode),
91 m_precursor_mass_error(precursor_mass_error)
92{
93 m_aapositions.reserve(m_aaCode.getSize());
94 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
95 {
96 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
97 m_aapositions.back()->reserve(this->size() - 1);
98 }
99 m_supported_peaks.reserve(this->size());
100 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
101 m_reindexed_peaks.push_back(0);
102 for(std::size_t iter = 1; iter < this->size(); iter++)
103 {
104 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
105 m_reindexed_peaks.push_back(-1);
106 }
107 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
108 this->back().peak_mz =
109 m_qualifiedMassSpectrum.getPrecursorMass() + pappso::MHPLUS + precursor_mass_error;
111}
112
116
117// Add comments !!
118void
120{
121 bool found;
122 uint8_t aa;
123 std::vector<double>::iterator iter1, iter2;
124 std::size_t peak1, peak2, next_l_peak;
125 std::vector<double> mass_list = getMassList();
126
127 peak1 = -1;
128 for(iter1 = mass_list.begin(); iter1 != mass_list.end(); iter1++)
129 {
130 peak1++;
131 peak2 = peak1;
132 for(iter2 = iter1 + 1; iter2 != mass_list.end(); iter2++)
133 {
134 peak2++;
135 aa = m_aaCode.getAaCodeByMass(*(iter2) - *(iter1), m_precision_ptr);
136 if(aa != 0)
137 {
138 next_l_peak = 0;
139 for(std::size_t iter = 1; iter < peak1;
140 iter++) // Search of the closer supported left peak.
141 // Possible optimization => search from the right
142 {
143 if(m_reindexed_peaks.at(iter) >= 0)
144 {
145 next_l_peak = iter;
146 }
147 }
148 if(m_reindexed_peaks.at(peak2) == -1)
149 {
150 addSupportedPeak(peak2);
151 m_supported_peaks.at(peak2)->push_back(aa);
152 }
153 if(m_reindexed_peaks.at(peak1) >= 0)
154 {
155 addAaPosition(aa, peak2, peak1, next_l_peak, true);
156 }
157 else
158 {
159 addAaPosition(aa, peak2, next_l_peak, next_l_peak, false);
160 }
161 }
162 }
163 }
164
167
168 // std::size_t i = 0;
169 // for(auto &data_point : *this)
170 // {
171 // data_point.indice = i;
172 // i++;
173 // }
174
176}
177
178// pappso::Aa const *
179// SpOMSSpectrum::findAAMass(double mass, bool *found) const
180// {
181// bool ok;
182// // auto charge = m_qualifiedMassSpectrum.getPrecursorCharge(&ok);
183
184// if(!ok)
185// {
186// throw pappso::PappsoException(
187// QObject::tr("precursor charge is not defined in spectrum %1")
188// .arg(m_qualifiedMassSpectrum.getMassSpectrumId().getNativeId()));
189// }
190// pappso::MzRange mz_range(mass / m_qualifiedMassSpectrum.getPrecursorCharge(),
191// m_precision_ptr);
192
193// for(std::unordered_map<const Aa, double>::const_iterator aa = aaMasses.begin();
194// aa != aaMasses.end();
195// aa++)
196// {
197// if(mz_range.contains(aa->second))
198// {
199// if(found != nullptr)
200// {
201// *found = true;
202// }
203// return &(aa->first);
204// }
205// }
206// if(found != nullptr)
207// {
208// *found = false;
209// }
210// return nullptr;
211// }
212
213// Not sure if optimal
214void
216{
217 std::vector<specglob::ExperimentalSpectrumDataPoint> kept_peaks;
218 for(std::vector<specglob::ExperimentalSpectrumDataPoint>::iterator iter = this->begin();
219 iter != this->end();
220 iter++)
221 {
222 if(m_reindexed_peaks.at(iter->indice) >= 0)
223 {
224 kept_peaks.push_back(*iter);
225 }
226 }
227 this->clear();
228 this->assign(kept_peaks.begin(), kept_peaks.end());
229}
230
231void
233 const std::size_t r_peak,
234 const std::size_t l_peak,
235 const std::size_t next_l_peak,
236 bool l_support)
237{
238 // aa=0 corresponds to no amino acid identified, thus aa is always >=1. We substract 1 to aa to
239 // avoid keeping an empty, useless vector.
240 if(l_support)
241 {
242 m_aapositions.at(aa - 1)->push_back(
243 {r_peak, l_peak, next_l_peak, computeCondition(l_peak, l_support), l_support});
244 }
245 else
246 {
247 m_aapositions.at(aa - 1)->push_back(
248 {r_peak, next_l_peak, next_l_peak, computeCondition(l_peak, l_support), l_support});
249 }
250}
251
252uint32_t
254 bool l_support) const
255{
256 uint32_t condition;
257 if(l_peak == 0)
258 {
259 condition = 2;
260 }
261 else if(!l_support)
262 {
263 condition = 1;
264 }
265 else
266 {
267 condition = 0;
268 for(std::vector<uint8_t>::iterator aa = m_supported_peaks.at(l_peak)->begin();
269 aa != m_supported_peaks.at(l_peak)->end();
270 aa++)
271 {
272 condition += 2 << *(aa);
273 }
274 }
275 return condition;
276}
277
278std::vector<pappso::specpeptidoms::AaPosition> &
280{
281
282 qDebug() << " m_aaCode.getAaCode(aa.getLetter()) - 1=" << m_aaCode.getAaCode(aa) - 1
283 << " m_aapositions.size()=" << m_aapositions.size();
284 return *m_aapositions.at(m_aaCode.getAaCode(aa) - 1);
285}
286
287std::vector<pappso::specpeptidoms::AaPosition>
289 std::vector<std::size_t> peaks_to_remove) const
290{
291 std::vector<AaPosition> aa_positions;
292 for(auto aap : *m_aapositions.at(m_aaCode.getAaCode(aa) - 1))
293 {
294 if(std::find(peaks_to_remove.begin(), peaks_to_remove.end(), aap.r_peak) ==
295 peaks_to_remove.end())
296 {
297 aa_positions.push_back(aap);
298 }
299 }
300 return aa_positions;
301}
302
303std::vector<double>
305{
306 std::vector<double> mass_list;
307 for(const specglob::ExperimentalSpectrumDataPoint &n : *this)
308 {
309 mass_list.push_back(n.peak_mz);
310 }
311 return mass_list;
312}
313
316{
317 return this->at(indice).type;
318}
319
320uint
325
326double
327pappso::specpeptidoms::SpOMSSpectrum::getMZShift(std::size_t l_peak, std::size_t r_peak) const
328{
329 return this->at(r_peak).peak_mz - this->at(l_peak).peak_mz;
330}
331
332double
334{
335 return this->m_qualifiedMassSpectrum.getPrecursorMass() - m_precursor_mass_error -
336 this->at(peak).peak_mz + MHPLUS;
337}
338
339void
341{
342 std::size_t counter = 0;
343 for(std::size_t iter = 0; iter < peak; iter++)
344 {
345 if(m_reindexed_peaks.at(iter) >= 0)
346 {
347 counter++;
348 }
349 }
350 m_reindexed_peaks.at(peak) = counter;
351 for(std::size_t iter = peak + 1; iter < m_reindexed_peaks.size(); iter++)
352 {
353 if(m_reindexed_peaks.at(iter) >= 0)
354 {
355 m_reindexed_peaks.at(iter)++;
356 }
357 }
358}
359
360void
362{
363 for(auto aa = m_aapositions.begin(); aa != m_aapositions.end(); aa++)
364 {
365 for(auto aap = aa->get()->begin(); aap != aa->get()->end(); aap++)
366 {
367 aap->l_peak = m_reindexed_peaks.at(aap->l_peak);
368 aap->r_peak = m_reindexed_peaks.at(aap->r_peak);
369 aap->next_l_peak = m_reindexed_peaks.at(aap->next_l_peak);
370 }
371 }
372}
373
374void
376{
377 std::size_t left_index, right_index;
378
379 m_complementary_peak_indexes.reserve(this->size());
380 while(m_complementary_peak_indexes.size() < this->size())
381 {
382 m_complementary_peak_indexes.push_back(0);
383 }
384 left_index = 0;
385 right_index = this->size() - 1;
386 double comp_mass = m_qualifiedMassSpectrum.getPrecursorMass() + 2 * MHPLUS;
387
388 while(left_index < right_index)
389 {
390 pappso::MzRange mz_range(comp_mass - this->at(left_index).peak_mz, m_precision_ptr);
391 if(mz_range.contains(this->at(right_index).peak_mz))
392 {
393 m_complementary_peak_indexes.at(left_index) = right_index;
394 m_complementary_peak_indexes.at(right_index) = left_index;
395 qDebug() << left_index << right_index;
396 }
397 if(comp_mass - this->at(left_index).peak_mz - this->at(right_index).peak_mz >= 0)
398 {
399 left_index++;
400 }
401 else
402 {
403 right_index--;
404 }
405 }
406}
407
408std::size_t
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
Definition aacode.h:44
bool contains(pappso_double) const
Definition mzrange.cpp:120
Class representing a fully specified mass spectrum.
void preprocessSpectrum()
Preprocess the spectrum.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum, pappso::PrecisionPtr precision_ptr, const pappso::AaCode &aaCode)
std::vector< AaPosition > & getAaPositions(pappso::Enums::AminoAcidChar aa) const
Returns the list of aa_positions for a given amino acid.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::vector< std::size_t > m_complementary_peak_indexes
std::vector< std::shared_ptr< std::vector< uint8_t > > > m_supported_peaks
uint32_t computeCondition(const std::size_t l_peak, bool l_support) const
Computes the "condition" integer, used to apply the three peaks rule.
void addAaPosition(uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, const std::size_t next_l_peak, bool l_support)
Adds an amino acid position to the data structure.
void removeUnsupportedMasses()
Removes the unsupported peaks (without an amino acid to the left) from the spectrum.
pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum
std::vector< std::shared_ptr< std::vector< AaPosition > > > m_aapositions
void correctPeakIndexes()
Reindexes the peaks after removal of the unsupported peaks.
void addSupportedPeak(std::size_t peak)
Add a peak to the supported peaks list.
void fillComplementaryPeakIndexes()
For each point of the spectrum, indicate the index of its complementary peak;.
std::size_t getComplementaryPeak(std::size_t peak) const
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
std::vector< double > getMassList() const
Returns the spectrum's list of masses.
ExperimentalSpectrumDataPointType
Definition types.h:78
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
unsigned int uint
Definition types.h:68
const pappso_double MASSOXYGEN(15.99491461956)
const PrecisionBase * PrecisionPtr
Definition precision.h:122