libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spomsspectrum.h
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.h
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief SpecPeptidOMS Spectrum
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#pragma once
33
34#include <cstdint>
38
39namespace pappso
40{
41namespace specpeptidoms
42{
43
45{
46 std::size_t r_peak, l_peak, next_l_peak; // If the left peak is not supported (l_support ==
47 // false), then l_peak = next_l_peak
48 /**
49 * Condition is a 32-bit integer used to compute the threePeaks condition of the program.
50 * The first bit is set to 1 if the AaPosition's left peak has no other amino acid directly on its
51 * left (i.e. if l_support is false) and is not the spectrum's first peak.
52 * The second bit is set to 1 if the AaPosition's left peak is the spectrum's first peak.
53 * The next 22 bits are used as a one-hot encoding of the 22 amino acids. If an amino acid is
54 * found to the left of the AaPosition, its corresponding bit is set to 1.
55 * To check whether the threePeaks condition is verified, we do a bitwise AND between condition and
56 * another 32-bit integer whose first 2 bits are set to 1, as well as the bit corresponding to the
57 * preceding amino acid in the protein sequence.
58 */
59 uint32_t condition;
61};
62
63class SpOMSSpectrum;
64
65typedef std::shared_ptr<const SpOMSSpectrum> SpOMSSpectrumCsp;
66
67class PMSPP_LIB_DECL SpOMSSpectrum : public std::vector<specglob::ExperimentalSpectrumDataPoint>
68{ // SpecPeptidOMS spectrum: a vector of experimental data points plus amino acid position lists
69 public:
70 /**
71 * Constructor (builds the SpOMSSpectrum from a qualified mass spectrum)
72 * @param qmass_spectrum Qualified mass spectrum to preprocess
73 * @param precision_ptr Precision to use for preprocessing
 * @param aaCode Collection of integer codes for the amino acids (pappso::AaCode)
74 */
76 pappso::PrecisionPtr precision_ptr,
77 const pappso::AaCode &aaCode);
78
79 /**
80 * Copy constructor
81 * @param other SpOMSSpectrum to copy
82 */
83 SpOMSSpectrum(const SpOMSSpectrum &other);
84
85 /**
86 * Post-processing constructor
87 * @brief Builds a copy of the provided spectrum accounting for the provided precursor mass error
88 * @param other SpOMSSpectrum to copy
89 * @param precursor_mass_error precursor mass error to account for
90 */
91 SpOMSSpectrum(const SpOMSSpectrum &other, double precursor_mass_error);
92
93 /**
94 * Destructor
95 */
96 virtual ~SpOMSSpectrum();
97
98 /**
99 * @brief Adds an amino acid position to the data structure
100 * @param aa Amino acid to add to the data structure
101 * @param r_peak index of the amino acid's right support peak
102 * @param l_peak index of the amino acid's left support peak, if it is supported by an amino acid,
103 * otherwise see next_l_peak
104 * @param next_l_peak index of the first supported peak to the left of the amino acid's left
105 * support peak
106 * @param l_support indicates whether the amino acid's left support peak is supported by an amino
107 * acid
108 */
109 void addAaPosition(uint8_t aa,
110 const std::size_t r_peak,
111 const std::size_t l_peak,
112 const std::size_t next_l_peak,
113 bool l_support);
114
115 /**
116 * @brief Returns the list of aa_positions for a given amino acid
117 * @param aa Amino acid for which to retrieve positions
118 */
119 std::vector<AaPosition> &getAaPositions(pappso::Enums::AminoAcidChar aa) const;
120
121 /**
122 * @brief Returns the list of aa_positions for a given amino acid (aa), except those relying on
123 * the peaks listed in peaks_to_remove
124 */
125 std::vector<AaPosition> getAaPositions(pappso::Enums::AminoAcidChar aa,
126 std::vector<std::size_t> peaks_to_remove) const;
127
128 /**
129 * @brief Returns the spectrum's list of masses
130 */
131 std::vector<double> getMassList() const;
132
133 /**
134 * @brief Returns the type of one of the spectrum's peaks
135 * @param indice Index of the peak to be identified
136 */
138
139 /**
140 * @brief Returns the spectrum's precursor's charge
141 */
142 uint getPrecursorCharge() const;
143
144 /**
145 * @brief Returns the missing mass between a peak and the precursor's mass (shift at the end).
146 */
147 double getMissingMass(std::size_t peak) const;
148
149 /**
150 * @brief Returns the mz difference between two peaks
151 * @param l_peak left peak
152 * @param r_peak right peak
153 */
154 double getMZShift(std::size_t l_peak, std::size_t r_peak) const;
155
156 std::size_t getComplementaryPeak(std::size_t peak) const; // index of the complementary peak of `peak` (presumably read from m_complementary_peak_indexes - TODO confirm)
157
158 private:
160 std::vector<std::shared_ptr<std::vector<AaPosition>>> m_aapositions; // presumably one AaPosition list per amino acid code - TODO confirm
162 std::vector<std::shared_ptr<std::vector<uint8_t>>> m_supported_peaks;
163 std::vector<int> m_reindexed_peaks; // Index of supported peaks after removing unsupported peaks;
164 // -1 if unsupported
166 std::vector<std::size_t> m_complementary_peak_indexes;
168
169 /**
170 * @brief Preprocess the spectrum
171 */
172 void preprocessSpectrum(); // TODO : min number of found amino acids
173
174 /**
175 * @brief Removes the unsupported peaks (without an amino acid to the left) from the spectrum
176 */
178
179 /**
180 * @brief Computes the "condition" integer, used to apply the three peaks rule
181 */
182 uint32_t computeCondition(const std::size_t l_peak,
183 bool l_support) const; // l_peak is original index
184
185 /**
186 * @brief Add a peak to the supported peaks list
187 */
188 void addSupportedPeak(std::size_t peak);
189
190 /**
191 * @brief Reindexes the peaks after removal of the unsupported peaks
192 */
193 void correctPeakIndexes();
194
195 /**
196 * @brief For each point of the spectrum, indicates the index of its complementary peak.
197 */
199};
200
201} // namespace specpeptidoms
202} // namespace pappso
collection of integer codes for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
Definition aacode.h:44
Class representing a fully specified mass spectrum.
void preprocessSpectrum()
Preprocess the spectrum.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum, pappso::PrecisionPtr precision_ptr, const pappso::AaCode &aaCode)
std::vector< AaPosition > & getAaPositions(pappso::Enums::AminoAcidChar aa) const
Returns the list of aa_positions for a given amino acid.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::vector< std::size_t > m_complementary_peak_indexes
std::vector< std::shared_ptr< std::vector< uint8_t > > > m_supported_peaks
uint32_t computeCondition(const std::size_t l_peak, bool l_support) const
Computes the "condition" integer, used to apply the three peaks rule.
void addAaPosition(uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, const std::size_t next_l_peak, bool l_support)
Adds an amino acid position to the data structure.
void removeUnsupportedMasses()
Removes the unsupported peaks (without an amino acid to the left) from the spectrum.
pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum
std::vector< std::shared_ptr< std::vector< AaPosition > > > m_aapositions
void correctPeakIndexes()
Reindexes the peaks after removal of the unsupported peaks.
void addSupportedPeak(std::size_t peak)
Add a peak to the supported peaks list.
void fillComplementaryPeakIndexes()
For each point of the spectrum, indicates the index of its complementary peak.
std::size_t getComplementaryPeak(std::size_t peak) const
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
std::vector< double > getMassList() const
Returns the spectrum's list of masses.
#define PMSPP_LIB_DECL
ExperimentalSpectrumDataPointType
Definition types.h:78
std::shared_ptr< const SpOMSSpectrum > SpOMSSpectrumCsp
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
unsigned int uint
Definition types.h:68
const PrecisionBase * PrecisionPtr
Definition precision.h:122