libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfeaturesscan.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/evalscan/psmfeaturesscan.cpp
3 * \date 15/07/2025
4 * \author Olivier Langella
5 * \brief compute features on scan's PSM
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfeaturesscan.h"
29#include <QCborArray>
30#include <QCborMap>
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
// Constructor: forwards psm_file_scan_process to CborScanMapBase and stores the
// spectrum preprocessor, the ion list, the feature calculator and the fragment
// tolerance in the corresponding members.
//
// NOTE(review): the opening line of this signature (embedded source line 42) is
// absent from this listing; judging from the initializer list it presumably
// declares psm_file_scan_process — confirm against the real file.
// NOTE(review): tandem_spectrum_process, ion_list and psm_features are taken by
// non-const reference; if the members they initialise are references, the
// caller's objects must outlive this instance — confirm in the header.
43 pappso::XtandemSpectrumProcess &tandem_spectrum_process,
44 std::list<Enums::PeptideIon> &ion_list,
45 pappso::PsmFeatures &psm_features,
46 pappso::PrecisionPtr fragment_tolerance)
47 : CborScanMapBase(psm_file_scan_process),
48 m_tandemSpectrumProcess(tandem_spectrum_process),
49 m_ionList(ion_list),
50 m_psmFeatures(psm_features)
51{
 // The fragment tolerance is assigned here rather than in the initializer list.
52 m_fragmentTolerance = fragment_tolerance;
53}
54
58
59double
60PsmFeaturesScan::checkInf(double input) const
61{
62 if(input < 0)
63 return 0;
64 return input;
65}
66
/**
 * Computes a set of scoring features for every PSM found under the "psm_list"
 * key of this scan map, stores them in each PSM's eval.features map and
 * writes the updated list back under "psm_list".
 *
 * NOTE(review): this listing is missing embedded source lines 68, 73 and 95-96
 * (the numbering skips them). They presumably hold the method name, the
 * declaration of qualified_mass_spectrum and two XtandemHyperscore constructor
 * arguments — consult the real file before changing this function.
 */
67void
69{
70
 // Nothing is done when the scan map has no "psm_list" entry.
71 if(keys().contains("psm_list"))
72 {
74
 // Preprocess the scan's spectrum once, passing the precursor m/z and charge;
 // the result is reused for the hyperscore of every PSM below.
75 pappso::MassSpectrum spectrum =
76 m_tandemSpectrumProcess.process(*qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
77 qualified_mass_spectrum.get()->getPrecursorMz(),
78 qualified_mass_spectrum.get()->getPrecursorCharge());
79
 // Each PSM is copied, enriched with features and appended to new_psm_arr,
 // which replaces "psm_list" once the loop is done.
80 QCborArray new_psm_arr;
81 for(QCborValue cbor_psm : value("psm_list").toArray())
82 {
83 QCborMap cbor_psm_map = cbor_psm.toMap();
84 QCborMap cbor_psm_features;
 // Build the peptide from the PSM's "proforma" string.
85 pappso::PeptideSp peptide_sp =
86 pappso::PeptideProFormaParser::parseString(cbor_psm_map.value("proforma").toString());
87
88
89 std::size_t peptide_size = peptide_sp.get()->size();
90 cbor_psm_features.insert(QString("peptide_size"), (unsigned int)peptide_size);
91
 // Hyperscore is computed on the preprocessed spectrum.
92 pappso::XtandemHyperscore hyperscore(spectrum,
93 peptide_sp,
94 qualified_mass_spectrum.get()->getPrecursorCharge(),
97 true);
98 cbor_psm_features.insert(QString("hyperscore"), QCborValue(hyperscore.getHyperscore()));
99
100
 // The feature calculator is fed the scan's original spectrum, not the
 // preprocessed one. NOTE(review): the meaning of the trailing literal 2 is
 // not visible here — confirm against PsmFeatures::setPeptideSpectrumCharge.
101 m_psmFeatures.setPeptideSpectrumCharge(
102 peptide_sp,
103 qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
104 qualified_mass_spectrum.get()->getPrecursorCharge(),
105 2);
 // NOTE(review): unlike the log-features below, this logarithm is not passed
 // through checkInf().
106 // TIC
107 cbor_psm_features.insert(QString("total_intensity"),
108 std::log(m_psmFeatures.getTotalIntensity()));
 // Log of the most intense data point of the original spectrum; checkInf()
 // replaces a negative result by 0.
109 // MaxIntALL
110 cbor_psm_features.insert(QString("max_intensity"),
111 checkInf(std::log(qualified_mass_spectrum.get()
112 ->getMassSpectrumSPtr()
113 .get()
114 ->maxIntensityDataPoint()
115 .y)));
116
117 // MaxYionInt
118 cbor_psm_features.insert(
119 QString("MaxYionInt"),
120 checkInf(std::log(m_psmFeatures.getMaxIntensityPeakIonMatch(Enums::PeptideIon::y))));
121
122 // MaxBionInt
123 cbor_psm_features.insert(
124 QString("MaxBionInt"),
125 checkInf(std::log(m_psmFeatures.getMaxIntensityPeakIonMatch(Enums::PeptideIon::b))));
126
127 // SumYmatchInt
128 cbor_psm_features.insert(
129 QString("SumYmatchInt"),
130 checkInf(std::log(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::y))));
131
132 // SumBmatchInt
133 cbor_psm_features.insert(
134 QString("SumBmatchInt"),
135 checkInf(std::log(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::b))));
136
 // Matched-ion intensity divided by the total intensity.
 // NOTE(review): the divisor is not checked for zero, and checkInf() only
 // replaces negative values.
137 // FracYmatchInt
138 cbor_psm_features.insert(
139 QString("FracYmatchInt"),
140 checkInf(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::y) /
141 m_psmFeatures.getTotalIntensity()));
142 // FracBmatchInt
143 cbor_psm_features.insert(
144 QString("FracBmatchInt"),
145 checkInf(m_psmFeatures.getIntensityOfMatchedIon(Enums::PeptideIon::b) /
146 m_psmFeatures.getTotalIntensity()));
147
 // Sequence coverage divided by the peptide size.
 // NOTE(review): peptide_size is not checked for zero before the division.
148 // SeqCoverYion
149 cbor_psm_features.insert(
150 QString("SeqCoverYion"),
151 (double)m_psmFeatures.getAaSequenceCoverage(Enums::PeptideIon::y) /
152 (double)peptide_size);
153 // SeqCoverBion
154 cbor_psm_features.insert(
155 QString("SeqCoverBion"),
156 (double)m_psmFeatures.getAaSequenceCoverage(Enums::PeptideIon::b) /
157 (double)peptide_size);
158
159
160 // ConsecutiveYion
161 cbor_psm_features.insert(
162 QString("ConsecutiveYion"),
163 (qint64)m_psmFeatures.getMaxConsecutiveIon(Enums::PeptideIon::y));
164 // ConsecutiveBion
165 cbor_psm_features.insert(
166 QString("ConsecutiveBion"),
167 (qint64)m_psmFeatures.getMaxConsecutiveIon(Enums::PeptideIon::b));
168
169 // MassErrMean
170 cbor_psm_features.insert(QString("MassErrMean"), m_psmFeatures.getMatchedMzDiffMean());
171
172 // MassErrSD
173 cbor_psm_features.insert(QString("MassErrSD"), m_psmFeatures.getMatchedMzDiffSd());
174
175 // NumofAnnoPeaks
176 cbor_psm_features.insert(QString("NumofAnnoPeaks"),
177 (unsigned int)m_psmFeatures.getNumberOfMatchedIons());
178
 // The three complement-pair features below are only written when at least
 // one pair was counted; otherwise those keys are absent from the map.
179 // NumofComplementPeaks
180 std::size_t num_of_pairs = m_psmFeatures.countMatchedIonComplementPairs();
181 cbor_psm_features.insert(QString("NumofComplementPeaks"), (unsigned int)num_of_pairs);
182 if(num_of_pairs > 0)
183 {
184 // SumComplementPeaksInt
185 cbor_psm_features.insert(
186 QString("SumComplementPeaksInt"),
187 std::log(m_psmFeatures.getTotalIntensityOfMatchedIonComplementPairs()));
188
189 // FracComplementPeaksInt
190 cbor_psm_features.insert(
191 QString("FracComplementPeaksInt"),
192 m_psmFeatures.getTotalIntensityOfMatchedIonComplementPairs() /
193 m_psmFeatures.getTotalIntensity());
194 // SeqCoverComplementPeaks
195 cbor_psm_features.insert(
196 QString("SeqCoverComplementPeaks"),
197 (double)m_psmFeatures.getComplementPairsAaSequenceCoverage() /
198 (double)peptide_size);
199 }
 // Ion isotope linear regression: its data size is always stored.
200 pappso::LinearRegression lr = m_psmFeatures.getIonIsotopeLinearRegression();
201 cbor_psm_features.insert(QString("lrSize"), (unsigned int)lr.getSize());
202
203
 // "lrCoeffDet" is left out when the coefficient of determination is NaN
 // (the empty branch below is intentional in that sense).
204 double coeff_of_determination = lr.getCoefficientOfDetermination();
205 if(std::isnan(coeff_of_determination))
206 {
207 }
208 else
209 {
210 cbor_psm_features.insert(QString("lrCoeffDet"), coeff_of_determination);
211 }
212
213
 // Replace any existing "features" entry of the PSM's "eval" map with the
 // freshly computed one, then put the updated "eval" map back into the PSM.
214 QCborMap psm_eval = cbor_psm_map.value("eval").toMap();
215 psm_eval.remove(QString("features"));
216 psm_eval.insert(QString("features"), cbor_psm_features);
217 cbor_psm_map.remove(QString("eval"));
218 cbor_psm_map.insert(QString("eval"), psm_eval);
219
220 new_psm_arr.push_back(cbor_psm_map);
221 }
222
 // Store the enriched PSM array under "psm_list".
223 insert(QString("psm_list"), new_psm_arr);
224 }
225}
226
227} // namespace psm
228} // namespace cbor
229} // namespace pappso
std::size_t getSize() const
get data size
double getCoefficientOfDetermination() const
get Coefficient of determination (R2)
Class to represent a mass spectrum.
static PeptideSp parseString(const QString &pepstr)
pappso_double getHyperscore() const
CborScanMapBase(const PsmFileScanProcess &psm_file_scan_process)
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
PsmFeaturesScan(const PsmFileScanProcess &psm_file_scan_process, pappso::XtandemSpectrumProcess &tandem_spectrum_process, std::list< pappso::Enums::PeptideIon > &ion_list, pappso::PsmFeatures &psm_features, pappso::PrecisionPtr fragment_tolerance)
pappso::XtandemSpectrumProcess & m_tandemSpectrumProcess
std::list< pappso::Enums::PeptideIon > & m_ionList
double checkInf(double input) const
pappso::PrecisionPtr m_fragmentTolerance
Basic PSM file reader to process scan (parallelized scan processing)
@ y
Cter amino ions.
Definition types.h:295
@ b
Nter acylium ions.
Definition types.h:287
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
const PrecisionBase * PrecisionPtr
Definition precision.h:122