libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfilereaderbase.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
3 * \date 05/07/2025
4 * \author Olivier Langella
5 * \brief Base class to read CBOR PSM file
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfilereaderbase.h"
30#include <QDebug>
32#include <QCborArray>
33
34
35namespace pappso
36{
37namespace cbor
38{
39namespace psm
40{
41
45
51
// Entry point of the reader: initialises the CBOR reader on `cborp`, then parses
// the document through readRoot() when the reader reports a map.
// NOTE(review): the signature line is missing from this listing; the member index
// of the page shows `void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)`,
// which matches the `cborp` and `monitor` names used below — confirm against the
// real source file.
52void
54{
55 qDebug();
56 initCborReader(cborp);
57
58 qDebug();
// Input that is not reported as a map is ignored silently: there is no else
// branch and no exception is raised here.
59 if(mpa_cborReader->isMap())
60 {
61 readRoot(monitor);
62 }
63 qDebug();
64}
65
66void
70
// Returns true as soon as one entry of m_currentPsmProteinRefList refers to a
// protein whose m_proteinMap record has isTarget == false, and false otherwise
// (an empty list therefore yields false).
// NOTE(review): the signature line is missing from this listing, so the function
// name and its qualifiers cannot be confirmed here; "not target" presumably
// means "decoy" — verify against the header.
71bool
73{
74 for(auto &it : m_currentPsmProteinRefList)
75 {
// Lookup is done by accession string for every protein reference.
76 if(!m_proteinMap.getByAccession(it.accession).isTarget)
77 return true;
78 }
79 return false;
80}
81
// Returns true as soon as one entry of m_currentPsmProteinRefList refers to a
// protein whose m_proteinMap record has isTarget == true, and false otherwise
// (an empty list therefore yields false).
// NOTE(review): the signature line is missing from this listing, so the function
// name and its qualifiers cannot be confirmed here.
82bool
84{
85 for(auto &it : m_currentPsmProteinRefList)
86 {
// Lookup is done by accession string for every protein reference.
87 if(m_proteinMap.getByAccession(it.accession).isTarget)
88 return true;
89 }
90 return false;
91}
92
93
// Parses one top-level container of the PSM CBOR document, checking the keys in
// this fixed order:
//   "informations" (mandatory), optionally followed by "log",
//   "parameter_map" (mandatory),
//   "target_fasta_files" / "decoy_fasta_files" (both optional),
//   "protein_map" (optional),
//   "sample_list" (mandatory).
// Throws pappso::PappsoException when "informations" or "parameter_map" is not
// the current key; the else branch for "sample_list" builds an error message too.
// NOTE(review): the signature line and several interior lines are missing from
// this listing. The function calls itself and the page index lists
// `void readRoot(pappso::UiMonitorInterface &monitor)`, so this is presumably
// readRoot. The missing lines between the key tests presumably refresh
// m_expectedString (getExpectedString() is called that way further down the
// file) — confirm against the real source.
94void
96{
97 qDebug();
98 mpa_cborReader->enterContainer();
99
101 if(m_expectedString == "informations")
102 {
103 readInformations(monitor);
105
// "log" is only looked for right after "informations"; it may be absent.
106 if(m_expectedString == "log")
107 {
108 readLog(monitor);
110 }
111
// logReady() is called whether or not a "log" element was present.
112 logReady(monitor);
113 }
114 else
115 {
116 throw pappso::PappsoException("ERROR: expecting informations element");
117 }
118
119
120 if(m_expectedString == "parameter_map")
121 {
122 readParameterMap(monitor);
123 }
124 else
125 {
126 throw pappso::PappsoException("ERROR: expecting parameter_map element");
127 }
128
129
// Both FASTA file lists are reset before the optional elements are examined,
// so they are empty when the corresponding key is absent.
131 m_targetFastaFiles.clear();
132 m_decoyFastaFiles.clear();
133 if(m_expectedString == "target_fasta_files")
134 {
// NOTE(review): the body of this branch is not visible in this listing.
137 }
138
139 if(m_expectedString == "decoy_fasta_files")
140 {
// NOTE(review): the body of this branch is not visible in this listing.
143 }
// Called unconditionally, even when neither FASTA list was present.
144 fastaFilesReady(monitor);
145
146 if(m_expectedString == "protein_map")
147 {
148 readProteinMap(monitor);
150 }
151
152 if(m_expectedString == "sample_list")
153 {
154 sampleListStarted(monitor);
155 mpa_cborReader->enterContainer(); // array
// One readSample() call per array element, stopping on the first reader error.
156 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
157 {
158 readSample(monitor);
159 }
160 mpa_cborReader->leaveContainer(); // array
161 sampleListFinished(monitor);
162 }
163 else
164 {
// NOTE(review): the line that starts this statement (presumably a throw) is
// missing from the listing.
166 QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
167 }
168 mpa_cborReader->leaveContainer(); // whole file
// If more data follows the container just closed, it is parsed as another
// root element by recursing.
169 if(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
170 {
171 readRoot(monitor);
172 }
173}
174
// Reads the header map into m_cborInformations, checks that its "type" entry is
// "psm", then notifies through informationsReady().
// Throws pappso::PappsoException when the map cannot be read; an error message
// is also built when the type is not "psm".
// NOTE(review): the first signature line is missing from this listing; the page
// index lists `virtual void readInformations(pappso::UiMonitorInterface &monitor)`.
// The parameter carries [[maybe_unused]] although `monitor` is used on the last
// statement of the body.
175void
177 [[maybe_unused]])
178{
179 bool is_ok;
// The map is not cleared before reading (the clear() call is commented out).
180 // m_cborInformations.clear();
181 is_ok = mpa_cborReader->readCborMap(m_cborInformations);
182
183 if(!is_ok)
184 {
185 throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
186 }
187 qDebug() << m_cborInformations.keys();
188 if(m_cborInformations.value("type").toString() != "psm")
189 {
// Collect every key of the header so the error message shows what was found.
190 QStringList all_keys;
191 for(auto it_k : m_cborInformations.keys())
192 {
193 all_keys << it_k.toString();
194 }
// NOTE(review): the line that starts this statement (presumably a throw) is
// missing from the listing.
196 QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
197 .arg(m_cborInformations.value("type").toString())
198 .arg(all_keys.join(" ")));
199 }
200 informationsReady(monitor);
201}
202
// Reads the header log array into m_cborLog.
// Throws pappso::PappsoException when the array cannot be read.
// NOTE(review): the signature line is missing from this listing; the page index
// lists `virtual void readLog(pappso::UiMonitorInterface &monitor)`, presumably
// this function.
203void
205{
206 bool is_ok;
// NOTE(review): this commented-out call names m_cborInformations although the
// function fills m_cborLog — looks like a copy-paste leftover.
207 // m_cborInformations.clear();
208 is_ok = mpa_cborReader->readCborArray(m_cborLog);
209
210 if(!is_ok)
211 {
212 throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
213 }
214}
215
216
// Clears m_cborParameterMap, reads the parameter map into it, then notifies
// through parameterMapReady().
// Throws pappso::PappsoException when the map cannot be read.
// NOTE(review): the first signature line is missing from this listing; the page
// index lists `virtual void readParameterMap(pappso::UiMonitorInterface &monitor)`.
// The parameter carries [[maybe_unused]] although `monitor` is used below.
217void
219 [[maybe_unused]])
220{
221 bool is_ok;
222 m_cborParameterMap.clear();
223 is_ok = mpa_cborReader->readCborMap(m_cborParameterMap);
224
225 if(!is_ok)
226 {
227 throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
228 }
229 parameterMapReady(monitor);
230}
231
232void
238
239
// Reads one protein reference: a mandatory "accession" string, optionally
// followed by a "positions" array, and returns it as a PsmProteinRef.
// Throws pappso::PappsoException when "accession" is not the current key or is
// not a string.
// NOTE(review): the return type and signature lines are missing from this
// listing; the page index lists `PsmProteinRef readPsmProteinRef(bool &is_ok)`,
// which would explain why `is_ok` is assigned below without a local declaration.
242{
243 PsmProteinRef protein_ref;
244 protein_ref.accession = "";
245 protein_ref.positions.clear();
246 mpa_cborReader->enterContainer();
248 qDebug() << m_expectedString;
249 if(m_expectedString == "accession")
250 {
// is_ok is only written here: it reports whether the accession was decoded.
251 is_ok = mpa_cborReader->decodeString(protein_ref.accession);
252 if(!is_ok)
253 {
254 throw pappso::PappsoException("ERROR: protein accession is not a string");
255 }
256 }
257 else
258 {
259 throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
260 }
261
263 qDebug() << m_expectedString;
// "positions" is optional; when it is absent the positions list stays empty.
264 if(m_expectedString == "positions")
265 {
// The result of readArray() is not checked.
266 mpa_cborReader->readArray(protein_ref.positions);
267
268 // mpa_cborReader->next();
269 }
270 mpa_cborReader->leaveContainer();
271
272 qDebug() << "end";
273 return protein_ref;
274}
275
276
// Reads one file description, a container whose expected key is "name", and
// returns it as a PsmFile.
// Throws pappso::PappsoException when "name" is not the current key or its
// value is not a string.
// NOTE(review): the return type and signature lines are missing from this
// listing, so the function name and parameters cannot be confirmed here.
279{
280 PsmFile file;
281 mpa_cborReader->enterContainer();
283 if(m_expectedString == "name")
284 {
285 if(!mpa_cborReader->decodeString(file.name))
286 {
287 throw pappso::PappsoException("file name is not a string");
288 }
289 }
290 else
291 {
292 throw pappso::PappsoException("ERROR: expecting name element in file");
293 }
294 mpa_cborReader->leaveContainer();
295 return file;
296}
297
298
// Serialises one PsmFile as a map with a single "name" entry holding
// psm_file.name.
// NOTE(review): the first signature line is missing from this listing; the page
// index lists `void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)`.
299void
301 const PsmFile &psm_file)
302{
303 writer.startMap();
304 writer.append("name");
305 writer.append(psm_file.name);
306 writer.endMap();
307}
308
// Serialises a list of PsmFile as an array, writing each element with
// writePsmFile(). An empty list produces an empty array.
// NOTE(review): the first signature line is missing from this listing; the page
// index lists
// `void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)`.
309void
311 const std::vector<PsmFile> &file_list)
312{
313 writer.startArray();
314 for(auto &psm_file : file_list)
315 {
316 writePsmFile(writer, psm_file);
317 }
318 writer.endArray();
319}
320
321
// Parses one sample container, checking the keys in this order:
//   "name" (mandatory, stored in m_currentSampleName),
//   "identification_file_list" (optional),
//   "peaklist_file" (mandatory),
//   "scan_list" (mandatory, one readScan() call per element).
// sampleStarted() is called before the scan list is read and sampleFinished()
// after the sample container is left.
// Throws pappso::PappsoException when a mandatory key is not the current key or
// the sample name is not a string.
// NOTE(review): the signature line and several interior lines are missing from
// this listing; the page index lists
// `virtual void readSample(pappso::UiMonitorInterface &monitor)`, presumably this
// function. The missing lines between the key tests presumably refresh
// m_expectedString — confirm against the real source.
322void
324{
325 //"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
326 qDebug();
327 mpa_cborReader->enterContainer();
329
330 qDebug() << m_expectedString;
331 if(m_expectedString == "name")
332 {
333 if(!mpa_cborReader->decodeString(m_currentSampleName))
334 {
335 throw pappso::PappsoException("sample name is not a string");
336 }
337 }
338 else
339 {
// NOTE(review): the message says "in file" although a sample is being read;
// the other messages of this function say "in sample".
340 throw pappso::PappsoException("ERROR: expecting name element in file");
341 }
342 //"identification_file_list": [{ "name":
343 //"/home/langella/data1/tandem/tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.xml",
344 //}],
345
347
348 qDebug() << m_expectedString;
350 if(m_expectedString == "identification_file_list")
351 {
352 bool is_ok;
353 mpa_cborReader->enterContainer();
354
355 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
356 {
// NOTE(review): the loop body is not visible in this listing.
358 }
359 mpa_cborReader->leaveContainer();
360
362 }
363 //"peaklist_file": {"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.mzml"
364 //},
365
366 if(m_expectedString == "peaklist_file")
367 {
368 bool is_ok;
// NOTE(review): the statement that reads the peaklist file is not visible in
// this listing.
370 }
371 else
372 {
373 throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
374 }
375 //"scan_list": [
376 sampleStarted(monitor);
378 if(m_expectedString == "scan_list")
379 {
380 mpa_cborReader->enterContainer();
381
// One readScan() call per array element, stopping on the first reader error.
382 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
383 {
384 readScan(monitor);
385 }
386 mpa_cborReader->leaveContainer();
387 }
388 else
389 {
390 throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
391 }
392 mpa_cborReader->leaveContainer();
393
394 sampleFinished(monitor);
395}
396
// Parses one scan container, checking the keys in this order:
//   "id" (mandatory map, stored in m_cborScanId),
//   "precursor" (optional map, stored in m_cborScanPrecursor),
//   "ms2" (optional map, stored in m_cborScanMs2),
//   "psm_list" (optional array, one readPsm() call per element).
// scanStarted() is called before the PSM list is read and scanFinished() after
// the scan container is left. The three maps are cleared before being read, so
// an absent optional element leaves its map empty.
// NOTE(review): the signature line and several interior lines are missing from
// this listing; the page index lists
// `virtual void readScan(pappso::UiMonitorInterface &monitor)`, presumably this
// function. The missing lines between the key tests presumably refresh
// m_expectedString — confirm against the real source.
397void
399{
400 qDebug();
401 m_cborScanId.clear();
402 mpa_cborReader->enterContainer();
403 //"id": {
404 //"index": 1976
405 //},
406 qDebug() << "scan begin";
407
409 qDebug() << m_expectedString;
410 if(m_expectedString == "id")
411 {
412 if(!mpa_cborReader->readCborMap(m_cborScanId))
413 {
414 throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
415 }
416 }
417 else
418 {
// NOTE(review): the line that starts this statement (presumably a throw) is
// missing from the listing.
420 QObject::tr("ERROR: expecting id element in scan not %1").arg(m_expectedString));
421 }
422 //"precursor": {
423 //"z": 2,
424 //"mz": 1120.529471
425 //},
426
428 m_cborScanPrecursor.clear();
429 qDebug() << m_expectedString;
430 if(m_expectedString == "precursor")
431 {
432 if(!mpa_cborReader->readCborMap(m_cborScanPrecursor))
433 {
434 throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
435 }
436 }
437 //"ms2": {
438 //"rt": 12648.87,
439 //"mz" :[1,2,3,4],
440 //"intensity" : [1,2,3,4]
441 //},
442
444 qDebug() << m_expectedString;
445 m_cborScanMs2.clear();
446 if(m_expectedString == "ms2")
447 {
448 if(!mpa_cborReader->readCborMap(m_cborScanMs2))
449 {
// NOTE(review): the line that starts this statement and the first .arg() call
// are missing from the listing; the visible arguments are the scan index and
// the reader's last error text.
451 QObject::tr("ms2 element in scan is not a cbor map %1 %2:\n%3")
453 .arg(m_cborScanId.value("index").toInteger())
454 .arg(mpa_cborReader->lastError().toString()));
455 }
456 }
457 //"psm_list": [
458 scanStarted(monitor);
459
461 qDebug() << m_expectedString;
462 if(m_expectedString == "psm_list")
463 {
464 mpa_cborReader->enterContainer();
// One readPsm() call per array element, stopping on the first reader error.
465 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
466 {
467 readPsm(monitor);
468 }
469 mpa_cborReader->leaveContainer();
470 }
471
472 mpa_cborReader->leaveContainer();
473 qDebug() << "scan end";
474 scanFinished(monitor);
475 qDebug();
476}
477
// Parses one PSM container, checking the keys in this order:
//   "proforma" (mandatory string, stored in m_currentPsmProforma),
//   "protein_list" (mandatory array),
//   "eval" (map stored in m_cborScanPsmEval; a key must be readable at that
//   point, but a key other than "eval" is skipped without error).
// psmReady() is called once the PSM container has been left.
// Throws pappso::PappsoException on a missing mandatory key or a malformed
// value.
// NOTE(review): the signature line and several interior lines are missing from
// this listing; the page index lists
// `virtual void readPsm(pappso::UiMonitorInterface &monitor)`, presumably this
// function.
478void
480{
481 bool is_ok;
482 mpa_cborReader->enterContainer();
484 // "proforma": "AQEEM[+15.99491]AQVAK",
485 if(m_expectedString == "proforma")
486 {
487 if(!mpa_cborReader->decodeString(m_currentPsmProforma))
488 {
489 throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
490 }
491 }
492 else
493 {
494 throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
495 }
// NOTE(review): the example below spells the key "position", whereas the
// protein reference reader in this file tests for "positions".
496 //"protein_list" : [
497 //{
498 //"accession": "GRMZM2G083841_P01",
499 //"position": [15,236]
500 //}
501 //],
502
505 qDebug() << m_expectedString;
506 if(m_expectedString == "protein_list")
507 {
508 mpa_cborReader->enterContainer();
509 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
510 {
// NOTE(review): the statement that reads one protein reference and sets is_ok
// is missing from the listing (presumably a readPsmProteinRef(is_ok) call).
512 if(!is_ok)
513 {
// NOTE(review): the line that starts this statement (presumably a throw) is
// missing from the listing.
515 QObject::tr("ERROR: reading protein_list element in psm-scan"));
516 }
517 }
518 mpa_cborReader->leaveContainer();
519 }
520 else
521 {
522 throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
523 }
524 //"eval": {
525 qDebug();
526 m_cborScanPsmEval.clear();
// A key must be available here; failing to obtain one is treated as an error.
527 if(!getExpectedString())
528 {
// NOTE(review): the line that starts this statement (presumably a throw) is
// missing from the listing.
530 QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
531 }
532 if(m_expectedString == "eval")
533 {
534 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmEval);
535 if(!is_ok)
536 {
537 throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
538 }
539 }
540
541 qDebug() << m_expectedString;
542
543
544 mpa_cborReader->leaveContainer();
545 qDebug();
546 psmReady(monitor);
547}
548
// Hook with an empty body: does nothing here.
// NOTE(review): the signature line is missing from this listing; the page index
// lists `virtual void psmReady(pappso::UiMonitorInterface &monitor)`, so this is
// presumably the default implementation meant to be overridden — confirm.
549void
551{
552 // PSM is ready, do what you want :)
553}
554
555void
559
560void
564
565void
569
570
571void
575
576void
580
581void
585
586void
590
591void
595
596void
600
// Returns the peptide built for the current PSM.
// Throws pappso::PappsoException when m_currentPsmProforma is empty.
// NOTE(review): the return type and signature lines are missing from this
// listing; the page index lists `pappso::PeptideSp getCurrentPsmPeptideSp() const`,
// presumably this function.
603{
604 pappso::PeptideSp peptide_sp;
605 if(m_currentPsmProforma.isEmpty())
606 {
607 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
608 }
609 else
610 {
// NOTE(review): the statement that assigns peptide_sp is missing from the
// listing; the page index references
// `static PeptideSp parseString(const QString &pepstr)`, presumably used here.
612 }
613 return peptide_sp;
614}
615
// Builds a qualified mass spectrum from the scan data currently held in
// m_cborScanId, m_cborScanPrecursor and m_cborScanMs2, and returns it through
// makeQualifiedMassSpectrumSPtr().
// Throws pappso::PappsoException when the peaklist file name or any of the
// three scan maps is empty, or when "index", "mz" or "intensity" is missing.
// NOTE(review): the return type and signature lines are missing from this
// listing; the page index lists
// `pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const`,
// presumably this function.
618{
// Preconditions: every piece of scan state must have been filled in.
619 if(m_currentPeaklistFile.name.isEmpty())
620 {
621 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
622 }
623 if(m_cborScanId.isEmpty())
624 {
625 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
626 }
627 if(m_cborScanPrecursor.isEmpty())
628 {
629 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
630 }
631 if(m_cborScanMs2.isEmpty())
632 {
633 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
634 }
635
// Mandatory keys: the scan index and both ms2 arrays.
636 if(!m_cborScanId.keys().contains("index"))
637 {
638 throw pappso::PappsoException("There is no scan index");
639 }
640 if(!m_cborScanMs2.keys().contains("mz"))
641 {
642 throw pappso::PappsoException("There is no ms2 mz values");
643 }
644 if(!m_cborScanMs2.keys().contains("intensity"))
645 {
646 throw pappso::PappsoException("There is no ms2 intensity values");
647 }
// NOTE(review): the lines that declare `msrun_id` are missing from this
// listing.
650 pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
651 pappso::MassSpectrumId ms_id(msrun_id_sp);
652 ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());
653
654 // native_id
// The native id is optional and only set when the key is present.
655 if(m_cborScanId.keys().contains("native_id"))
656 {
657 ms_id.setNativeId(m_cborScanId.value("native_id").toString());
658 }
659
// Pair each mz value with the intensity found at the same position.
// NOTE(review): the two arrays are not checked to have the same length, and the
// "intensity" array is looked up again on every iteration.
660 std::vector<DataPoint> data_point_vector;
661 std::size_t i = 0;
662 for(auto cbor_mz_value : m_cborScanMs2.value("mz").toArray())
663 {
664 data_point_vector.push_back(
665 {cbor_mz_value.toDouble(), m_cborScanMs2.value("intensity").toArray().at(i).toDouble()});
666 i++;
667 }
668
669
670 MassSpectrum mass_spectrum(data_point_vector);
671 pappso::PrecursorIonData precursor_ion_data;
672
673 pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
674 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
// The MS level is always set to 2 here.
675 qualified_mass_spectrum.setMsLevel(2);
676
// Precursor fields are optional: each one is copied only when its key exists.
677 if(m_cborScanPrecursor.keys().contains("z"))
678 {
679 precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
680 }
681 if(m_cborScanPrecursor.keys().contains("mz"))
682 {
683 precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
684 }
685 if(m_cborScanPrecursor.keys().contains("intensity"))
686 {
687 precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
688 }
689 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
// The retention time is optional; it is passed to setRtInSeconds() as read.
690 if(m_cborScanMs2.keys().contains("rt"))
691 {
692 qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
693 }
694
695
696 return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
697}
698
699void
704
705void
710
711
712double
713PsmFileReaderBase::getPrecursorMass(double mz_prec, uint charge) const
714{
715 // compute precursor mass given the charge state
716 mz_prec = mz_prec * (double)charge;
717 mz_prec -= (MHPLUS * (double)charge);
718 return mz_prec;
719}
720
721
722} // namespace psm
723} // namespace cbor
724} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setSampleName(const QString &name)
set a sample name for this MsRunId
Definition msrunid.cpp:79
static PeptideSp parseString(const QString &pepstr)
Class representing a fully specified mass spectrum.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
QualifiedMassSpectrumSPtr makeQualifiedMassSpectrumSPtr() const
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
std::vector< PsmProteinRef > m_currentPsmProteinRefList
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
pappso::PeptideSp getCurrentPsmPeptideSp() const
virtual void sampleListStarted(pappso::UiMonitorInterface &monitor)
void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)
double getPrecursorMass(double mz_prec, uint charge) const
convenient function to compute precursor ion mass
virtual void logReady(pappso::UiMonitorInterface &monitor)
virtual void scanStarted(pappso::UiMonitorInterface &monitor)
virtual void readPsm(pappso::UiMonitorInterface &monitor)
virtual void readLog(pappso::UiMonitorInterface &monitor)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor)
virtual void sampleStarted(pappso::UiMonitorInterface &monitor)
virtual void readParameterMap(pappso::UiMonitorInterface &monitor)
void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
virtual void readScan(pappso::UiMonitorInterface &monitor)
virtual void readInformations(pappso::UiMonitorInterface &monitor)
virtual void scanFinished(pappso::UiMonitorInterface &monitor)
virtual void sampleListFinished(pappso::UiMonitorInterface &monitor)
virtual void psmReady(pappso::UiMonitorInterface &monitor)
virtual void informationsReady(pappso::UiMonitorInterface &monitor)
void readRoot(pappso::UiMonitorInterface &monitor)
std::vector< PsmFile > m_currentIdentificationFileList
void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)
virtual void fastaFilesReady(pappso::UiMonitorInterface &monitor)
virtual void parameterMapReady(pappso::UiMonitorInterface &monitor)
virtual void readProteinMap(pappso::UiMonitorInterface &monitor)
virtual void readSample(pappso::UiMonitorInterface &monitor)
PsmProteinRef readPsmProteinRef(bool &is_ok)
virtual void sampleFinished(pappso::UiMonitorInterface &monitor)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
const pappso_double MHPLUS(1.007276466879)
unsigned int uint
Definition types.h:68