libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
chemicalformula.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/chemicalformula.cpp
3 * \date 06/04/2025
4 * \author Olivier Langella
5 * \brief structure to describe chemical formula
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of the PAPPSOms++ library.
13 *
14 * PAPPSOms++ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * PAPPSOms++ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#include <QRegularExpression>
33
34
35const QString
37{
38 QString atom;
39 switch(isotope)
40 {
42 atom = "C";
43 break;
45 atom = "(13)C";
46 break;
48 atom = "H";
49 break;
51 atom = "(2)H";
52 break;
54 atom = "N";
55 break;
57 atom = "(15)N";
58 break;
60 atom = "O";
61 break;
63 atom = "(17)O";
64 break;
66 atom = "(18)O";
67 break;
69 atom = "P";
70 break;
72 atom = "S";
73 break;
75 atom = "(33)S";
76 break;
78 atom = "(34)S";
79 break;
81 atom = "(36)S";
82 break;
83 }
84 return QString("%1 %2").arg(atom).arg(count);
85}
86
87
91
95
100
101
103{
104 qDebug();
105 std::int16_t count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::C13);
106 std::int16_t count = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::C) - count_iso;
107 qDebug() << count;
108 if(count > 0)
109 {
110 m_isotopeVector.push_back({Enums::Isotope::C, count});
111 }
112
113 if(count_iso > 0)
114 {
115 m_isotopeVector.push_back({Enums::Isotope::C13, count_iso});
116 }
117
118 count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::H2);
119 count = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::H) - count_iso;
120 if(count > 0)
121 {
122 m_isotopeVector.push_back({Enums::Isotope::H, count});
123 }
124
125 if(count_iso > 0)
126 {
127 m_isotopeVector.push_back({Enums::Isotope::H2, count_iso});
128 }
129
130 qDebug();
131 count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::O17);
132 std::int16_t count_iso2 = atom_interface.getNumberOfIsotope(Enums::Isotope::O18);
133 count = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::O) - count_iso - count_iso2;
134 if(count > 0)
135 {
136 m_isotopeVector.push_back({Enums::Isotope::O, count});
137 }
138
139 if(count_iso > 0)
140 {
141 m_isotopeVector.push_back({Enums::Isotope::O17, count_iso});
142 }
143
144 if(count_iso2 > 0)
145 {
146 m_isotopeVector.push_back({Enums::Isotope::O18, count_iso2});
147 }
148
149 qDebug();
150
151 count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::N15);
152 count = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::N) - count_iso;
153 if(count > 0)
154 {
155 m_isotopeVector.push_back({Enums::Isotope::N, count});
156 }
157
158 if(count_iso > 0)
159 {
160 m_isotopeVector.push_back({Enums::Isotope::N15, count_iso});
161 }
162
163 qDebug();
164
165 count = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::S);
166 count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::S33);
167 if(count_iso > 0)
168 {
169 m_isotopeVector.push_back({Enums::Isotope::S33, count_iso});
170 }
171 count = count - count_iso;
172 qDebug();
173
174 count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::S34);
175 if(count_iso > 0)
176 {
177 m_isotopeVector.push_back({Enums::Isotope::S34, count_iso});
178 }
179 count = count - count_iso;
180 qDebug();
181
182 count_iso = atom_interface.getNumberOfIsotope(Enums::Isotope::S36);
183 if(count_iso > 0)
184 {
185 m_isotopeVector.push_back({Enums::Isotope::S36, count_iso});
186 }
187 count = count - count_iso;
188
189 if(count > 0)
190 {
191 m_isotopeVector.push_back({Enums::Isotope::S, count});
192 }
193 qDebug();
194 count = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::P);
195 if(count > 0)
196 {
197 m_isotopeVector.push_back({Enums::Isotope::P, count});
198 }
199
200 qDebug() << m_isotopeVector.size();
201}
202
209
212{
213 ChemicalFormula new_formula;
214
215 for(const IsotopeCount &isotope_count : m_isotopeVector)
216 {
217 std::int16_t count = isotope_count.count * -1;
218 new_formula.m_isotopeVector.push_back({isotope_count.isotope, count});
219 }
220
221 return new_formula;
222}
223
226{
227 ChemicalFormula new_formula(*this);
228
229 new_formula.m_isotopeVector.insert(new_formula.m_isotopeVector.end(),
230 to_add.m_isotopeVector.begin(),
231 to_add.m_isotopeVector.end());
232
233 new_formula.simplify();
234
235 return new_formula;
236}
237
238
239int
241{
242 int count = 0;
243 for(auto &isotope_count : m_isotopeVector)
244 {
245 switch(atom)
246 {
248 if((isotope_count.isotope == Enums::Isotope::C) ||
249 (isotope_count.isotope == Enums::Isotope::C13))
250 count += isotope_count.count;
251 break;
252
254 if((isotope_count.isotope == Enums::Isotope::H) ||
255 (isotope_count.isotope == Enums::Isotope::H2))
256 count += isotope_count.count;
257 break;
259 if((isotope_count.isotope == Enums::Isotope::O) ||
260 (isotope_count.isotope == Enums::Isotope::O17) ||
261 (isotope_count.isotope == Enums::Isotope::O18))
262 count += isotope_count.count;
263 break;
265 if((isotope_count.isotope == Enums::Isotope::N) ||
266 (isotope_count.isotope == Enums::Isotope::N15))
267 count += isotope_count.count;
268 break;
270 if((isotope_count.isotope == Enums::Isotope::S) ||
271 (isotope_count.isotope == Enums::Isotope::S33) ||
272 (isotope_count.isotope == Enums::Isotope::S34) ||
273 (isotope_count.isotope == Enums::Isotope::S36))
274 count += isotope_count.count;
275 break;
276 default:
277 break;
278 }
279 }
280 return count;
281}
282
283int
285{
286 int count = 0;
287 for(auto &isotope_count : m_isotopeVector)
288 {
289 if(isotope_count.isotope == isotope)
290 count += isotope_count.count;
291 }
292 return count;
293}
294
295double
297{
298
299 double mass = 0;
300 for(auto &isotope_count : m_isotopeVector)
301 {
302 switch(isotope_count.isotope)
303 {
305 mass += ((DIFFC12C13 + MASSCARBON) * (double)isotope_count.count);
306 break;
308 mass += (MASSCARBON * (double)isotope_count.count);
309 break;
311 mass += (MPROTIUM * (double)isotope_count.count);
312 break;
314 mass += ((DIFFH1H2 + MPROTIUM) * (double)isotope_count.count);
315 break;
317 mass += (MASSNITROGEN * (double)isotope_count.count);
318 break;
320 mass += ((MASSNITROGEN + DIFFN14N15) * (double)isotope_count.count);
321 break;
323 mass += (MASSOXYGEN * (double)isotope_count.count);
324 break;
326 mass += ((DIFFO16O17 + MASSOXYGEN) * (double)isotope_count.count);
327 break;
329 mass += ((DIFFO16O18 + MASSOXYGEN) * (double)isotope_count.count);
330 break;
332 mass += (MASSSULFUR * (double)isotope_count.count);
333 break;
335 mass += ((DIFFS32S33 + MASSSULFUR) * (double)isotope_count.count);
336 break;
338 mass += ((DIFFS32S34 + MASSSULFUR) * (double)isotope_count.count);
339 break;
341 mass += ((DIFFS32S36 + MASSSULFUR) * (double)isotope_count.count);
342 break;
344 mass += (MASSPHOSPHORUS * (double)isotope_count.count);
345 break;
346 }
347 }
348 return mass;
349}
350
351void
353{
354 m_isotopeVector.clear();
355
356 if(term.isUnimod())
357 {
358 qDebug();
360 }
361 else
362 {
363 qDebug() << "is_a " << term.m_isA.join(" ");
364 if(term.isA("MOD:01441"))
365 {
366 qDebug() << "term.isA(MOD:01441)";
367 if(term.m_origin.isEmpty())
368 {
370 QObject::tr("origin not found for term : [%1]").arg(term.getAccession()));
371 }
373 qDebug();
374 // new_mod->m_mass = AaBase::getAaMass(term.m_origin[0].toLatin1());
375 }
376 else
377 {
379 }
380 }
381 qDebug();
382}
383
384void
386{
387 // C(-6) 13C(6) N(-2) 15N(2)
388
389 // atom alone
390 QRegularExpression rx("(^|\\s)([C,H,O,N,S,P])($|\\s)");
391 QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);
392
393 while(i.hasNext())
394 {
395 QRegularExpressionMatch match = i.next();
396
397 qDebug() << match.captured(2) << " " << match.captured(2);
398 std::int8_t count = 1;
399
400 qDebug() << count;
401 if(match.captured(2) == "C")
402 {
403 m_isotopeVector.push_back({Enums::Isotope::C, count});
404 }
405 else if(match.captured(2) == "H")
406 {
407 m_isotopeVector.push_back({Enums::Isotope::H, count});
408 }
409 else if(match.captured(2) == "N")
410 {
411 m_isotopeVector.push_back({Enums::Isotope::N, count});
412 }
413 else if(match.captured(2) == "O")
414 {
415 m_isotopeVector.push_back({Enums::Isotope::O, count});
416 }
417 else if(match.captured(2) == "S")
418 {
419 m_isotopeVector.push_back({Enums::Isotope::S, count});
420 }
421 else if(match.captured(2) == "P")
422 {
423 m_isotopeVector.push_back({Enums::Isotope::P, count});
424 }
425 }
426
427 // C(-6) 13C(6) N(-2) 15N(2)
428 rx.setPattern("(^|\\s)([C,H,O,N,S,P])\\(([-]{0,1}\\d+)\\)");
429 i = rx.globalMatch(diff_formula);
430
431 while(i.hasNext())
432 {
433 QRegularExpressionMatch match = i.next();
434
435 qDebug() << match.captured(2) << " " << match.captured(3);
436 std::int8_t count = match.captured(3).toInt();
437
438 qDebug() << count;
439 if(match.captured(2) == "C")
440 {
441 m_isotopeVector.push_back({Enums::Isotope::C, count});
442 }
443 else if(match.captured(2) == "H")
444 {
445 m_isotopeVector.push_back({Enums::Isotope::H, count});
446 }
447 else if(match.captured(2) == "N")
448 {
449 m_isotopeVector.push_back({Enums::Isotope::N, count});
450 }
451 else if(match.captured(2) == "O")
452 {
453 m_isotopeVector.push_back({Enums::Isotope::O, count});
454 }
455 else if(match.captured(2) == "S")
456 {
457 m_isotopeVector.push_back({Enums::Isotope::S, count});
458 }
459 else if(match.captured(2) == "P")
460 {
461 m_isotopeVector.push_back({Enums::Isotope::P, count});
462 }
463 }
464
465 // C(-6) 13C(6) N(-2) 15N(2)
466 // look for isotopes :
467 rx.setPattern("(^|\\s)(\\d+)([C,H,O,N,S])($|\\s)");
468
469 i = rx.globalMatch(diff_formula);
470
471 while(i.hasNext())
472 {
473 QRegularExpressionMatch match = i.next();
474
475 qDebug() << match.captured(2) << " " << match.captured(3);
476
477 addIsotopeNumberCount(match.captured(3), match.captured(2), 1);
478 }
479 // C(-6) 13C(6) N(-2) 15N(2)
480 // look for isotopes :
481 rx.setPattern("(^|\\s)(\\d+)([C,H,O,N,S])\\(([-]{0,1}\\d+)\\)");
482
483 i = rx.globalMatch(diff_formula);
484
485 while(i.hasNext())
486 {
487 QRegularExpressionMatch match = i.next();
488
489 qDebug() << match.captured(2) << " " << match.captured(3) << " " << match.captured(4);
490
491 std::int8_t number_of_isotopes = match.captured(4).toInt();
492
493 addIsotopeNumberCount(match.captured(3), match.captured(2), number_of_isotopes);
494 }
495}
496
497void
499 const QString &atom_isotope_number,
500 std::int8_t count)
501{
502
503 if(atom_str == "C")
504 {
505 if(atom_isotope_number == "12")
506 {
507 m_isotopeVector.push_back({Enums::Isotope::C, count});
508 }
509 else if(atom_isotope_number == "13")
510 {
511 m_isotopeVector.push_back({Enums::Isotope::C13, count});
512 }
513 }
514 else if(atom_str == "H")
515 {
516 if(atom_isotope_number == "1")
517 {
518 m_isotopeVector.push_back({Enums::Isotope::H, count});
519 }
520 else if(atom_isotope_number == "2")
521 {
522 m_isotopeVector.push_back({Enums::Isotope::H2, count});
523 }
524 }
525 else if(atom_str == "N")
526 {
527
528 if(atom_isotope_number == "14")
529 {
530 m_isotopeVector.push_back({Enums::Isotope::N, count});
531 }
532 else if(atom_isotope_number == "15")
533 {
534 m_isotopeVector.push_back({Enums::Isotope::N15, count});
535 }
536 }
537 else if(atom_str == "O")
538 {
539 if(atom_isotope_number == "16")
540 {
541 m_isotopeVector.push_back({Enums::Isotope::O, count});
542 }
543 else if(atom_isotope_number == "17")
544 {
545 m_isotopeVector.push_back({Enums::Isotope::O17, count});
546 }
547 else if(atom_isotope_number == "18")
548 {
549 m_isotopeVector.push_back({Enums::Isotope::O18, count});
550 }
551 }
552 else if(atom_str == "S")
553 {
554 if(atom_isotope_number == "32")
555 {
556 m_isotopeVector.push_back({Enums::Isotope::S, count});
557 }
558 else if(atom_isotope_number == "33")
559 {
560 m_isotopeVector.push_back({Enums::Isotope::S33, count});
561 }
562 else if(atom_isotope_number == "34")
563 {
564 m_isotopeVector.push_back({Enums::Isotope::S34, count});
565 }
566 else if(atom_isotope_number == "36")
567 {
568 m_isotopeVector.push_back({Enums::Isotope::S36, count});
569 }
570 }
571 else
572 {
573 // not known
574
575 throw pappso::ExceptionNotFound(QObject::tr("atom string: [%1] with atom number %2 not found")
576 .arg(atom_str)
577 .arg(atom_isotope_number));
578 }
579}
580
581
582void
584{
585 QRegularExpression rx("(^|\\s)([C,H,O,N,S,P])\\s([-]{0,1}\\d+)");
586 QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);
587
588 while(i.hasNext())
589 {
590 QRegularExpressionMatch match = i.next();
591
592 qDebug() << match.captured(2) << " " << match.captured(2) << " " << match.captured(3);
593 std::int8_t count = match.captured(3).toInt();
594
595 qDebug() << count;
596 if(match.captured(2) == "C")
597 {
598 m_isotopeVector.push_back({Enums::Isotope::C, count});
599 }
600 else if(match.captured(2) == "H")
601 {
602 m_isotopeVector.push_back({Enums::Isotope::H, count});
603 }
604 else if(match.captured(2) == "N")
605 {
606 m_isotopeVector.push_back({Enums::Isotope::N, count});
607 }
608 else if(match.captured(2) == "O")
609 {
610 m_isotopeVector.push_back({Enums::Isotope::O, count});
611 }
612 else if(match.captured(2) == "S")
613 {
614 m_isotopeVector.push_back({Enums::Isotope::S, count});
615 }
616 else if(match.captured(2) == "P")
617 {
618 m_isotopeVector.push_back({Enums::Isotope::P, count});
619 }
620 }
621
622 // look for isotopes :
623 rx.setPattern("\\((\\d+)\\)([C,H,O,N,S])\\s([-]{0,1}\\d+)");
624
625 i = rx.globalMatch(diff_formula);
626
627 while(i.hasNext())
628 {
629 QRegularExpressionMatch match = i.next();
630
631 qDebug() << match.captured(1) << " " << match.captured(2) << " " << match.captured(3);
632
633 std::int8_t number_of_isotopes = match.captured(3).toInt();
634
635
636 addIsotopeNumberCount(match.captured(2), match.captured(1), number_of_isotopes);
637 }
638}
639
640void
642{
643 bool added = false;
644 for(IsotopeCount &iso_count : m_isotopeVector)
645 {
646 if(iso_count.isotope == isotope_count.isotope)
647 {
648 added = true;
649 iso_count.count += isotope_count.count;
650 }
651 }
652
653 if(!added)
654 m_isotopeVector.push_back(isotope_count);
655}
656
657
658const QString
660{
661 QStringList str_formula;
662 for(const IsotopeCount &iso_count : m_isotopeVector)
663 {
664 if(iso_count.count != 0)
665 str_formula << iso_count.toString();
666 }
667 str_formula.sort();
668 return str_formula.join(" ");
669}
670
671void
673{
674 IsotopeCount *isotope_ref_p = nullptr;
675 std::int16_t cumul_count = 0;
676 for(IsotopeCount &iso_count : m_isotopeVector)
677 {
678 if(isotope == Enums::Isotope::C13)
679 {
680 if(iso_count.isotope == Enums::Isotope::C)
681 {
682 cumul_count += iso_count.count;
683 iso_count.count = 0;
684 }
685 if(iso_count.isotope == Enums::Isotope::C13)
686 {
687 cumul_count += iso_count.count;
688 isotope_ref_p = &iso_count;
689 }
690 }
691 else if(isotope == Enums::Isotope::N15)
692 {
693 if(iso_count.isotope == Enums::Isotope::N)
694 {
695 cumul_count += iso_count.count;
696 iso_count.count = 0;
697 }
698 if(iso_count.isotope == Enums::Isotope::N15)
699 {
700 cumul_count += iso_count.count;
701 isotope_ref_p = &iso_count;
702 }
703 }
704 else if(isotope == Enums::Isotope::H2)
705 {
706 if(iso_count.isotope == Enums::Isotope::H)
707 {
708 cumul_count += iso_count.count;
709 iso_count.count = 0;
710 }
711 if(iso_count.isotope == Enums::Isotope::H2)
712 {
713 cumul_count += iso_count.count;
714 isotope_ref_p = &iso_count;
715 }
716 }
717 }
718 if(isotope_ref_p == nullptr)
719 {
720 if(isotope == Enums::Isotope::C13)
721 {
722 m_isotopeVector.push_back({Enums::Isotope::C13, cumul_count});
723 }
724 else if(isotope == Enums::Isotope::N15)
725 {
726 m_isotopeVector.push_back({Enums::Isotope::N15, cumul_count});
727 }
728 else if(isotope == Enums::Isotope::H2)
729 {
730 m_isotopeVector.push_back({Enums::Isotope::H2, cumul_count});
731 }
732 }
733 else
734 {
735 isotope_ref_p->count = cumul_count;
736 }
737}
738
739
740void
742{
743 std::vector<IsotopeCount> new_vector;
744 std::sort(m_isotopeVector.begin(), m_isotopeVector.end(), [](IsotopeCount &a, IsotopeCount &b) {
745 return a.isotope < b.isotope;
746 });
747
748 auto it = m_isotopeVector.begin();
749 while(it != m_isotopeVector.end())
750 {
751 IsotopeCount current_element = *it;
752 it++;
753 if(it != m_isotopeVector.end())
754 {
755 if(it->isotope != current_element.isotope)
756 {
757 new_vector.push_back(current_element);
758 }
759 else
760 {
761 while((it != m_isotopeVector.end()) && (it->isotope == current_element.isotope))
762 {
763 current_element.count += it->count;
764 it++;
765 }
766 if(current_element.count != 0)
767 {
768 new_vector.push_back(current_element);
769 }
770 }
771 }
772 }
773 m_isotopeVector = new_vector;
774}
virtual int getNumberOfIsotope(Enums::Isotope isotope) const =0
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
virtual int getNumberOfAtom(Enums::AtomIsotopeSurvey atom) const =0
get the number of atom C, O, N, H in the molecule
int getNumberOfIsotope(Enums::Isotope isotope) const override
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
void simplify()
simplify chemical formula sort, group isotope, remove 0
const QString toString() const
pappso::ChemicalFormula operator-() const
void setOboPsiModTerm(const OboPsiModTerm &term)
get formula from an Obo term
void addIsotopeNumberCount(const QString &atom_str, const QString &atom_isotope_number, std::int8_t count)
void setPsimodDiffFormula(const QString &diff_formula)
std::vector< IsotopeCount > m_isotopeVector
void setFullIsotope(Enums::Isotope isotope)
set full isotope labels
pappso::ChemicalFormula & operator=(const pappso::ChemicalFormula &other)
pappso::ChemicalFormula operator+(const pappso::ChemicalFormula &to_add) const
int getNumberOfAtom(Enums::AtomIsotopeSurvey atom) const override
get the number of atom C, O, N, H in the molecule
void addIsotopeCount(const IsotopeCount &isotope_count)
add an atom or isotope to the forumla
void setUnimodDiffFormula(const QString &diff_formula)
bool isA(const QString &accession) const
tells if this term "is_a" another accession
const QString & getAccession() const
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSPHOSPHORUS(30.973761998)
const pappso_double MASSSULFUR(31.9720711741)
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)
const QString toString() const
Enums::Isotope isotope