libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
peptideproformaparser.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/peptide/peptideproformaparser.cpp
3 * \date 27/11/2023
4 * \author Olivier Langella
5 * \brief parse peptide string in ProForma to pappso::Peptide
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2023 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of the PAPPSOms++ library.
13 *
14 * PAPPSOms++ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * PAPPSOms++ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
34
35namespace pappso
36{
37
38
39// QRegularExpression PeptideProFormaParser::_mod_parser("\\[[^\\]]*\\]");
// Matches the literal prefix "MOD:" followed by one or more digits
// (e.g. "MOD:00397").
40QRegularExpression PeptideProFormaParser::_rx_psimod("MOD:[0-9]+");
// Matches an optionally signed number: digits, an optional '.', then optional
// further digits.
41QRegularExpression PeptideProFormaParser::_rx_modmass("[-+]?[0-9]+\\.?[0-9]*");
42
43//[MOD:01090]@C
// Matches a leading "<...>" group at the start of the string: capture 1 is the
// text between '<' and '>', capture 2 is everything after the closing '>'.
// NOTE(review): inside the character class the commas are literal characters,
// and the sequence ",-," parses as a range from ',' to ','; a literal '-' (as
// well as '+' and '.') is therefore presumably NOT accepted inside the group,
// while '^' and '>' are. Confirm this is the intended set.
44QRegularExpression
45 PeptideProFormaParser::m_firstGlobalMod("^<([\\[,\\],\\,,0-9,A-Z,a-z,:,@,-,^>]*)>(.*)$");
46
// Parses the peptide string `pepstr` and fills `peptide` with amino acids and
// their modifications.
// NOTE(review): the signature line (listing line 48) is missing from this
// extract; presumably this is
// PeptideProFormaParser::parseStringToPeptide(const QString &pepstr, Peptide &peptide)
// as listed in the symbol index of this page. Confirm against the real source.
47void
49{
50 // Peptide
51 // peptide2("C[MOD:00397][MOD:01160]C[MOD:00397]AADDKEAC[MOD:00397]FAVEGPK");
52 // CCAADDKEACFAVEGPK
53 /*
54 <psimod position="1" accession="MOD:00397"/>
55 <psimod position="2" accession="MOD:00397"/>
56 <psimod position="10" accession="MOD:00397"/>
57 <psimod position="1" accession="MOD:01160"/>
58 */
59
60
61 QString peptide_str = pepstr;
62
 // When the string contains exactly one '?', only the part after it is parsed.
63 QStringList res_split = peptide_str.split("?");
64 if(res_split.size() == 2)
65 {
66 peptide_str = res_split.at(1);
67 }
 // Peel off leading "<...>" groups one at a time: capture 1 is the group
 // content, capture 2 is the remainder, which is matched again until no
 // leading group is left.
68 QRegularExpressionMatch match_global_mod = m_firstGlobalMod.match(peptide_str);
69
70 QStringList global_mod_list;
71 while(match_global_mod.hasMatch())
72 {
73 QStringList pline = match_global_mod.capturedTexts();
74 qDebug() << pline[1];
75 if(pline[1] == "13C")
76 {
77 // Carbon 13: <13C>ATPEILTVNSIGQLK
 // NOTE(review): listing line 78 is missing from this extract; presumably a
 // call to peptide.setGlobalModification(...) -- confirm.
79 }
80 else if(pline[1] == "15N")
81 {
82 // Nitrogen 15: <15N>ATPEILTVNSIGQLK
 // NOTE(review): listing line 83 is missing from this extract (see above).
84 }
85 else if(pline[1] == "D")
86 {
87 // Deuterium: <D>ATPEILTVNSIGQLK
 // NOTE(review): listing line 88 is missing from this extract (see above).
89 }
90 else
91 {
92 //<[Oxidation]@C,M>
 // Any other group is stored and applied once the sequence has been read.
93 global_mod_list << pline[1];
94 }
95 peptide_str = pline[2];
96 match_global_mod = m_firstGlobalMod.match(peptide_str);
97 }
98
99
 // Scan the remaining string one character at a time.
100 std::size_t i = 0;
101 std::size_t end = peptide_str.size();
 // NOTE(review): listing line 102 is missing from this extract; presumably the
 // declaration of last_amino_acid used below -- confirm.
103 bool in_cter = false;
104 while(i < end)
105 {
106 QChar aa_char = peptide_str[i];
107 if(aa_char == '[')
108 {
 // Collect the text between '[' and the next ']' into mod.
 // NOTE(review): peptide_str[i] is read right after i++ without checking
 // i < end; a string ending with '[' would index one past the last
 // character -- confirm and guard if needed.
109 QString mod;
110 i++;
111 aa_char = peptide_str[i];
112 while((i < end) && (aa_char != ']'))
113 {
114 mod.append(aa_char);
115 i++;
116 if(i < end)
117 aa_char = peptide_str[i];
118 }
119
120 qDebug() << aa_char;
 // Reaching the end of the string without seeing ']' is reported as a
 // malformed modification.
 // NOTE(review): listing line 123 is missing; presumably the start of a
 // throw statement taking the message below -- confirm.
121 if(aa_char != ']')
122 {
124 QObject::tr("modification string is malformed %1").arg(mod));
125 }
126 // we have a mod
127 // is it a double ?
 // toDouble() sets is_double to true only when the whole text parses as a
 // number; numeric text is treated as a mass delta, anything else is passed
 // to AaModification::getInstance().
128 bool is_double = false;
129 double mass_modif = mod.toDouble(&is_double);
130 AaModificationP aamod;
131 if(is_double)
132 {
133 aamod =
134 Utils::guessAaModificationPbyMonoisotopicMassDelta(last_amino_acid, mass_modif);
135 }
136 else
137 {
138 aamod = AaModification::getInstance(mod);
139 }
 // No amino acid has been read yet: the modification is set as the
 // N-terminal modification.
140 if(peptide.m_aaVec.size() == 0)
141 {
142 if(is_double)
143 {
 // NOTE(review): listing lines 144-145 are missing; presumably they
 // declare better_mod from a mass-based lookup -- confirm.
146 if(better_mod != nullptr)
147 {
148 aamod = better_mod;
149 }
150 }
151 peptide.setNterModification(aamod);
152 }
153 else
154 {
 // in_cter is true once a '-' has been seen after at least one amino
 // acid: the modification is then set as the C-terminal modification.
155 if(in_cter)
156 {
157 if(is_double)
158 {
159 AaModificationP better_mod =
 // NOTE(review): listing line 160 (the callee of this expression) is
 // missing from this extract.
161 mass_modif);
162 if(better_mod != nullptr)
163 {
164 aamod = better_mod;
165 }
166 }
167 peptide.setCterModification(aamod);
168 }
169 else
170 {
 // Otherwise the modification is added to the most recently appended
 // amino acid.
171 peptide.m_aaVec.back().addAaModification(aamod);
172 }
173 }
174 }
175 else
176 {
177 if(aa_char.isLetter())
178 {
 // A letter is appended as an amino acid and remembered in
 // last_amino_acid for mass-delta lookups.
179 qDebug() << aa_char;
180 Aa pappso_aa(aa_char.toLatin1());
181 last_amino_acid = pappso_aa.getAminoAcidChar();
182 peptide.m_aaVec.push_back(pappso_aa);
183 }
184 else if(aa_char == '-')
185 {
 // A '-' seen before any amino acid is ignored; after at least one amino
 // acid it switches following modifications to the C-terminus.
186 if(peptide.m_aaVec.size() > 0)
187 in_cter = true;
188 }
189 else
190 {
 // NOTE(review): listing line 191 is missing; presumably the start of a
 // throw statement taking the message below -- confirm.
192 QObject::tr("%1 is not an amino acid").arg(aa_char));
193 }
194 }
195 i++;
196 }
197
198
 // Apply the stored global groups of the form "[mod]@X,Y": capture 1 is the
 // text inside the brackets, capture 2 the comma-separated targets.
199 for(QString &global_label_str : global_mod_list)
200 {
201 qDebug() << global_label_str;
202
203 QRegularExpression global_label_reg("^\\[(.*)\\]@(.*)$");
204
205 QRegularExpressionMatch match_global = global_label_reg.match(global_label_str);
206
207 if(match_global.hasMatch())
208 {
209 QStringList pline = match_global.capturedTexts();
210 qDebug() << pline[1];
 // NOTE(review): listing line 211 is missing; presumably it declares aamod
 // from pline[1] -- confirm.
212 qDebug() << aamod->getAccession();
213 qDebug() << pline[2];
 // Only the first character of each comma-separated entry is used.
 // NOTE(review): aa_str[0] assumes every entry is non-empty (e.g. no
 // trailing comma) -- confirm.
214 for(QString &aa_str : pline[2].split(","))
215 {
216 qDebug() << aa_str;
217 peptide.addAaModificationOnAllAminoAcid(aamod, (Enums::AminoAcidChar)aa_str[0].toLatin1());
218 }
219 }
220 }
221 // qDebug() << peptide.toProForma();
 // m_proxyMass is reset to -1 before getMass() is called; presumably this
 // forces the cached mass to be recomputed -- confirm in Peptide::getMass().
222 peptide.m_proxyMass = -1;
223 peptide.getMass();
224}
225
// Builds a Peptide from an empty string and returns it through
// Peptide::makePeptideSp().
// NOTE(review): the signature (listing lines 226-227) and listing line 233 are
// missing from this extract; presumably this is
// PeptideProFormaParser::parseString(const QString &pepstr) and line 233 fills
// `peptide` from pepstr -- confirm against the real source.
228{
229
230 // QMutexLocker locker(&_mutex);
231 qDebug();
232 Peptide peptide("");
234 // qDebug() << peptide.toProForma();
235 return (peptide.makePeptideSp());
236}
237
// Builds a Peptide from an empty string and returns it through
// Peptide::makeNoConstPeptideSp().
// NOTE(review): the signature (listing lines 238-239) and listing line 244 are
// missing from this extract; presumably this is
// PeptideProFormaParser::parseNoConstString(const QString &pepstr) and line 244
// fills `peptide` from pepstr -- confirm against the real source.
240{
241
242 // QMutexLocker locker(&_mutex);
243 Peptide peptide("");
245
246 return (peptide.makeNoConstPeptideSp());
247}
248} // namespace pappso
const Enums::AminoAcidChar & getAminoAcidChar() const
Definition aabase.cpp:409
const QString & getAccession() const
static AaModificationP getInstance(const QString &accession)
static NoConstPeptideSp parseNoConstString(const QString &pepstr)
static QRegularExpression _rx_psimod
static PeptideSp parseString(const QString &pepstr)
static QRegularExpression _rx_modmass
static void parseStringToPeptide(const QString &pepstr, Peptide &peptide)
static QRegularExpression m_firstGlobalMod
PeptideSp makePeptideSp() const
Definition peptide.cpp:158
void setNterModification(AaModificationP mod)
Definition peptide.cpp:596
NoConstPeptideSp makeNoConstPeptideSp() const
Definition peptide.cpp:164
void setGlobalModification(Enums::Isotope isotope_kind)
apply 100% isotope replacement
Definition peptide.cpp:773
void setCterModification(AaModificationP mod)
Definition peptide.cpp:611
void addAaModificationOnAllAminoAcid(AaModificationP aaModification, Enums::AminoAcidChar amino_acid)
adds a modification to all amino acid of the sequence
Definition peptide.cpp:235
pappso_double getMass()
Definition peptide.cpp:322
double m_proxyMass
Definition peptide.h:258
std::vector< Aa > m_aaVec
Definition peptide.h:254
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:631
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< const Peptide > PeptideSp
const AaModification * AaModificationP
std::shared_ptr< Peptide > NoConstPeptideSp
Definition peptide.h:96