libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
semiglobalalignment.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/semiglobalalignment.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief protein to spectrum alignment
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 *
19 * This program is free software: you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation, either version 3 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program. If not, see <http://www.gnu.org/licenses/>.
31 */
32
33#include <QString>
34#include <QStringRef>
35#include "semiglobalalignment.h"
36#include "types.h"
39#include "correctiontree.h"
42
43void
45{
46 peaks.clear();
47 interpretation = "";
48 score = 0;
49 begin_shift = 0.0;
50 end_shift = 0.0;
51 shifts.clear();
52 SPC = 0;
53 beginning = 0;
54 end = 0;
55}
56
57
59 const ScoreValues &score_values,
60 const pappso::PrecisionPtr precision_ptr,
61 const pappso::AaCode &aaCode)
62 : m_scorevalues(score_values), m_aaCode(aaCode)
63{
64 m_precision_ptr = precision_ptr;
65 m_interest_cells.push_back({0, 0, 0, 0});
66}
67
68void
70 const QString &protein_seq_in,
71 const QString &protein_id)
72{
73 // TODO don't forget to reset any important variable
74 m_best_alignment.reset();
77
78 std::size_t sequence_length = protein_seq_in.size();
79 QString protein_seq(protein_seq_in);
80 std::reverse(protein_seq.begin(), protein_seq.end());
82 std::vector<AaPosition> aa_positions;
83 m_interest_cells.at(0).n_row = 0;
84 m_interest_cells.at(0).score = 0;
85 m_interest_cells.at(0).beginning = 0;
86 m_interest_cells.at(0).tree_id = 0;
87
88 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
89 {
90 m_interest_cells.at(i).n_row = 0;
92 m_interest_cells.at(i).beginning = 0;
93 m_interest_cells.at(i).tree_id = 0;
94 }
95
96 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
97 {
98 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
99 }
100 m_location_saver.resetLocationSaver();
101 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
102 {
103 if(protein_seq[row_number - 1] == 'U' or protein_seq[row_number - 1] == 'X')
104 {
105 continue;
106 }
107 try
108 {
109 aa = pappso::Enums::AminoAcidChar(protein_seq[row_number - 1].toLatin1());
110 }
111 catch(const pappso::PappsoException &err)
112 {
113 throw err;
114 }
115 aa_positions = spectrum.getAaPositions(aa);
116 updateAlignmentMatrix(protein_seq, row_number, aa_positions, spectrum, true, protein_id);
117 }
118}
119
120void
122 const QString &protein_seq,
123 const QString &protein_id,
124 const std::size_t beginning,
125 const std::size_t length)
126{
127 qDebug();
128 std::size_t length2;
129 if((qsizetype)(beginning + length) <= protein_seq.size())
130 {
131 length2 = length;
132 }
133 else
134 {
135 length2 = protein_seq.size() - beginning;
136 }
137
138 qDebug();
139 QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
140 std::reverse(sequence.begin(), sequence.end());
141 std::vector<AaPosition> aa_positions;
142 CorrectionTree correction_tree;
143
144 qDebug();
145 m_scenario.reserve(length2 + 1, spectrum.size());
146 m_interest_cells.reserve(spectrum.size());
147 m_interest_cells.at(0).n_row = 0;
148 m_interest_cells.at(0).score = 0;
149 m_interest_cells.at(0).beginning = 0;
150 m_interest_cells.at(0).tree_id = 0;
151 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
152 {
153 m_interest_cells.at(i).n_row = 0;
155 m_interest_cells.at(i).beginning = 0;
156 m_interest_cells.at(i).tree_id = 0;
157 }
158 qDebug();
159 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
160 {
161 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
162 }
163 qDebug();
164 m_scenario.resetScenario();
165 qDebug();
166 for(std::size_t row_number = 1; row_number <= length2; row_number++)
167 {
168 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
169 if(sequence[row_number - 1] == 'U' or sequence[row_number - 1] == 'X')
170 {
171 continue;
172 }
173
174 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
175 // aa = Aa(sequence[row_number - 1].unicode());
176 aa_positions =
177 spectrum.getAaPositions(pappso::Enums::AminoAcidChar(sequence[row_number - 1].toLatin1()));
178 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_id);
179 }
180 qDebug();
181 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
182
183 qDebug() << m_scenario.getBestScore() << " " << MIN_ALIGNMENT_SCORE;
184 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
185 // are generated and aligned. The best alignment is kept.
186 if(m_scenario.getBestScore() >
187 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
188 {
189 qDebug();
191 for(std::size_t iter : m_best_alignment.peaks)
192 {
193 if(iter > spectrum.getComplementaryPeak(iter))
194 {
195 break;
196 }
197 else if(std::find(m_best_alignment.peaks.begin(),
198 m_best_alignment.peaks.end(),
199 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
200 {
201 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
202 }
203 }
204 qDebug();
205 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
206 if(corrections.size() > 0)
207 {
208 m_best_alignment.score = 0; // Reset the best alignment score (we dont want to keep the
209 // original alignment if corrections are needed)
210 qDebug();
211 for(auto peaks_to_remove : corrections)
212 {
213 qDebug();
215 1, sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
216 qDebug();
217 }
218 qDebug();
220 }
221 }
222 qDebug();
223}
224
225void
227 const QString &sequence,
228 const QString &protein_id,
229 const SpOMSSpectrum &spectrum,
230 std::vector<std::size_t> peaks_to_remove,
231 std::size_t offset)
232{
233 qDebug() << recursive_call_count;
234 std::vector<AaPosition> aa_positions;
235 CorrectionTree correction_tree;
236
237 m_interest_cells.at(0).n_row = 0;
238 m_interest_cells.at(0).score = 0;
239 m_interest_cells.at(0).beginning = 0;
240 m_interest_cells.at(0).tree_id = 0;
241 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
242 {
243 m_interest_cells.at(i).n_row = 0;
245 m_interest_cells.at(i).beginning = 0;
246 m_interest_cells.at(i).tree_id = 0;
247 }
248
249 m_scenario.resetScenario();
250 qDebug();
251 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
252 {
253 qDebug() << row_number - 1 << " " << sequence.size();
254 if(sequence[row_number - 1] == 'U' or sequence[row_number - 1] == 'X')
255 {
256 continue;
257 }
259 pappso::Enums::AminoAcidChar(sequence[row_number - 1].toLatin1());
260 qDebug();
261 aa_positions = spectrum.getAaPositions(aa, peaks_to_remove);
262 qDebug();
263 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_id);
264 qDebug();
265 }
266
267 qDebug();
268 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
269 // are generated and aligned. The best alignment is kept.
270 qDebug() << m_scenario.getBestScore();
271 if(m_scenario.getBestScore() >
272 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
273 {
274 qDebug();
275 qDebug() << sequence << " " << recursive_call_count;
276 qDebug() << offset;
277 qDebug() << spectrum.getPrecursorCharge();
278 saveBestAlignment(sequence, spectrum, offset);
279 qDebug();
280 for(std::size_t iter : m_best_alignment.peaks)
281 {
282 if(iter > spectrum.getComplementaryPeak(iter))
283 {
284 break;
285 }
286 else if(std::find(m_best_alignment.peaks.begin(),
287 m_best_alignment.peaks.end(),
288 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
289 {
290 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
291 }
292 }
293 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
294 if(corrections.size() > 0)
295 {
296 for(auto peaks_to_remove : corrections)
297 {
299 recursive_call_count + 1, sequence, protein_id, spectrum, peaks_to_remove, offset);
300 }
301 }
302 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
303 {
305 }
306 }
307 qDebug();
308}
309
310void
312 const QString &protein_seq,
313 const QString &protein_id,
314 std::size_t beginning,
315 std::size_t length,
316 const std::vector<double> &shifts)
317{
318 std::size_t current_SPC = m_best_alignment.SPC;
319 int current_best_score = m_best_alignment.score;
321 for(double precursor_mass_error : shifts)
322 {
323 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
324 preciseAlign(corrected_spectrum, protein_seq, protein_id, beginning, length);
326 {
328 }
329 }
330 if(m_best_post_processed_alignment.SPC > current_SPC &&
331 m_best_post_processed_alignment.score >= current_best_score)
332 {
334 }
335}
336
337void
339 const QString &sequence,
340 const std::size_t row_number,
341 const std::vector<AaPosition> aa_positions,
342 const SpOMSSpectrum &spectrum,
343 const bool fast_align,
344 const QString &protein)
345{
346 int score_found, score_shift, best_score, alt_score, tree_id;
347 uint32_t condition; // FIXME : may be used uninitialised
348 std::size_t best_column, shift, beginning, missing_aas, length, perfect_shift_origin;
349 KeyCell *current_cell_ptr, *tested_cell_ptr;
350 AlignType alignment_type, temp_align_type;
351
352 m_updated_cells.reserve(aa_positions.size());
353
354 // Computation of the threePeaks condition, see spomsspectrum.h for more details.
355 if(fast_align)
356 {
357 condition = 3;
358 if(row_number > 1)
359 {
360 condition += 2 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
361 qDebug() << "condition" << condition << sequence[row_number - 2]
362 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
363 }
364 }
365
366 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
367 aa_position != aa_positions.end();
368 aa_position++)
369 {
370 if(((condition & aa_position->condition) != 0) ||
371 !fast_align) // Verification of the threePeaks condition (only during first alignment).
372 {
373 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
374 if(spectrum.peakType(aa_position->r_peak) ==
376 {
377 score_found = m_scorevalues.get(ScoreType::foundDouble);
379 }
380 else
381 {
382 score_found = m_scorevalues.get(ScoreType::found);
383 score_shift = m_scorevalues.get(ScoreType::foundShift);
384 }
385
386 // not found case (always computed)
387 best_column = aa_position->r_peak;
388 best_score = current_cell_ptr->score + (row_number - current_cell_ptr->n_row) *
390 beginning = current_cell_ptr->beginning;
391 tree_id = current_cell_ptr->tree_id;
392 alignment_type = AlignType::notFound;
393
394 // found case (Can only happen if the left peak is supported)
395 if(aa_position->l_support)
396 {
397 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
398 if(aa_position->l_peak == 0)
399 {
400 alt_score = tested_cell_ptr->score + score_found;
401 }
402 else
403 {
404 if(tested_cell_ptr->n_row == row_number - 1)
405 {
406 alt_score = tested_cell_ptr->score +
407 (row_number - tested_cell_ptr->n_row - 1) *
409 score_found;
410 }
411 else
412 {
413 alt_score = tested_cell_ptr->score +
414 (row_number - tested_cell_ptr->n_row - 1) *
416 score_shift;
417 }
418 }
419 if(alt_score >= best_score)
420 {
421 alignment_type = AlignType::found;
422 best_score = alt_score;
423 best_column = aa_position->l_peak;
424 if(best_column == 0)
425 {
426 if(row_number < ALIGNMENT_SURPLUS)
427 {
428 beginning = 0;
429 }
430 else
431 {
432 beginning =
433 std::max((std::size_t)(row_number - ALIGNMENT_SURPLUS), (std::size_t)0);
434 }
435 if(fast_align)
436 {
437 tree_id = m_location_saver.getNextTree();
438 }
439 }
440 else
441 {
442 beginning = tested_cell_ptr->beginning;
443 tree_id = tested_cell_ptr->tree_id;
444 }
445 }
446 }
447
448 // generic shift case (all shifts are tested)
449 shift = 0;
450 while(shift < aa_position->next_l_peak)
451 {
452 tested_cell_ptr = &m_interest_cells.at(aa_position->next_l_peak - shift);
453 // verification saut parfait
454 if(perfectShiftPossible(sequence,
455 spectrum,
456 tested_cell_ptr->n_row,
457 row_number,
458 aa_position->next_l_peak - shift,
459 aa_position->r_peak))
460 {
461 alt_score = tested_cell_ptr->score +
462 (row_number - tested_cell_ptr->n_row - 1) *
464 score_found;
465 temp_align_type = AlignType::perfectShift;
466 }
467 else
468 {
469 alt_score = tested_cell_ptr->score +
470 (row_number - tested_cell_ptr->n_row - 1) *
472 score_shift;
473 temp_align_type = AlignType::shift;
474 }
475 if(alt_score > best_score)
476 {
477 alignment_type = temp_align_type;
478 best_score = alt_score;
479 best_column = aa_position->next_l_peak - shift;
480 beginning = tested_cell_ptr->beginning;
481 tree_id = tested_cell_ptr->tree_id;
482 }
483 shift++;
484 }
485
486 // case shift from column 0 (no penalties if all precedent amino acids are missed)
487 tested_cell_ptr = &m_interest_cells.at(0);
488 // verification saut parfait
489 perfect_shift_origin =
490 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
491 if(perfect_shift_origin != row_number)
492 {
493 alt_score = tested_cell_ptr->score + score_found;
494 temp_align_type = AlignType::perfectShift;
495 }
496 else
497 {
498 alt_score = tested_cell_ptr->score + score_shift;
499 temp_align_type = AlignType::shift;
500 }
501 if(alt_score > best_score)
502 {
503 alignment_type = temp_align_type;
504 best_score = alt_score;
505 best_column = 0;
506 missing_aas =
507 std::floor(spectrum.getMZShift(0, aa_position->l_peak) / m_aaCode.getMass('G'));
508 if(row_number < ALIGNMENT_SURPLUS + missing_aas)
509 {
510 beginning = 0;
511 }
512 else
513 {
514 beginning = std::max((std::size_t)(row_number - missing_aas - ALIGNMENT_SURPLUS),
515 (std::size_t)0);
516 }
517 if(fast_align)
518 {
519 tree_id = m_location_saver.getNextTree();
520 }
521 }
522 if(best_column != aa_position->r_peak)
523 {
524 m_updated_cells.push_back(
525 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
526 }
527 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
528 {
529 length =
530 row_number - beginning + 1 +
531 std::ceil(spectrum.getMissingMass(aa_position->r_peak) / m_aaCode.getMass('G')) +
533 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
534 }
535 else if(!fast_align)
536 {
537 if(alignment_type == AlignType::perfectShift && best_column == 0)
538 {
539 m_scenario.saveOrigin(row_number,
540 aa_position->r_peak,
541 perfect_shift_origin,
542 0,
543 best_score,
545 }
546 else
547 {
548 m_scenario.saveOrigin(row_number,
549 aa_position->r_peak,
550 m_interest_cells.at(best_column).n_row,
551 best_column,
552 best_score,
553 alignment_type);
554 }
555 }
556 }
557 }
558
559 // Update row number in column 0
560 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
561
562 // Save updated key cells in the matrix
563 while(m_updated_cells.size() > 0)
564 {
565 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
566 m_updated_cells.pop_back();
567 }
568}
569
570bool
572 const SpOMSSpectrum &spectrum,
573 const std::size_t origin_row,
574 const std::size_t current_row,
575 const std::size_t l_peak,
576 const std::size_t r_peak) const
577{
578 double missing_mass = 0;
579 for(QString::const_iterator iter = sequence.begin() + origin_row;
580 iter != sequence.begin() + current_row;
581 iter++)
582 {
583 missing_mass += m_aaCode.getMass(iter->toLatin1()); // Aa(iter->unicode()).getMass();
584 }
585 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
586 return mz_range.contains(spectrum.getMZShift(l_peak, r_peak));
587}
588
589std::size_t
591 const QString &sequence,
592 const SpOMSSpectrum &spectrum,
593 const std::size_t current_row,
594 const std::size_t r_peak) const
595{
596 std::size_t perfect_shift_origin = current_row;
597 double missing_mass = spectrum.getMZShift(0, r_peak);
598 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
599 double aa_mass = 0;
600 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.contains(aa_mass))
601 {
602 aa_mass += m_aaCode.getMass(
603 sequence.at(perfect_shift_origin - 1)
604 .toLatin1()); // Aa(sequence.at(perfect_shift_origin - 1).unicode()).getMass();
605 perfect_shift_origin--;
606 }
607 if(mz_range.contains(aa_mass))
608 {
609 return perfect_shift_origin;
610 }
611 else
612 {
613 return current_row;
614 }
615}
616
617std::size_t
619 const SpOMSSpectrum &spectrum,
620 std::size_t end_row,
621 std::size_t end_peak) const
622{
623 std::size_t perfect_shift_end = end_row + 1;
624 double missing_mass = spectrum.getMissingMass(end_peak);
625 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
626 double aa_mass = 0;
627 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
628 !mz_range.contains(aa_mass))
629 {
630 aa_mass += m_aaCode.getMass(
631 sequence.at(perfect_shift_end - 1)
632 .toLatin1()); // Aa(sequence.at(perfect_shift_end - 1).unicode()).getMass();
633 perfect_shift_end++;
634 }
635 if(mz_range.contains(aa_mass))
636 {
637 return perfect_shift_end - 1;
638 }
639 else
640 {
641 return end_row;
642 }
643}
644
648
654
660
661void
663 const SpOMSSpectrum &spectrum,
664 std::size_t offset)
665{
666 qDebug();
667 m_best_alignment.peaks.clear();
668 m_best_alignment.shifts.clear();
669 std::vector<QString> temp_interpretation;
670 std::size_t previous_row; // FIXME : may be used uninitialised
671 std::size_t previous_column = 0;
672 std::size_t perfect_shift_end;
673 std::pair<std::vector<ScenarioCell>, int> best_alignment = m_scenario.getBestAlignment();
674 m_best_alignment.score = best_alignment.second;
675 QString skipped_aa;
676 double skipped_mass;
677 // Retrieving beginning and end
678 if(best_alignment.first.front().previous_row > offset)
679 {
681 QString("best_alignment.first.front().previous_row > offset %1 %2")
682 .arg(offset)
683 .arg(best_alignment.first.front().previous_row));
684 }
685 if(best_alignment.first.back().previous_row > offset)
686 {
688 QString("best_alignment.first.back().previous_row > offset %1 %2")
689 .arg(offset)
690 .arg(best_alignment.first.back().previous_row));
691 }
692 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
693 m_best_alignment.end = offset - best_alignment.first.back().previous_row;
694
695 qDebug();
696 // Filling temp_interpretation and peaks vectors
697 for(auto cell : best_alignment.first)
698 {
699 switch(cell.alignment_type)
700 {
701 case AlignType::found:
702 temp_interpretation.push_back(sequence.at(previous_row - 1));
703 if(previous_row > cell.previous_row + 1)
704 {
705 skipped_mass = m_aaCode.getMass(
706 sequence.at(previous_row - 1)
707 .toLatin1()); // Aa(sequence.at(previous_row - 1).unicode()).getMass();
708 skipped_aa =
709 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
710 for(auto aa : skipped_aa)
711 {
712 temp_interpretation.push_back('[' + aa + ']');
713 skipped_mass += m_aaCode.getMass(aa.toLatin1()); // Aa(aa.unicode()).getMass();
714 }
715 temp_interpretation.push_back(
716 '[' +
717 QString::number(spectrum.getMZShift(cell.previous_column, previous_column) -
718 skipped_mass) +
719 ']');
720 }
721 m_best_alignment.peaks.push_back(cell.previous_column);
722 break;
724 temp_interpretation.push_back('[' + sequence.at(previous_row - 1) + ']');
725 break;
726 case AlignType::shift:
727 m_best_alignment.peaks.push_back(cell.previous_column);
728 temp_interpretation.push_back(sequence.at(previous_row - 1));
729 temp_interpretation.push_back(
730 '[' +
731 QString::number(spectrum.getMZShift(cell.previous_column, previous_column) -
732 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1())) +
733 ']');
734 m_best_alignment.shifts.push_back(
735 spectrum.getMZShift(cell.previous_column, previous_column) -
736 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1()));
737 break;
739 m_best_alignment.peaks.push_back(cell.previous_column);
740 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
741 std::reverse(skipped_aa.begin(), skipped_aa.end());
742 temp_interpretation.push_back(skipped_aa);
743 break;
744 case AlignType::init:
745 previous_row = cell.previous_row;
746 previous_column = cell.previous_column;
747 m_best_alignment.peaks.push_back(cell.previous_column);
748 break;
749 }
750 previous_row = cell.previous_row;
751 previous_column = cell.previous_column;
752 }
753 std::reverse(temp_interpretation.begin(), temp_interpretation.end());
754 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
755
756 qDebug();
757 // Compute begin_shift and end_shift
758 MzRange zero(0, m_precision_ptr);
759 m_best_alignment.begin_shift = spectrum.getMZShift(0, m_best_alignment.peaks.front());
760 m_best_alignment.end_shift = spectrum.getMissingMass(m_best_alignment.peaks.back());
761 if(zero.contains(m_best_alignment.end_shift))
762 {
763 m_best_alignment.end_shift = 0;
764 }
765
766 qDebug();
767 // Computing SPC
768 m_best_alignment.SPC = 0;
769 for(auto peak : m_best_alignment.peaks)
770 {
771 switch(spectrum.at(peak).type)
772 {
774 qDebug() << peak << "native";
775 m_best_alignment.SPC += 1;
776 break;
778 qDebug() << peak << "both";
779 m_best_alignment.SPC += 2;
780 break;
782 qDebug() << peak << "synthetic";
783 break;
785 qDebug() << peak << "symmetric";
786 m_best_alignment.SPC += 1;
787 break;
788 }
789 }
790
791 qDebug();
792 // Final check of the end shift
793 if(m_best_alignment.end_shift > 0)
794 {
795 perfect_shift_end = perfectShiftPossibleEnd(sequence,
796 spectrum,
797 best_alignment.first.front().previous_row,
798 m_best_alignment.peaks.back());
799 if(perfect_shift_end != best_alignment.first.front().previous_row)
800 {
801 skipped_aa =
802 sequence.sliced(best_alignment.first.front().previous_row,
803 perfect_shift_end - best_alignment.first.front().previous_row);
804 for(auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
805 {
806 temp_interpretation.push_back('[' + *aa + ']');
807 }
808 m_best_alignment.beginning = offset - perfect_shift_end;
809 m_best_alignment.end_shift = 0;
810 }
811 else
812 {
814 }
815 }
816
817 qDebug();
818 // Writing final interpretation
819 m_best_alignment.interpretation = "";
820 if(m_best_alignment.end_shift > 0)
821 {
822 m_best_alignment.interpretation += '[' + QString::number(m_best_alignment.end_shift) + ']';
823 }
824 for(auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
825 {
826 m_best_alignment.interpretation += *(str);
827 }
828 if(m_best_alignment.begin_shift > 0)
829 {
830 m_best_alignment.interpretation += '[' + QString::number(m_best_alignment.begin_shift) + ']';
831 }
832 qDebug();
833}
834
840
841std::vector<double>
843 const Alignment &alignment,
844 const QString &protein_seq)
845{
846 // qWarning() << protein_seq;
847 if(alignment.end > (std::size_t)protein_seq.size())
848 {
849 throw pappso::ExceptionOutOfRange(QString("alignment.end > protein_seq.size() %1 %2")
850 .arg(alignment.end)
851 .arg(protein_seq.size()));
852 }
853 std::vector<double> potential_mass_errors(alignment.shifts);
854 double shift = alignment.end_shift;
855 std::size_t index;
856 if(alignment.beginning > 0)
857 { // -1 on unsigned int makes it wrong
858 index = alignment.beginning - 1;
859 while(shift > 0 && index > 0)
860 {
861 potential_mass_errors.push_back(shift);
862 // qWarning() << " shift=" << shift << " index=" << index
863 // << " letter=" << protein_seq.at(index).toLatin1();
864 shift -= aa_code.getMass(
865 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
866 index--;
867 }
868 }
869
870 // qWarning() << "second";
871 shift = alignment.begin_shift;
872 index = alignment.end + 1;
873 while(shift > 0 && index < (std::size_t)protein_seq.size())
874 {
875 potential_mass_errors.push_back(shift);
876 qWarning() << " shift=" << shift << " index=" << index
877 << " letter=" << protein_seq.at(index).toLatin1();
878 shift -= aa_code.getMass(
879 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
880 index++;
881 }
882 // qWarning();
883 return potential_mass_errors;
884}
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
Definition aacode.h:44
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
Definition aacode.cpp:219
bool contains(pappso_double) const
Definition mzrange.cpp:120
std::vector< std::vector< std::size_t > > getPeaks() const
void addPeaks(std::size_t peak1, std::size_t peak2)
void saveBestAlignment(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
void preciseAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
Scenario getScenario() const
Returns a copy of m_scenario.
void updateAlignmentMatrix(const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const QString &protein)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void postProcessingAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
bool perfectShiftPossible(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::size_t perfectShiftPossibleEnd(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
void correctAlign(int recursive_call_count, const QString &protein_seq, const QString &protein_id, const SpOMSSpectrum &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
SemiGlobalAlignment(const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
void fastAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
std::size_t perfectShiftPossibleFrom0(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak.
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
std::vector< AaPosition > & getAaPositions(pappso::Enums::AminoAcidChar aa) const
Returns the list of aa_positions for a given amino acid.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::size_t getComplementaryPeak(std::size_t peak) const
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
@ synthetic
does not correspond to existing peak, for computational purpose
Definition types.h:85
@ both
both, the ion and the complement exists in the original spectrum
Definition types.h:83
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
Definition types.h:81
const uint ALIGNMENT_SURPLUS(5)
const int MIN_ALIGNMENT_SCORE(15)
const PrecisionBase * PrecisionPtr
Definition precision.h:122
void reset()
reinitialize to default score_values
std::vector< std::size_t > peaks