libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::specpeptidoms::SemiGlobalAlignment Class Reference

#include <semiglobalalignment.h>

Public Member Functions

 SemiGlobalAlignment (const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
 
 ~SemiGlobalAlignment ()
 
void fastAlign (const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id)
 perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.
 
void preciseAlign (const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
 performs the second alignment search between a protein subsequence and a spectrum.
 
void postProcessingAlign (const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
 performs the post-processing : generates corrected spectra and align them
 
LocationSaver getLocationSaver () const
 Returns a copy of m_location_saver.
 
Scenario getScenario () const
 Returns a copy of m_scenario.
 
const AlignmentgetBestAlignment () const
 Returns a const ref to m_best_alignment.
 

Static Public Member Functions

static std::vector< double > getPotentialMassErrors (const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
 Returns a list of the potential mass errors corresponding to the provided alignment in the provided protein sequence.
 

Private Member Functions

void saveBestAlignment (const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
 Stores the best alignment from m_scenario in m_best_alignment.
 
void correctAlign (int recursive_call_count, const QString &protein_seq, const QString &protein_id, const SpOMSSpectrum &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
 Recursively performs the correction of the alignment.
 
void updateAlignmentMatrix (const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const QString &protein)
 updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.
 
bool perfectShiftPossible (const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
 indicates if a perfect shift is possible between the provided positions
 
std::size_t perfectShiftPossibleFrom0 (const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
 indicates if a perfect shift is possible from the spectrum beginning to the provided peak.
 
std::size_t perfectShiftPossibleEnd (const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
 indicates if a perfect shift is possible between the provided positions
 

Private Attributes

std::vector< KeyCellm_interest_cells
 
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
 
const ScoreValuesm_scorevalues
 
const int min_score = 15
 
pappso::PrecisionPtr m_precision_ptr
 
const AaCodem_aaCode
 
LocationSaver m_location_saver
 
Scenario m_scenario
 
Alignment m_best_alignment
 
Alignment m_best_corrected_alignment
 
Alignment m_best_post_processed_alignment
 

Detailed Description

Definition at line 73 of file semiglobalalignment.h.

Constructor & Destructor Documentation

◆ SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::SemiGlobalAlignment ( const ScoreValues & score_values,
const pappso::PrecisionPtr precision_ptr,
const AaCode & aaCode )

Default constructor

Definition at line 58 of file semiglobalalignment.cpp.

62 : m_scorevalues(score_values), m_aaCode(aaCode)
63{
64 m_precision_ptr = precision_ptr;
65 m_interest_cells.push_back({0, 0, 0, 0});
66}

References m_aaCode, m_interest_cells, m_precision_ptr, and m_scorevalues.

◆ ~SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::~SemiGlobalAlignment ( )

Destructor

Definition at line 645 of file semiglobalalignment.cpp.

646{
647}

Member Function Documentation

◆ correctAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::correctAlign ( int recursive_call_count,
const QString & protein_seq,
const QString & protein_id,
const SpOMSSpectrum & spectrum,
std::vector< std::size_t > peaks_to_remove,
std::size_t offset )
private

Recursively performs the correction of the alignment.

Parameters
protein_seqProtein sequence to align.
protein_idID of the protein to align.
spectrumSpectrum to align.
peaks_to_removePeaks to remove from the spectrum.
offsetSize of the protein sequence minus beginning of the alignment. Used to compute the position of the alignment in the protein sequence.

Definition at line 226 of file semiglobalalignment.cpp.

232{
233 qDebug() << recursive_call_count;
234 std::vector<AaPosition> aa_positions;
235 CorrectionTree correction_tree;
236
237 m_interest_cells.at(0).n_row = 0;
238 m_interest_cells.at(0).score = 0;
239 m_interest_cells.at(0).beginning = 0;
240 m_interest_cells.at(0).tree_id = 0;
241 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
242 {
243 m_interest_cells.at(i).n_row = 0;
245 m_interest_cells.at(i).beginning = 0;
246 m_interest_cells.at(i).tree_id = 0;
247 }
248
249 m_scenario.resetScenario();
250 qDebug();
251 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
252 {
253 qDebug() << row_number - 1 << " " << sequence.size();
254 if(sequence[row_number - 1] == 'U' or sequence[row_number - 1] == 'X')
255 {
256 continue;
257 }
259 pappso::Enums::AminoAcidChar(sequence[row_number - 1].toLatin1());
260 qDebug();
261 aa_positions = spectrum.getAaPositions(aa, peaks_to_remove);
262 qDebug();
263 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_id);
264 qDebug();
265 }
266
267 qDebug();
268 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
269 // are generated and aligned. The best alignment is kept.
270 qDebug() << m_scenario.getBestScore();
271 if(m_scenario.getBestScore() >
272 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
273 {
274 qDebug();
275 qDebug() << sequence << " " << recursive_call_count;
276 qDebug() << offset;
277 qDebug() << spectrum.getPrecursorCharge();
278 saveBestAlignment(sequence, spectrum, offset);
279 qDebug();
280 for(std::size_t iter : m_best_alignment.peaks)
281 {
282 if(iter > spectrum.getComplementaryPeak(iter))
283 {
284 break;
285 }
286 else if(std::find(m_best_alignment.peaks.begin(),
287 m_best_alignment.peaks.end(),
288 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
289 {
290 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
291 }
292 }
293 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
294 if(corrections.size() > 0)
295 {
296 for(auto peaks_to_remove : corrections)
297 {
299 recursive_call_count + 1, sequence, protein_id, spectrum, peaks_to_remove, offset);
300 }
301 }
302 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
303 {
305 }
306 }
307 qDebug();
308}
void saveBestAlignment(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
void updateAlignmentMatrix(const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const QString &protein)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void correctAlign(int recursive_call_count, const QString &protein_seq, const QString &protein_id, const SpOMSSpectrum &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
@ aa
best possible : more than one direct MS2 fragmentation in same MSRUN
Definition types.h:45
const int MIN_ALIGNMENT_SCORE(15)

References pappso::specpeptidoms::CorrectionTree::addPeaks(), correctAlign(), pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::SpOMSSpectrum::getComplementaryPeak(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::SpOMSSpectrum::getPrecursorCharge(), pappso::specpeptidoms::init, m_best_alignment, m_best_corrected_alignment, m_interest_cells, m_scenario, m_scorevalues, pappso::specpeptidoms::MIN_ALIGNMENT_SCORE(), saveBestAlignment(), and updateAlignmentMatrix().

Referenced by correctAlign(), and preciseAlign().

◆ fastAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::fastAlign ( const SpOMSSpectrum & spectrum,
const QString & protein_seq,
const QString & protein_id )

perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.

Parameters
spectrumSpectrum to align
protein_seqProtein sequence to align.
protein_idID of the protein to align.

Definition at line 69 of file semiglobalalignment.cpp.

72{
73 // TODO don't forget to reset any important variable
74 m_best_alignment.reset();
77
78 std::size_t sequence_length = protein_seq_in.size();
79 QString protein_seq(protein_seq_in);
80 std::reverse(protein_seq.begin(), protein_seq.end());
82 std::vector<AaPosition> aa_positions;
83 m_interest_cells.at(0).n_row = 0;
84 m_interest_cells.at(0).score = 0;
85 m_interest_cells.at(0).beginning = 0;
86 m_interest_cells.at(0).tree_id = 0;
87
88 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
89 {
90 m_interest_cells.at(i).n_row = 0;
92 m_interest_cells.at(i).beginning = 0;
93 m_interest_cells.at(i).tree_id = 0;
94 }
95
96 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
97 {
98 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
99 }
100 m_location_saver.resetLocationSaver();
101 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
102 {
103 if(protein_seq[row_number - 1] == 'U' or protein_seq[row_number - 1] == 'X')
104 {
105 continue;
106 }
107 try
108 {
109 aa = pappso::Enums::AminoAcidChar(protein_seq[row_number - 1].toLatin1());
110 }
111 catch(const pappso::PappsoException &err)
112 {
113 throw err;
114 }
115 aa_positions = spectrum.getAaPositions(aa);
116 updateAlignmentMatrix(protein_seq, row_number, aa_positions, spectrum, true, protein_id);
117 }
118}

References pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::init, m_best_alignment, m_best_corrected_alignment, m_best_post_processed_alignment, m_interest_cells, m_location_saver, m_scorevalues, and updateAlignmentMatrix().

◆ getBestAlignment()

const pappso::specpeptidoms::Alignment & pappso::specpeptidoms::SemiGlobalAlignment::getBestAlignment ( ) const

Returns a const ref to m_best_alignment.

Definition at line 836 of file semiglobalalignment.cpp.

837{
838 return m_best_alignment;
839}

References m_best_alignment.

◆ getLocationSaver()

pappso::specpeptidoms::LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::getLocationSaver ( ) const

Returns a copy of m_location_saver.

Definition at line 650 of file semiglobalalignment.cpp.

651{
652 return m_location_saver;
653}

References m_location_saver.

◆ getPotentialMassErrors()

std::vector< double > pappso::specpeptidoms::SemiGlobalAlignment::getPotentialMassErrors ( const pappso::AaCode & aa_code,
const Alignment & alignment,
const QString & protein_seq )
static

Returns a list of the potential mass errors corresponding to the provided alignment in the provided protein sequence.

Parameters
aa_codethe amino acid code of reference to get aminon acid masses
alignmentAlignment for which to get the potential mass errors.
protein_seqProtein sequence corresponding to the provided alignment.

Definition at line 842 of file semiglobalalignment.cpp.

845{
846 // qWarning() << protein_seq;
847 if(alignment.end > (std::size_t)protein_seq.size())
848 {
849 throw pappso::ExceptionOutOfRange(QString("alignment.end > protein_seq.size() %1 %2")
850 .arg(alignment.end)
851 .arg(protein_seq.size()));
852 }
853 std::vector<double> potential_mass_errors(alignment.shifts);
854 double shift = alignment.end_shift;
855 std::size_t index;
856 if(alignment.beginning > 0)
857 { // -1 on unsigned int makes it wrong
858 index = alignment.beginning - 1;
859 while(shift > 0 && index > 0)
860 {
861 potential_mass_errors.push_back(shift);
862 // qWarning() << " shift=" << shift << " index=" << index
863 // << " letter=" << protein_seq.at(index).toLatin1();
864 shift -= aa_code.getMass(
865 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
866 index--;
867 }
868 }
869
870 // qWarning() << "second";
871 shift = alignment.begin_shift;
872 index = alignment.end + 1;
873 while(shift > 0 && index < (std::size_t)protein_seq.size())
874 {
875 potential_mass_errors.push_back(shift);
876 qWarning() << " shift=" << shift << " index=" << index
877 << " letter=" << protein_seq.at(index).toLatin1();
878 shift -= aa_code.getMass(
879 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
880 index++;
881 }
882 // qWarning();
883 return potential_mass_errors;
884}
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
Definition aacode.cpp:219

References pappso::specpeptidoms::Alignment::begin_shift, pappso::specpeptidoms::Alignment::beginning, pappso::specpeptidoms::Alignment::end, pappso::specpeptidoms::Alignment::end_shift, pappso::AaCode::getMass(), pappso::specpeptidoms::shift, and pappso::specpeptidoms::Alignment::shifts.

◆ getScenario()

pappso::specpeptidoms::Scenario pappso::specpeptidoms::SemiGlobalAlignment::getScenario ( ) const

Returns a copy of m_scenario.

Definition at line 656 of file semiglobalalignment.cpp.

657{
658 return m_scenario;
659}

References m_scenario.

◆ perfectShiftPossible()

bool pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossible ( const QString & sequence,
const SpOMSSpectrum & spectrum,
const std::size_t origin_row,
const std::size_t current_row,
const std::size_t l_peak,
const std::size_t r_peak ) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
origin_rowbeginning row of the aa gap to verify (== index of the first missing aa in sequence)
current_rowrow being processed (== index of the current AaPosition in sequence)
l_peakleft peak index of the mz gap to verify
r_peakright peak index of the mz gap to verify

Definition at line 571 of file semiglobalalignment.cpp.

577{
578 double missing_mass = 0;
579 for(QString::const_iterator iter = sequence.begin() + origin_row;
580 iter != sequence.begin() + current_row;
581 iter++)
582 {
583 missing_mass += m_aaCode.getMass(iter->toLatin1()); // Aa(iter->unicode()).getMass();
584 }
585 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
586 return mz_range.contains(spectrum.getMZShift(l_peak, r_peak));
587}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), m_aaCode, and m_precision_ptr.

Referenced by updateAlignmentMatrix().

◆ perfectShiftPossibleEnd()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleEnd ( const QString & sequence,
const SpOMSSpectrum & spectrum,
std::size_t end_row,
std::size_t end_peak ) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
end_rowIndex of the last aligned row.
end_peakIndex of the last aligned peak.

Definition at line 618 of file semiglobalalignment.cpp.

622{
623 std::size_t perfect_shift_end = end_row + 1;
624 double missing_mass = spectrum.getMissingMass(end_peak);
625 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
626 double aa_mass = 0;
627 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
628 !mz_range.contains(aa_mass))
629 {
630 aa_mass += m_aaCode.getMass(
631 sequence.at(perfect_shift_end - 1)
632 .toLatin1()); // Aa(sequence.at(perfect_shift_end - 1).unicode()).getMass();
633 perfect_shift_end++;
634 }
635 if(mz_range.contains(aa_mass))
636 {
637 return perfect_shift_end - 1;
638 }
639 else
640 {
641 return end_row;
642 }
643}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), m_aaCode, and m_precision_ptr.

Referenced by saveBestAlignment().

◆ perfectShiftPossibleFrom0()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleFrom0 ( const QString & sequence,
const SpOMSSpectrum & spectrum,
const std::size_t current_row,
const std::size_t r_peak ) const
private

indicates if a perfect shift is possible from the spectrum beginning to the provided peak.

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
current_rowrow being processed (== index of the current AaPosition in sequence)
r_peakright peak index of the mz gap to verify

Definition at line 590 of file semiglobalalignment.cpp.

595{
596 std::size_t perfect_shift_origin = current_row;
597 double missing_mass = spectrum.getMZShift(0, r_peak);
598 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
599 double aa_mass = 0;
600 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.contains(aa_mass))
601 {
602 aa_mass += m_aaCode.getMass(
603 sequence.at(perfect_shift_origin - 1)
604 .toLatin1()); // Aa(sequence.at(perfect_shift_origin - 1).unicode()).getMass();
605 perfect_shift_origin--;
606 }
607 if(mz_range.contains(aa_mass))
608 {
609 return perfect_shift_origin;
610 }
611 else
612 {
613 return current_row;
614 }
615}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), m_aaCode, and m_precision_ptr.

Referenced by updateAlignmentMatrix().

◆ postProcessingAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::postProcessingAlign ( const SpOMSSpectrum & spectrum,
const QString & protein_seq,
const QString & protein_id,
std::size_t beginning,
std::size_t length,
const std::vector< double > & shifts )

performs the post-processing : generates corrected spectra and align them

Parameters
spectrumSpectrum to align
protein_seqProtein sequence to align.
protein_idID of the protein to align.
beginningIndex of the beginning of the subsequence to align.
lengthLength of the subsequence to align.
shiftsList of potential precursor mass errors to test.

Definition at line 311 of file semiglobalalignment.cpp.

317{
318 std::size_t current_SPC = m_best_alignment.SPC;
319 int current_best_score = m_best_alignment.score;
321 for(double precursor_mass_error : shifts)
322 {
323 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
324 preciseAlign(corrected_spectrum, protein_seq, protein_id, beginning, length);
326 {
328 }
329 }
330 if(m_best_post_processed_alignment.SPC > current_SPC &&
331 m_best_post_processed_alignment.score >= current_best_score)
332 {
334 }
335}
void preciseAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.

References m_best_alignment, m_best_post_processed_alignment, and preciseAlign().

◆ preciseAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::preciseAlign ( const SpOMSSpectrum & spectrum,
const QString & protein_seq,
const QString & protein_id,
const std::size_t beginning,
const std::size_t length )

performs the second alignment search between a protein subsequence and a spectrum.

Parameters
spectrumSpectrum to align
protein_seqProtein sequence to align.
protein_idID of the protein to align.
beginningIndex of the beginning of the subsequence to align.
lengthLength of the subsequence to align.

Definition at line 121 of file semiglobalalignment.cpp.

126{
127 qDebug();
128 std::size_t length2;
129 if((qsizetype)(beginning + length) <= protein_seq.size())
130 {
131 length2 = length;
132 }
133 else
134 {
135 length2 = protein_seq.size() - beginning;
136 }
137
138 qDebug();
139 QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
140 std::reverse(sequence.begin(), sequence.end());
141 std::vector<AaPosition> aa_positions;
142 CorrectionTree correction_tree;
143
144 qDebug();
145 m_scenario.reserve(length2 + 1, spectrum.size());
146 m_interest_cells.reserve(spectrum.size());
147 m_interest_cells.at(0).n_row = 0;
148 m_interest_cells.at(0).score = 0;
149 m_interest_cells.at(0).beginning = 0;
150 m_interest_cells.at(0).tree_id = 0;
151 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
152 {
153 m_interest_cells.at(i).n_row = 0;
155 m_interest_cells.at(i).beginning = 0;
156 m_interest_cells.at(i).tree_id = 0;
157 }
158 qDebug();
159 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
160 {
161 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
162 }
163 qDebug();
164 m_scenario.resetScenario();
165 qDebug();
166 for(std::size_t row_number = 1; row_number <= length2; row_number++)
167 {
168 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
169 if(sequence[row_number - 1] == 'U' or sequence[row_number - 1] == 'X')
170 {
171 continue;
172 }
173
174 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
175 // aa = Aa(sequence[row_number - 1].unicode());
176 aa_positions =
177 spectrum.getAaPositions(pappso::Enums::AminoAcidChar(sequence[row_number - 1].toLatin1()));
178 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_id);
179 }
180 qDebug();
181 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
182
183 qDebug() << m_scenario.getBestScore() << " " << MIN_ALIGNMENT_SCORE;
184 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
185 // are generated and aligned. The best alignment is kept.
186 if(m_scenario.getBestScore() >
187 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
188 {
189 qDebug();
191 for(std::size_t iter : m_best_alignment.peaks)
192 {
193 if(iter > spectrum.getComplementaryPeak(iter))
194 {
195 break;
196 }
197 else if(std::find(m_best_alignment.peaks.begin(),
198 m_best_alignment.peaks.end(),
199 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
200 {
201 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
202 }
203 }
204 qDebug();
205 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
206 if(corrections.size() > 0)
207 {
208 m_best_alignment.score = 0; // Reset the best alignment score (we dont want to keep the
209 // original alignment if corrections are needed)
210 qDebug();
211 for(auto peaks_to_remove : corrections)
212 {
213 qDebug();
215 1, sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
216 qDebug();
217 }
218 qDebug();
220 }
221 }
222 qDebug();
223}

References pappso::specpeptidoms::CorrectionTree::addPeaks(), correctAlign(), pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::SpOMSSpectrum::getComplementaryPeak(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::init, m_best_alignment, m_best_corrected_alignment, m_interest_cells, m_scenario, m_scorevalues, pappso::specpeptidoms::MIN_ALIGNMENT_SCORE(), saveBestAlignment(), and updateAlignmentMatrix().

Referenced by postProcessingAlign().

◆ saveBestAlignment()

void pappso::specpeptidoms::SemiGlobalAlignment::saveBestAlignment ( const QString & sequence,
const SpOMSSpectrum & spectrum,
std::size_t offset )
private

Stores the best alignment from m_scenario in m_best_alignment.

Parameters
sequenceProtein sequence of the current alignment.
spectrumSpectrum currently being aligned.
offsetSize of the protein sequence minus beginning of the alignment. Used to compute the position of the alignment in the protein sequence.

Definition at line 662 of file semiglobalalignment.cpp.

665{
666 qDebug();
667 m_best_alignment.peaks.clear();
668 m_best_alignment.shifts.clear();
669 std::vector<QString> temp_interpretation;
670 std::size_t previous_row; // FIXME : may be used uninitialised
671 std::size_t previous_column = 0;
672 std::size_t perfect_shift_end;
673 std::pair<std::vector<ScenarioCell>, int> best_alignment = m_scenario.getBestAlignment();
674 m_best_alignment.score = best_alignment.second;
675 QString skipped_aa;
676 double skipped_mass;
677 // Retrieving beginning and end
678 if(best_alignment.first.front().previous_row > offset)
679 {
680 throw pappso::PappsoException(
681 QString("best_alignment.first.front().previous_row > offset %1 %2")
682 .arg(offset)
683 .arg(best_alignment.first.front().previous_row));
684 }
685 if(best_alignment.first.back().previous_row > offset)
686 {
687 throw pappso::PappsoException(
688 QString("best_alignment.first.back().previous_row > offset %1 %2")
689 .arg(offset)
690 .arg(best_alignment.first.back().previous_row));
691 }
692 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
693 m_best_alignment.end = offset - best_alignment.first.back().previous_row;
694
695 qDebug();
696 // Filling temp_interpretation and peaks vectors
697 for(auto cell : best_alignment.first)
698 {
699 switch(cell.alignment_type)
700 {
701 case AlignType::found:
702 temp_interpretation.push_back(sequence.at(previous_row - 1));
703 if(previous_row > cell.previous_row + 1)
704 {
705 skipped_mass = m_aaCode.getMass(
706 sequence.at(previous_row - 1)
707 .toLatin1()); // Aa(sequence.at(previous_row - 1).unicode()).getMass();
708 skipped_aa =
709 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
710 for(auto aa : skipped_aa)
711 {
712 temp_interpretation.push_back('[' + aa + ']');
713 skipped_mass += m_aaCode.getMass(aa.toLatin1()); // Aa(aa.unicode()).getMass();
714 }
715 temp_interpretation.push_back(
716 '[' +
717 QString::number(spectrum.getMZShift(cell.previous_column, previous_column) -
718 skipped_mass) +
719 ']');
720 }
721 m_best_alignment.peaks.push_back(cell.previous_column);
722 break;
724 temp_interpretation.push_back('[' + sequence.at(previous_row - 1) + ']');
725 break;
726 case AlignType::shift:
727 m_best_alignment.peaks.push_back(cell.previous_column);
728 temp_interpretation.push_back(sequence.at(previous_row - 1));
729 temp_interpretation.push_back(
730 '[' +
731 QString::number(spectrum.getMZShift(cell.previous_column, previous_column) -
732 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1())) +
733 ']');
734 m_best_alignment.shifts.push_back(
735 spectrum.getMZShift(cell.previous_column, previous_column) -
736 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1()));
737 break;
739 m_best_alignment.peaks.push_back(cell.previous_column);
740 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
741 std::reverse(skipped_aa.begin(), skipped_aa.end());
742 temp_interpretation.push_back(skipped_aa);
743 break;
744 case AlignType::init:
745 previous_row = cell.previous_row;
746 previous_column = cell.previous_column;
747 m_best_alignment.peaks.push_back(cell.previous_column);
748 break;
749 }
750 previous_row = cell.previous_row;
751 previous_column = cell.previous_column;
752 }
753 std::reverse(temp_interpretation.begin(), temp_interpretation.end());
754 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
755
756 qDebug();
757 // Compute begin_shift and end_shift
758 MzRange zero(0, m_precision_ptr);
759 m_best_alignment.begin_shift = spectrum.getMZShift(0, m_best_alignment.peaks.front());
760 m_best_alignment.end_shift = spectrum.getMissingMass(m_best_alignment.peaks.back());
761 if(zero.contains(m_best_alignment.end_shift))
762 {
763 m_best_alignment.end_shift = 0;
764 }
765
766 qDebug();
767 // Computing SPC
768 m_best_alignment.SPC = 0;
769 for(auto peak : m_best_alignment.peaks)
770 {
771 switch(spectrum.at(peak).type)
772 {
774 qDebug() << peak << "native";
775 m_best_alignment.SPC += 1;
776 break;
778 qDebug() << peak << "both";
779 m_best_alignment.SPC += 2;
780 break;
782 qDebug() << peak << "synthetic";
783 break;
785 qDebug() << peak << "symmetric";
786 m_best_alignment.SPC += 1;
787 break;
788 }
789 }
790
791 qDebug();
792 // Final check of the end shift
793 if(m_best_alignment.end_shift > 0)
794 {
795 perfect_shift_end = perfectShiftPossibleEnd(sequence,
796 spectrum,
797 best_alignment.first.front().previous_row,
798 m_best_alignment.peaks.back());
799 if(perfect_shift_end != best_alignment.first.front().previous_row)
800 {
801 skipped_aa =
802 sequence.sliced(best_alignment.first.front().previous_row,
803 perfect_shift_end - best_alignment.first.front().previous_row);
804 for(auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
805 {
806 temp_interpretation.push_back('[' + *aa + ']');
807 }
808 m_best_alignment.beginning = offset - perfect_shift_end;
809 m_best_alignment.end_shift = 0;
810 }
811 else
812 {
814 }
815 }
816
817 qDebug();
818 // Writing final interpretation
819 m_best_alignment.interpretation = "";
820 if(m_best_alignment.end_shift > 0)
821 {
822 m_best_alignment.interpretation += '[' + QString::number(m_best_alignment.end_shift) + ']';
823 }
824 for(auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
825 {
826 m_best_alignment.interpretation += *(str);
827 }
828 if(m_best_alignment.begin_shift > 0)
829 {
830 m_best_alignment.interpretation += '[' + QString::number(m_best_alignment.begin_shift) + ']';
831 }
832 qDebug();
833}
std::size_t perfectShiftPossibleEnd(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
@ synthetic
does not correspond to existing peak, for computational purpose
Definition types.h:85
@ both
both, the ion and the complement exists in the original spectrum
Definition types.h:83
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
Definition types.h:81

References pappso::specglob::both, pappso::MzRange::contains(), pappso::specpeptidoms::found, pappso::specpeptidoms::foundShift, pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), pappso::specpeptidoms::init, m_aaCode, m_best_alignment, m_precision_ptr, m_scenario, m_scorevalues, pappso::specglob::native, pappso::specpeptidoms::notFound, pappso::specpeptidoms::perfectShift, perfectShiftPossibleEnd(), pappso::specpeptidoms::shift, pappso::specglob::symmetric, and pappso::specglob::synthetic.

Referenced by correctAlign(), and preciseAlign().

◆ updateAlignmentMatrix()

void pappso::specpeptidoms::SemiGlobalAlignment::updateAlignmentMatrix ( const QString & sequence,
const std::size_t row_number,
const std::vector< AaPosition > aa_positions,
const SpOMSSpectrum & spectrum,
const bool fast_align,
const QString & protein )
private

updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.

Parameters
sequenceReversed sequence of the protein being aligned
row_numbernumber of the row to update (== index in sequence of the amino acid being aligned)
aa_positionslist of the AaPositions of the current amino acid
spectrumSpectrum being aligned
fast_alignWhether to use the fast version of the algorithm (for 1st alignemnt step)
proteinId of the protein being aligned.

Definition at line 338 of file semiglobalalignment.cpp.

345{
346 int score_found, score_shift, best_score, alt_score, tree_id;
347 uint32_t condition; // FIXME : may be used uninitialised
348 std::size_t best_column, shift, beginning, missing_aas, length, perfect_shift_origin;
349 KeyCell *current_cell_ptr, *tested_cell_ptr;
350 AlignType alignment_type, temp_align_type;
351
352 m_updated_cells.reserve(aa_positions.size());
353
354 // Computation of the threePeaks condition, see spomsspectrum.h for more details.
355 if(fast_align)
356 {
357 condition = 3;
358 if(row_number > 1)
359 {
360 condition += 2 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
361 qDebug() << "condition" << condition << sequence[row_number - 2]
362 << m_aaCode.getAaCode(sequence[row_number - 2].unicode());
363 }
364 }
365
366 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
367 aa_position != aa_positions.end();
368 aa_position++)
369 {
370 if(((condition & aa_position->condition) != 0) ||
371 !fast_align) // Verification of the threePeaks condition (only during first alignment).
372 {
373 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
374 if(spectrum.peakType(aa_position->r_peak) ==
376 {
377 score_found = m_scorevalues.get(ScoreType::foundDouble);
379 }
380 else
381 {
382 score_found = m_scorevalues.get(ScoreType::found);
383 score_shift = m_scorevalues.get(ScoreType::foundShift);
384 }
385
386 // not found case (always computed)
387 best_column = aa_position->r_peak;
388 best_score = current_cell_ptr->score + (row_number - current_cell_ptr->n_row) *
390 beginning = current_cell_ptr->beginning;
391 tree_id = current_cell_ptr->tree_id;
392 alignment_type = AlignType::notFound;
393
394 // found case (Can only happen if the left peak is supported)
395 if(aa_position->l_support)
396 {
397 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
398 if(aa_position->l_peak == 0)
399 {
400 alt_score = tested_cell_ptr->score + score_found;
401 }
402 else
403 {
404 if(tested_cell_ptr->n_row == row_number - 1)
405 {
406 alt_score = tested_cell_ptr->score +
407 (row_number - tested_cell_ptr->n_row - 1) *
409 score_found;
410 }
411 else
412 {
413 alt_score = tested_cell_ptr->score +
414 (row_number - tested_cell_ptr->n_row - 1) *
416 score_shift;
417 }
418 }
419 if(alt_score >= best_score)
420 {
421 alignment_type = AlignType::found;
422 best_score = alt_score;
423 best_column = aa_position->l_peak;
424 if(best_column == 0)
425 {
426 if(row_number < ALIGNMENT_SURPLUS)
427 {
428 beginning = 0;
429 }
430 else
431 {
432 beginning =
433 std::max((std::size_t)(row_number - ALIGNMENT_SURPLUS), (std::size_t)0);
434 }
435 if(fast_align)
436 {
437 tree_id = m_location_saver.getNextTree();
438 }
439 }
440 else
441 {
442 beginning = tested_cell_ptr->beginning;
443 tree_id = tested_cell_ptr->tree_id;
444 }
445 }
446 }
447
448 // generic shift case (all shifts are tested)
449 shift = 0;
450 while(shift < aa_position->next_l_peak)
451 {
452 tested_cell_ptr = &m_interest_cells.at(aa_position->next_l_peak - shift);
453 // verification saut parfait
454 if(perfectShiftPossible(sequence,
455 spectrum,
456 tested_cell_ptr->n_row,
457 row_number,
458 aa_position->next_l_peak - shift,
459 aa_position->r_peak))
460 {
461 alt_score = tested_cell_ptr->score +
462 (row_number - tested_cell_ptr->n_row - 1) *
464 score_found;
465 temp_align_type = AlignType::perfectShift;
466 }
467 else
468 {
469 alt_score = tested_cell_ptr->score +
470 (row_number - tested_cell_ptr->n_row - 1) *
472 score_shift;
473 temp_align_type = AlignType::shift;
474 }
475 if(alt_score > best_score)
476 {
477 alignment_type = temp_align_type;
478 best_score = alt_score;
479 best_column = aa_position->next_l_peak - shift;
480 beginning = tested_cell_ptr->beginning;
481 tree_id = tested_cell_ptr->tree_id;
482 }
483 shift++;
484 }
485
486 // case shift from column 0 (no penalties if all precedent amino acids are missed)
487 tested_cell_ptr = &m_interest_cells.at(0);
488 // verification saut parfait
489 perfect_shift_origin =
490 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
491 if(perfect_shift_origin != row_number)
492 {
493 alt_score = tested_cell_ptr->score + score_found;
494 temp_align_type = AlignType::perfectShift;
495 }
496 else
497 {
498 alt_score = tested_cell_ptr->score + score_shift;
499 temp_align_type = AlignType::shift;
500 }
501 if(alt_score > best_score)
502 {
503 alignment_type = temp_align_type;
504 best_score = alt_score;
505 best_column = 0;
506 missing_aas =
507 std::floor(spectrum.getMZShift(0, aa_position->l_peak) / m_aaCode.getMass('G'));
508 if(row_number < ALIGNMENT_SURPLUS + missing_aas)
509 {
510 beginning = 0;
511 }
512 else
513 {
514 beginning = std::max((std::size_t)(row_number - missing_aas - ALIGNMENT_SURPLUS),
515 (std::size_t)0);
516 }
517 if(fast_align)
518 {
519 tree_id = m_location_saver.getNextTree();
520 }
521 }
522 if(best_column != aa_position->r_peak)
523 {
524 m_updated_cells.push_back(
525 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
526 }
527 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
528 {
529 length =
530 row_number - beginning + 1 +
531 std::ceil(spectrum.getMissingMass(aa_position->r_peak) / m_aaCode.getMass('G')) +
533 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
534 }
535 else if(!fast_align)
536 {
537 if(alignment_type == AlignType::perfectShift && best_column == 0)
538 {
539 m_scenario.saveOrigin(row_number,
540 aa_position->r_peak,
541 perfect_shift_origin,
542 0,
543 best_score,
545 }
546 else
547 {
548 m_scenario.saveOrigin(row_number,
549 aa_position->r_peak,
550 m_interest_cells.at(best_column).n_row,
551 best_column,
552 best_score,
553 alignment_type);
554 }
555 }
556 }
557 }
558
559 // Update row number in column 0
560 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
561
562 // Save updated key cells in the matrix
563 while(m_updated_cells.size() > 0)
564 {
565 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
566 m_updated_cells.pop_back();
567 }
568}
bool perfectShiftPossible(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
std::size_t perfectShiftPossibleFrom0(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak.
const uint ALIGNMENT_SURPLUS(5)

References pappso::specpeptidoms::ALIGNMENT_SURPLUS(), pappso::specpeptidoms::KeyCell::beginning, pappso::specglob::both, pappso::specpeptidoms::found, pappso::specpeptidoms::foundDouble, pappso::specpeptidoms::foundShift, pappso::specpeptidoms::foundShiftDouble, pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), m_aaCode, m_interest_cells, m_location_saver, m_scenario, m_scorevalues, m_updated_cells, pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::notFound, pappso::specpeptidoms::SpOMSSpectrum::peakType(), pappso::specpeptidoms::perfectShift, perfectShiftPossible(), perfectShiftPossibleFrom0(), pappso::specpeptidoms::KeyCell::score, pappso::specpeptidoms::shift, and pappso::specpeptidoms::KeyCell::tree_id.

Referenced by correctAlign(), fastAlign(), and preciseAlign().

Member Data Documentation

◆ m_aaCode

const AaCode& pappso::specpeptidoms::SemiGlobalAlignment::m_aaCode
private

◆ m_best_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_alignment
private

◆ m_best_corrected_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_corrected_alignment
private

Definition at line 163 of file semiglobalalignment.h.

Referenced by correctAlign(), fastAlign(), and preciseAlign().

◆ m_best_post_processed_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_post_processed_alignment
private

Definition at line 163 of file semiglobalalignment.h.

Referenced by fastAlign(), and postProcessingAlign().

◆ m_interest_cells

std::vector<KeyCell> pappso::specpeptidoms::SemiGlobalAlignment::m_interest_cells
private

◆ m_location_saver

LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::m_location_saver
private

Definition at line 161 of file semiglobalalignment.h.

Referenced by fastAlign(), getLocationSaver(), and updateAlignmentMatrix().

◆ m_precision_ptr

pappso::PrecisionPtr pappso::specpeptidoms::SemiGlobalAlignment::m_precision_ptr
private

◆ m_scenario

Scenario pappso::specpeptidoms::SemiGlobalAlignment::m_scenario
private

◆ m_scorevalues

const ScoreValues& pappso::specpeptidoms::SemiGlobalAlignment::m_scorevalues
private

◆ m_updated_cells

std::vector<std::pair<std::size_t, KeyCell> > pappso::specpeptidoms::SemiGlobalAlignment::m_updated_cells
private

Definition at line 156 of file semiglobalalignment.h.

Referenced by updateAlignmentMatrix().

◆ min_score

const int pappso::specpeptidoms::SemiGlobalAlignment::min_score = 15
private

Definition at line 158 of file semiglobalalignment.h.


The documentation for this class was generated from the following files: