70 const QString &protein_seq_in,
71 const QString &protein_id)
78 std::size_t sequence_length = protein_seq_in.size();
79 QString protein_seq(protein_seq_in);
80 std::reverse(protein_seq.begin(), protein_seq.end());
82 std::vector<AaPosition> aa_positions;
96 for(std::size_t iter =
m_interest_cells.size(); iter < spectrum.size(); iter++)
101 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
103 if(protein_seq[row_number - 1] ==
'U' or protein_seq[row_number - 1] ==
'X')
122 const QString &protein_seq,
123 const QString &protein_id,
124 const std::size_t beginning,
125 const std::size_t length)
129 if((qsizetype)(beginning + length) <= protein_seq.size())
135 length2 = protein_seq.size() - beginning;
139 QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
140 std::reverse(sequence.begin(), sequence.end());
141 std::vector<AaPosition> aa_positions;
145 m_scenario.reserve(length2 + 1, spectrum.size());
159 for(std::size_t iter =
m_interest_cells.size(); iter < spectrum.size(); iter++)
166 for(std::size_t row_number = 1; row_number <= length2; row_number++)
168 qDebug() <<
"row_number - 1=" << row_number - 1 <<
" sequence.size()=" << sequence.size();
169 if(sequence[row_number - 1] ==
'U' or sequence[row_number - 1] ==
'X')
174 qDebug() <<
"row_number - 1=" << row_number - 1 <<
" sequence.size()=" << sequence.size();
205 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
206 if(corrections.size() > 0)
211 for(
auto peaks_to_remove : corrections)
215 1, sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
227 const QString &sequence,
228 const QString &protein_id,
230 std::vector<std::size_t> peaks_to_remove,
233 qDebug() << recursive_call_count;
234 std::vector<AaPosition> aa_positions;
251 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
253 qDebug() << row_number - 1 <<
" " << sequence.size();
254 if(sequence[row_number - 1] ==
'U' or sequence[row_number - 1] ==
'X')
275 qDebug() << sequence <<
" " << recursive_call_count;
293 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
294 if(corrections.size() > 0)
296 for(
auto peaks_to_remove : corrections)
299 recursive_call_count + 1, sequence, protein_id, spectrum, peaks_to_remove, offset);
312 const QString &protein_seq,
313 const QString &protein_id,
314 std::size_t beginning,
316 const std::vector<double> &shifts)
321 for(
double precursor_mass_error : shifts)
323 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
324 preciseAlign(corrected_spectrum, protein_seq, protein_id, beginning, length);
339 const QString &sequence,
340 const std::size_t row_number,
341 const std::vector<AaPosition> aa_positions,
343 const bool fast_align,
344 const QString &protein)
346 int score_found, score_shift, best_score, alt_score, tree_id;
348 std::size_t best_column,
shift, beginning, missing_aas, length, perfect_shift_origin;
349 KeyCell *current_cell_ptr, *tested_cell_ptr;
350 AlignType alignment_type, temp_align_type;
360 condition += 2 <<
m_aaCode.getAaCode(sequence[row_number - 2].unicode());
361 qDebug() <<
"condition" << condition << sequence[row_number - 2]
362 <<
m_aaCode.getAaCode(sequence[row_number - 2].unicode());
366 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
367 aa_position != aa_positions.end();
370 if(((condition & aa_position->condition) != 0) ||
374 if(spectrum.
peakType(aa_position->r_peak) ==
387 best_column = aa_position->r_peak;
388 best_score = current_cell_ptr->
score + (row_number - current_cell_ptr->
n_row) *
391 tree_id = current_cell_ptr->
tree_id;
395 if(aa_position->l_support)
398 if(aa_position->l_peak == 0)
400 alt_score = tested_cell_ptr->
score + score_found;
404 if(tested_cell_ptr->
n_row == row_number - 1)
406 alt_score = tested_cell_ptr->
score +
407 (row_number - tested_cell_ptr->
n_row - 1) *
413 alt_score = tested_cell_ptr->
score +
414 (row_number - tested_cell_ptr->
n_row - 1) *
419 if(alt_score >= best_score)
422 best_score = alt_score;
423 best_column = aa_position->l_peak;
443 tree_id = tested_cell_ptr->
tree_id;
456 tested_cell_ptr->
n_row,
458 aa_position->next_l_peak -
shift,
459 aa_position->r_peak))
461 alt_score = tested_cell_ptr->
score +
462 (row_number - tested_cell_ptr->
n_row - 1) *
469 alt_score = tested_cell_ptr->
score +
470 (row_number - tested_cell_ptr->
n_row - 1) *
475 if(alt_score > best_score)
477 alignment_type = temp_align_type;
478 best_score = alt_score;
479 best_column = aa_position->next_l_peak -
shift;
481 tree_id = tested_cell_ptr->
tree_id;
489 perfect_shift_origin =
491 if(perfect_shift_origin != row_number)
493 alt_score = tested_cell_ptr->
score + score_found;
498 alt_score = tested_cell_ptr->
score + score_shift;
501 if(alt_score > best_score)
503 alignment_type = temp_align_type;
504 best_score = alt_score;
514 beginning = std::max((std::size_t)(row_number - missing_aas -
ALIGNMENT_SURPLUS),
522 if(best_column != aa_position->r_peak)
525 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
530 row_number - beginning + 1 +
533 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
541 perfect_shift_origin,
573 const std::size_t origin_row,
574 const std::size_t current_row,
575 const std::size_t l_peak,
576 const std::size_t r_peak)
const
578 double missing_mass = 0;
579 for(QString::const_iterator iter = sequence.begin() + origin_row;
580 iter != sequence.begin() + current_row;
583 missing_mass +=
m_aaCode.getMass(iter->toLatin1());
591 const QString &sequence,
593 const std::size_t current_row,
594 const std::size_t r_peak)
const
596 std::size_t perfect_shift_origin = current_row;
597 double missing_mass = spectrum.
getMZShift(0, r_peak);
600 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.
contains(aa_mass))
603 sequence.at(perfect_shift_origin - 1)
605 perfect_shift_origin--;
609 return perfect_shift_origin;
621 std::size_t end_peak)
const
623 std::size_t perfect_shift_end = end_row + 1;
627 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
631 sequence.at(perfect_shift_end - 1)
637 return perfect_shift_end - 1;
669 std::vector<QString> temp_interpretation;
670 std::size_t previous_row;
671 std::size_t previous_column = 0;
672 std::size_t perfect_shift_end;
673 std::pair<std::vector<ScenarioCell>,
int> best_alignment =
m_scenario.getBestAlignment();
678 if(best_alignment.first.front().previous_row > offset)
681 QString(
"best_alignment.first.front().previous_row > offset %1 %2")
683 .arg(best_alignment.first.front().previous_row));
685 if(best_alignment.first.back().previous_row > offset)
688 QString(
"best_alignment.first.back().previous_row > offset %1 %2")
690 .arg(best_alignment.first.back().previous_row));
692 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
697 for(
auto cell : best_alignment.first)
699 switch(cell.alignment_type)
702 temp_interpretation.push_back(sequence.at(previous_row - 1));
703 if(previous_row > cell.previous_row + 1)
706 sequence.at(previous_row - 1)
709 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
710 for(
auto aa : skipped_aa)
712 temp_interpretation.push_back(
'[' + aa +
']');
713 skipped_mass +=
m_aaCode.getMass(aa.toLatin1());
715 temp_interpretation.push_back(
717 QString::number(spectrum.
getMZShift(cell.previous_column, previous_column) -
724 temp_interpretation.push_back(
'[' + sequence.at(previous_row - 1) +
']');
728 temp_interpretation.push_back(sequence.at(previous_row - 1));
729 temp_interpretation.push_back(
731 QString::number(spectrum.
getMZShift(cell.previous_column, previous_column) -
732 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1())) +
735 spectrum.
getMZShift(cell.previous_column, previous_column) -
736 m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1()));
740 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
741 std::reverse(skipped_aa.begin(), skipped_aa.end());
742 temp_interpretation.push_back(skipped_aa);
745 previous_row = cell.previous_row;
746 previous_column = cell.previous_column;
750 previous_row = cell.previous_row;
751 previous_column = cell.previous_column;
753 std::reverse(temp_interpretation.begin(), temp_interpretation.end());
771 switch(spectrum.at(peak).type)
774 qDebug() << peak <<
"native";
778 qDebug() << peak <<
"both";
782 qDebug() << peak <<
"synthetic";
785 qDebug() << peak <<
"symmetric";
797 best_alignment.first.front().previous_row,
799 if(perfect_shift_end != best_alignment.first.front().previous_row)
802 sequence.sliced(best_alignment.first.front().previous_row,
803 perfect_shift_end - best_alignment.first.front().previous_row);
804 for(
auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
806 temp_interpretation.push_back(
'[' + *aa +
']');
824 for(
auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
844 const QString &protein_seq)
847 if(alignment.
end > (std::size_t)protein_seq.size())
851 .arg(protein_seq.size()));
853 std::vector<double> potential_mass_errors(alignment.
shifts);
859 while(
shift > 0 && index > 0)
861 potential_mass_errors.push_back(
shift);
865 protein_seq.at(index).toLatin1());
872 index = alignment.
end + 1;
873 while(
shift > 0 && index < (std::size_t)protein_seq.size())
875 potential_mass_errors.push_back(
shift);
876 qWarning() <<
" shift=" <<
shift <<
" index=" << index
877 <<
" letter=" << protein_seq.at(index).toLatin1();
879 protein_seq.at(index).toLatin1());
883 return potential_mass_errors;
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
bool contains(pappso_double) const
std::vector< std::vector< std::size_t > > getPeaks() const
void addPeaks(std::size_t peak1, std::size_t peak2)
void saveBestAlignment(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
LocationSaver m_location_saver
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
void preciseAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
Scenario getScenario() const
Returns a copy of m_scenario.
void updateAlignmentMatrix(const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const QString &protein)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void postProcessingAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
Alignment m_best_alignment
const ScoreValues & m_scorevalues
bool perfectShiftPossible(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::size_t perfectShiftPossibleEnd(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
Alignment m_best_post_processed_alignment
Alignment m_best_corrected_alignment
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
std::vector< KeyCell > m_interest_cells
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
void correctAlign(int recursive_call_count, const QString &protein_seq, const QString &protein_id, const SpOMSSpectrum &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
SemiGlobalAlignment(const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
void fastAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
pappso::PrecisionPtr m_precision_ptr
std::size_t perfectShiftPossibleFrom0(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak.
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
std::vector< AaPosition > & getAaPositions(pappso::Enums::AminoAcidChar aa) const
Returns the list of aa_positions for a given amino acid.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::size_t getComplementaryPeak(std::size_t peak) const
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
@ synthetic
does not correspond to existing peak, for computational purpose
@ both
both, the ion and the complement exists in the original spectrum
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
const uint ALIGNMENT_SURPLUS(5)
const int MIN_ALIGNMENT_SCORE(15)
const PrecisionBase * PrecisionPtr
void reset()
reinitialize to default score_values
std::vector< double > shifts
std::vector< std::size_t > peaks