70                                                      const QString &protein_seq_in,
 
   71                                                      const QString &protein_id)
 
   78  std::size_t sequence_length = protein_seq_in.size();
 
   79  QString protein_seq(protein_seq_in);
 
   80  std::reverse(protein_seq.begin(), protein_seq.end());
 
   82  std::vector<AaPosition> aa_positions;
 
   96  for(std::size_t iter = 
m_interest_cells.size(); iter < spectrum.size(); iter++)
 
  101  for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
 
  103      if(protein_seq[row_number - 1] == 
'U' or protein_seq[row_number - 1] == 
'X')
 
 
  122                                                         const QString &protein_seq,
 
  123                                                         const QString &protein_id,
 
  124                                                         const std::size_t beginning,
 
  125                                                         const std::size_t length)
 
  129  if((qsizetype)(beginning + length) <= protein_seq.size())
 
  135      length2 = protein_seq.size() - beginning;
 
  139  QString sequence = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
 
  140  std::reverse(sequence.begin(), sequence.end());
 
  141  std::vector<AaPosition> aa_positions;
 
  145  m_scenario.reserve(length2 + 1, spectrum.size());
 
  159  for(std::size_t iter = 
m_interest_cells.size(); iter < spectrum.size(); iter++)
 
  166  for(std::size_t row_number = 1; row_number <= length2; row_number++)
 
  168      qDebug() << 
"row_number - 1=" << row_number - 1 << 
" sequence.size()=" << sequence.size();
 
  169      if(sequence[row_number - 1] == 
'U' or sequence[row_number - 1] == 
'X')
 
  174      qDebug() << 
"row_number - 1=" << row_number - 1 << 
" sequence.size()=" << sequence.size();
 
  205      std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
 
  206      if(corrections.size() > 0)
 
  211          for(
auto peaks_to_remove : corrections)
 
  215                1, sequence, protein_id, spectrum, peaks_to_remove, protein_seq.size() - beginning);
 
 
  227                                                         const QString &sequence,
 
  228                                                         const QString &protein_id,
 
  230                                                         std::vector<std::size_t> peaks_to_remove,
 
  233  qDebug() << recursive_call_count;
 
  234  std::vector<AaPosition> aa_positions;
 
  251  for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
 
  253      qDebug() << row_number - 1 << 
" " << sequence.size();
 
  254      if(sequence[row_number - 1] == 
'U' or sequence[row_number - 1] == 
'X')
 
  275      qDebug() << sequence << 
" " << recursive_call_count;
 
  293      std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
 
  294      if(corrections.size() > 0)
 
  296          for(
auto peaks_to_remove : corrections)
 
  299                recursive_call_count + 1, sequence, protein_id, spectrum, peaks_to_remove, offset);
 
 
  312                                                                const QString &protein_seq,
 
  313                                                                const QString &protein_id,
 
  314                                                                std::size_t beginning,
 
  316                                                                const std::vector<double> &shifts)
 
  321  for(
double precursor_mass_error : shifts)
 
  323      SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
 
  324      preciseAlign(corrected_spectrum, protein_seq, protein_id, beginning, length);
 
 
  339  const QString &sequence,
 
  340  const std::size_t row_number,
 
  341  const std::vector<AaPosition> aa_positions,
 
  343  const bool fast_align,
 
  344  const QString &protein)
 
  346  int score_found, score_shift, best_score, alt_score, tree_id;
 
  348  std::size_t best_column, 
shift, beginning, missing_aas, length, perfect_shift_origin;
 
  349  KeyCell *current_cell_ptr, *tested_cell_ptr;
 
  350  AlignType alignment_type, temp_align_type;
 
  360          condition += 2 << 
m_aaCode.getAaCode(sequence[row_number - 2].unicode());
 
  361          qDebug() << 
"condition" << condition << sequence[row_number - 2]
 
  362                   << 
m_aaCode.getAaCode(sequence[row_number - 2].unicode());
 
  366  for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
 
  367      aa_position != aa_positions.end();
 
  370      if(((condition & aa_position->condition) != 0) ||
 
  374          if(spectrum.
peakType(aa_position->r_peak) ==
 
  387          best_column = aa_position->r_peak;
 
  388          best_score  = current_cell_ptr->
score + (row_number - current_cell_ptr->
n_row) *
 
  391          tree_id        = current_cell_ptr->
tree_id;
 
  395          if(aa_position->l_support)
 
  398              if(aa_position->l_peak == 0)
 
  400                  alt_score = tested_cell_ptr->
score + score_found;
 
  404                  if(tested_cell_ptr->
n_row == row_number - 1)
 
  406                      alt_score = tested_cell_ptr->
score +
 
  407                                  (row_number - tested_cell_ptr->
n_row - 1) *
 
  413                      alt_score = tested_cell_ptr->
score +
 
  414                                  (row_number - tested_cell_ptr->
n_row - 1) *
 
  419              if(alt_score >= best_score)
 
  422                  best_score     = alt_score;
 
  423                  best_column    = aa_position->l_peak;
 
  443                      tree_id   = tested_cell_ptr->
tree_id;
 
  456                                      tested_cell_ptr->
n_row,
 
  458                                      aa_position->next_l_peak - 
shift,
 
  459                                      aa_position->r_peak))
 
  461                  alt_score = tested_cell_ptr->
score +
 
  462                              (row_number - tested_cell_ptr->
n_row - 1) *
 
  469                  alt_score = tested_cell_ptr->
score +
 
  470                              (row_number - tested_cell_ptr->
n_row - 1) *
 
  475              if(alt_score > best_score)
 
  477                  alignment_type = temp_align_type;
 
  478                  best_score     = alt_score;
 
  479                  best_column    = aa_position->next_l_peak - 
shift;
 
  481                  tree_id        = tested_cell_ptr->
tree_id;
 
  489          perfect_shift_origin =
 
  491          if(perfect_shift_origin != row_number)
 
  493              alt_score       = tested_cell_ptr->
score + score_found;
 
  498              alt_score       = tested_cell_ptr->
score + score_shift;
 
  501          if(alt_score > best_score)
 
  503              alignment_type = temp_align_type;
 
  504              best_score     = alt_score;
 
  514                  beginning = std::max((std::size_t)(row_number - missing_aas - 
ALIGNMENT_SURPLUS),
 
  522          if(best_column != aa_position->r_peak)
 
  525                {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
 
  530                row_number - beginning + 1 +
 
  533              m_location_saver.addLocation(beginning, length, tree_id, best_score, protein);
 
  541                                        perfect_shift_origin,
 
 
  573                                                                 const std::size_t origin_row,
 
  574                                                                 const std::size_t current_row,
 
  575                                                                 const std::size_t l_peak,
 
  576                                                                 const std::size_t r_peak)
 const 
  578  double missing_mass = 0;
 
  579  for(QString::const_iterator iter = sequence.begin() + origin_row;
 
  580      iter != sequence.begin() + current_row;
 
  583      missing_mass += 
m_aaCode.getMass(iter->toLatin1()); 
 
 
  591  const QString &sequence,
 
  593  const std::size_t current_row,
 
  594  const std::size_t r_peak)
 const 
  596  std::size_t perfect_shift_origin = current_row;
 
  597  double missing_mass              = spectrum.
getMZShift(0, r_peak);
 
  600  while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.
contains(aa_mass))
 
  603        sequence.at(perfect_shift_origin - 1)
 
  605      perfect_shift_origin--;
 
  609      return perfect_shift_origin;
 
 
  621                                                                    std::size_t end_peak)
 const 
  623  std::size_t perfect_shift_end = end_row + 1;
 
  627  while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
 
  631        sequence.at(perfect_shift_end - 1)
 
  637      return perfect_shift_end - 1;
 
 
  669  std::vector<QString> temp_interpretation;
 
  670  std::size_t previous_row; 
 
  671  std::size_t previous_column = 0;
 
  672  std::size_t perfect_shift_end;
 
  673  std::pair<std::vector<ScenarioCell>, 
int> best_alignment = 
m_scenario.getBestAlignment();
 
  678  if(best_alignment.first.front().previous_row > offset)
 
  681        QString(
"best_alignment.first.front().previous_row > offset %1 %2")
 
  683          .arg(best_alignment.first.front().previous_row));
 
  685  if(best_alignment.first.back().previous_row > offset)
 
  688        QString(
"best_alignment.first.back().previous_row > offset %1 %2")
 
  690          .arg(best_alignment.first.back().previous_row));
 
  692  m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
 
  697  for(
auto cell : best_alignment.first)
 
  699      switch(cell.alignment_type)
 
  702            temp_interpretation.push_back(sequence.at(previous_row - 1));
 
  703            if(previous_row > cell.previous_row + 1)
 
  706                  sequence.at(previous_row - 1)
 
  709                  sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
 
  710                for(
auto aa : skipped_aa)
 
  712                    temp_interpretation.push_back(
'[' + aa + 
']');
 
  713                    skipped_mass += 
m_aaCode.getMass(aa.toLatin1()); 
 
  715                temp_interpretation.push_back(
 
  717                  QString::number(spectrum.
getMZShift(cell.previous_column, previous_column) -
 
  724            temp_interpretation.push_back(
'[' + sequence.at(previous_row - 1) + 
']');
 
  728            temp_interpretation.push_back(sequence.at(previous_row - 1));
 
  729            temp_interpretation.push_back(
 
  731              QString::number(spectrum.
getMZShift(cell.previous_column, previous_column) -
 
  732                              m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1())) +
 
  735              spectrum.
getMZShift(cell.previous_column, previous_column) -
 
  736              m_aaCode.getMass(sequence.at(previous_row - 1).toLatin1()));
 
  740            skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
 
  741            std::reverse(skipped_aa.begin(), skipped_aa.end());
 
  742            temp_interpretation.push_back(skipped_aa);
 
  745            previous_row    = cell.previous_row;
 
  746            previous_column = cell.previous_column;
 
  750      previous_row    = cell.previous_row;
 
  751      previous_column = cell.previous_column;
 
  753  std::reverse(temp_interpretation.begin(), temp_interpretation.end());
 
  771      switch(spectrum.at(peak).type)
 
  774            qDebug() << peak << 
"native";
 
  778            qDebug() << peak << 
"both";
 
  782            qDebug() << peak << 
"synthetic";
 
  785            qDebug() << peak << 
"symmetric";
 
  797                                                  best_alignment.first.front().previous_row,
 
  799      if(perfect_shift_end != best_alignment.first.front().previous_row)
 
  802            sequence.sliced(best_alignment.first.front().previous_row,
 
  803                            perfect_shift_end - best_alignment.first.front().previous_row);
 
  804          for(
auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
 
  806              temp_interpretation.push_back(
'[' + *aa + 
']');
 
  824  for(
auto str = temp_interpretation.rbegin(); str != temp_interpretation.rend(); str++)
 
 
  844                                                                   const QString &protein_seq)
 
  847  if(alignment.
end > (std::size_t)protein_seq.size())
 
  851                                          .arg(protein_seq.size()));
 
  853  std::vector<double> potential_mass_errors(alignment.
shifts);
 
  859      while(
shift > 0 && index > 0)
 
  861          potential_mass_errors.push_back(
shift);
 
  865            protein_seq.at(index).toLatin1()); 
 
  872  index = alignment.
end + 1;
 
  873  while(
shift > 0 && index < (std::size_t)protein_seq.size())
 
  875      potential_mass_errors.push_back(
shift);
 
  876      qWarning() << 
" shift=" << 
shift << 
" index=" << index
 
  877                 << 
" letter=" << protein_seq.at(index).toLatin1();
 
  879        protein_seq.at(index).toLatin1()); 
 
  883  return potential_mass_errors;
 
 
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
 
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
 
bool contains(pappso_double) const
 
std::vector< std::vector< std::size_t > > getPeaks() const
 
void addPeaks(std::size_t peak1, std::size_t peak2)
 
void saveBestAlignment(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
 
LocationSaver m_location_saver
 
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
 
void preciseAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
 
Scenario getScenario() const
Returns a copy of m_scenario.
 
void updateAlignmentMatrix(const QString &sequence, const std::size_t row_number, const std::vector< AaPosition > aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const QString &protein)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
 
void postProcessingAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
 
Alignment m_best_alignment
 
const ScoreValues & m_scorevalues
 
bool perfectShiftPossible(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
 
std::size_t perfectShiftPossibleEnd(const QString &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
 
Alignment m_best_post_processed_alignment
 
Alignment m_best_corrected_alignment
 
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
 
std::vector< KeyCell > m_interest_cells
 
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
 
void correctAlign(int recursive_call_count, const QString &protein_seq, const QString &protein_id, const SpOMSSpectrum &spectrum, std::vector< std::size_t > peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
 
SemiGlobalAlignment(const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
 
void fastAlign(const SpOMSSpectrum &spectrum, const QString &protein_seq, const QString &protein_id)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
 
pappso::PrecisionPtr m_precision_ptr
 
std::size_t perfectShiftPossibleFrom0(const QString &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak.
 
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
 
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
 
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
 
std::vector< AaPosition > & getAaPositions(pappso::Enums::AminoAcidChar aa) const
Returns the list of aa_positions for a given amino acid.
 
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
 
std::size_t getComplementaryPeak(std::size_t peak) const
 
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
 
@ synthetic
does not correspond to existing peak, for computational purpose
 
@ both
both, the ion and the complement exists in the original spectrum
 
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
 
const uint ALIGNMENT_SURPLUS(5)
 
const int MIN_ALIGNMENT_SCORE(15)
 
const PrecisionBase * PrecisionPtr
 
void reset()
reinitialize to default score_values
 
std::vector< double > shifts
 
std::vector< std::size_t > peaks