libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msrundatasettree.cpp
Go to the documentation of this file.
1// GPL 3+
2// Filippo Rusconi
3
4#include <map>
5#include <limits>
6#include <iostream>
7#include <iomanip>
8
9#include "msrundatasettree.h"
10
13
14#include <QVariant>
15
16
17namespace pappso
18{
19
20
22{
23}
24
25
27{
28 // qDebug();
29
30 for(auto &&node : m_rootNodes)
31 {
32 // Each node is responsible for freeing its children nodes!
33
34 delete node;
35 }
36
37 m_rootNodes.clear();
38
39 // Beware not to delete the node member of the map, as we have already
40 // destroyed them above!
41 //
42 // for(auto iterator = m_indexNodeMap.begin(); iterator !=
43 // m_indexNodeMap.end();
44 //++iterator)
45 //{
46 // delete(iterator->second);
47 //}
48
49 // qDebug();
50}
51
52
55{
56 // qDebug();
57
58 if(mass_spectrum_csp == nullptr)
59 qFatal("Cannot be nullptr");
60
61 if(mass_spectrum_csp.get() == nullptr)
62 qFatal("Cannot be nullptr");
63
64 // We need to get the precursor spectrum index, in case this spectrum is a
65 // fragmentation index.
66
67 MsRunDataSetTreeNode *new_node_p = nullptr;
68
69 std::size_t precursor_spectrum_index = mass_spectrum_csp->getPrecursorSpectrumIndex();
70
71 // qDebug() << "The precursor_spectrum_index:" << precursor_spectrum_index;
72
73 if(precursor_spectrum_index == std::numeric_limits<std::size_t>::max())
74 {
75 // This spectrum is a full scan spectrum, not a fragmentation spectrum.
76 // Create a new node with no parent and push it back to the root nodes
77 // vector.
78
79 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, nullptr);
80
81 // Since there is no parent in this overload, it is assumed that the node
82 // to be populated with the new node is the root node.
83
84 m_rootNodes.push_back(new_node_p);
85
86 // true: with_data
87 // qDebug().noquote() << "Pushed back to the roots node vector node:"
88 //<< new_node_p->toString(true);
89 }
90 else
91 {
92 // This spectrum is a fragmentation spectrum.
93
94 // Sanity check
95
96 if(mass_spectrum_csp->getMsLevel() <= 1)
97 {
99 "msrundatasettree.cpp -- ERROR the MS level needs to be > 1 in a "
100 "fragmentation spectrum.");
101 }
102
103 // Get the node that contains the precursor ion mass spectrum.
104 MsRunDataSetTreeNode *parent_node_p = findNode(precursor_spectrum_index);
105
106 if(parent_node_p == nullptr)
107 {
108 throw ExceptionNotPossible(QString("%1 @ %2 - %3\n")
109 .arg(__FILE__)
110 .arg(__LINE__)
111 .arg("msrundatasettree.cpp -- ERROR could not find "
112 "a tree node matching the index."));
113 }
114
115 // qDebug() << "Fragmentation spectrum"
116 //<< "Found parent node:" << parent_node_p
117 //<< "for precursor index:" << precursor_spectrum_index;
118
119 // At this point, create a new node with the right parent.
120
121 new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_node_p);
122
123 parent_node_p->m_children.push_back(new_node_p);
124 }
125
126 // And now document that addition in the node index map.
127 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
128 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
129
130 // We also want to document the new node relating to the
131 // retention time.
132
133 documentNodeInDtRtMap(mass_spectrum_csp->getRtInMinutes(), new_node_p, Enums::DataKind::rt);
134
135 // Likewise for the ion mobility time.
136
137 double ion_mobility_value = -1;
138
139 Enums::MsDataFormat file_format = mcsp_msRunId->getMsDataFormat();
140 bool ok = false;
141
142 if(file_format == Enums::MsDataFormat::brukerTims)
143 {
144 // Start by looking if there is a OneOverK0 valid value, which
145 // means we have effectively handled a genuine mobility scan spectrum.
146 QVariant ion_mobility_variant_value =
147 mass_spectrum_csp->getParameterValue(QualifiedMassSpectrumParameter::IonMobOneOverK0);
148
149 if(ion_mobility_variant_value.isValid())
150 {
151 // Yes, genuine ion mobility scan handled here.
152
153 ion_mobility_value = ion_mobility_variant_value.toDouble(&ok);
154
155 if(!ok)
156 {
157 qFatal(
158 "The data are Bruker timsTOF data but failed to convert valid "
159 "QVariant 1/K0 value to double.");
160 }
161 }
162 else
163 {
164 // We are not handling a genuine single ion mobility scan here.
165 // We must be handling a mass spectrum that correspond to
166 // the combination of all the ion mobility scans for a single
167 // frame. This is when the user asks that ion mobility scans
168 // be flattended. In this case, the OneOverK0 value is not valid
169 // but there are two values that are set:
170 // TimsFrameInvKoBegin and TimsFrameInvKoEnd.
171 // See TimsFramesMsRunReader::readSpectrumCollection2.
172
173 // Test one of these values as a sanity check. But
174 // give the value of -1 for the ion mobility because we do not
175 // have ion mobility data here.
176
177 ion_mobility_variant_value = mass_spectrum_csp->getParameterValue(
179
180 if(!ion_mobility_variant_value.isValid())
181 {
182 qFatal(
183 "The data are Bruker timsTOF data but failed to get correct "
184 "ion mobility data. Inconsistency found.");
185 }
186 }
187 }
188 else
189 {
190 ion_mobility_value = mass_spectrum_csp->getDtInMilliSeconds();
191 }
192
193 if(ion_mobility_value != -1)
194 documentNodeInDtRtMap(ion_mobility_value, new_node_p, Enums::DataKind::dt);
195
197
198 // qDebug() << "New index/node map:"
199 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
200 //<< new_node_p;
201
202 return new_node_p;
203}
204
205
206const std::map<std::size_t, MsRunDataSetTreeNode *> &
211
212
213std::size_t
215{
216 // We have a node and we want to get the matching mass spectrum index.
217
218 if(node == nullptr)
219 throw("Cannot be that the node pointer is nullptr");
220
221 std::map<std::size_t, MsRunDataSetTreeNode *>::const_iterator iterator =
222 std::find_if(m_indexNodeMap.begin(),
223 m_indexNodeMap.end(),
224 [node](const std::pair<std::size_t, MsRunDataSetTreeNode *> pair) {
225 return pair.second == node;
226 });
227
228 if(iterator != m_indexNodeMap.end())
229 return iterator->first;
230
231 return std::numeric_limits<std::size_t>::max();
232}
233
234
235std::size_t
237{
238 MsRunDataSetTreeNode *node_p = findNode(qualified_mass_spectrum_csp);
239
240 return massSpectrumIndex(node_p);
241}
242
243
244const std::vector<MsRunDataSetTreeNode *> &
246{
247 return m_rootNodes;
248}
249
250
251void
253{
254 // qDebug() << "Going to call node->accept(visitor) for each root node.";
255
256 for(auto &&node : m_rootNodes)
257 {
258 // qDebug() << "Calling accept for root node:" << node;
259
260 if(visitor.shouldStop())
261 break;
262
263 node->accept(visitor);
264 }
265}
266
267
268void
270 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_begin_iterator,
271 std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_end_iterator)
272{
273 // qDebug() << "Visitor:" << &visitor << "The distance is between iterators
274 // is:"
275 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
276
277 using Iterator = std::vector<MsRunDataSetTreeNode *>::const_iterator;
278
279 Iterator iter = nodes_begin_iterator;
280
281 // Inform the visitor of the number of nodes to work on.
282
283 std::size_t node_count = std::distance(nodes_begin_iterator, nodes_end_iterator);
284
285 visitor.setNodesToProcessCount(node_count);
286
287 while(iter != nodes_end_iterator)
288 {
289 // qDebug() << "Visitor:" << &visitor
290 //<< "The distance is between iterators is:"
291 //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
292
293 // qDebug() << "Node visited:" << (*iter)->toString();
294
295 if(visitor.shouldStop())
296 break;
297
298 (*iter)->accept(visitor);
299 ++iter;
300 }
301}
302
303
306{
307 // qDebug();
308
309 for(auto &node : m_rootNodes)
310 {
311 // qDebug() << "In one node of the root nodes.";
312
313 MsRunDataSetTreeNode *iterNode = node->findNode(mass_spectrum_csp);
314 if(iterNode != nullptr)
315 return iterNode;
316 }
317
318 return nullptr;
319}
320
321
323MsRunDataSetTree::findNode(std::size_t spectrum_index) const
324{
325 // qDebug();
326
327 for(auto &node : m_rootNodes)
328 {
329 // qDebug() << "In one node of the root nodes.";
330
331 MsRunDataSetTreeNode *iterNode = node->findNode(spectrum_index);
332 if(iterNode != nullptr)
333 return iterNode;
334 }
335
336 return nullptr;
337}
338
339
340std::vector<MsRunDataSetTreeNode *>
342{
343 // We want to push back all the nodes of the tree in a flat vector of nodes.
344
345 std::vector<MsRunDataSetTreeNode *> nodes;
346
347 for(auto &&node : m_rootNodes)
348 {
349 // The node will store itself and all of its children.
350 node->flattenedView(nodes, true /* with_descendants */);
351 }
352
353 return nodes;
354}
355
356
357std::vector<MsRunDataSetTreeNode *>
358MsRunDataSetTree::flattenedViewMsLevel(std::size_t ms_level, bool with_descendants)
359{
360 std::vector<MsRunDataSetTreeNode *> nodes;
361
362 // Logically, ms_level cannot be 0.
363
364 if(!ms_level)
365 {
366 throw ExceptionNotPossible("msrundatasettree.cpp -- ERROR the MS level cannot be 0.");
367
368 return nodes;
369 }
370
371 // The depth of the tree at which we are right at this point is 0, we have not
372 // gone into the children yet.
373
374 std::size_t depth = 0;
375
376 // If ms_level is 1, then that means that we want the nodes starting right at
377 // the root nodes with or without the descendants.
378
379 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
380 //<< "ms_level: " << ms_level << " depth: " << depth << std::endl;
381
382 if(ms_level == 1)
383 {
384 for(auto &&node : m_rootNodes)
385 {
386 // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__
387 //<< " () "
388 //<< "Handling one of the root nodes at ms_level = 1."
389 //<< std::endl;
390
391 node->flattenedView(nodes, with_descendants);
392 }
393
394 return nodes;
395 }
396
397 // At this point, we know that we want the descendants of the root nodes since
398 // we want ms_level > 1, so we need go to to the children of the root nodes.
399
400 // Let depth to 0, because if we go to the children of the root nodes we will
401 // still be at depth 0, that is MS level 1.
402
403 for(auto &node : m_rootNodes)
404 {
405 // std::cout
406 //<< __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
407 //<< std::setprecision(15)
408 //<< "Requesting a flattened view of the root's child nodes with depth: "
409 //<< depth << std::endl;
410
411 node->flattenedViewMsLevelNodes(ms_level, depth, nodes, with_descendants);
412 }
413
414 return nodes;
415}
416
417
420{
421
422 // qDebug();
423
424 // Find the node that holds the mass spectrum that was acquired as the
425 // precursor that when fragmented gave a spectrum at spectrum_index;
426
427 // Get the node that contains the product_spectrum_index first.
428 MsRunDataSetTreeNode *node = nullptr;
429 node = findNode(product_spectrum_index);
430
431 // Now get the node that contains the precursor_spectrum_index.
432
433 return findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex());
434}
435
436
437std::vector<MsRunDataSetTreeNode *>
439{
440 std::vector<MsRunDataSetTreeNode *> nodes;
441
442 // First get the node of the precursor spectrum index.
443
444 MsRunDataSetTreeNode *precursor_node = findNode(precursor_spectrum_index);
445
446 if(precursor_node == nullptr)
447 return nodes;
448
449 nodes.assign(precursor_node->m_children.begin(), precursor_node->m_children.end());
450
451 return nodes;
452}
453
454
455std::vector<MsRunDataSetTreeNode *>
457{
458
459 // Find all the precursor nodes holding a mass spectrum that contained a
460 // precursor mz-value.
461
462 if(precision_ptr == nullptr)
463 throw ExceptionNotPossible("msrundatasettree.cpp -- ERROR precision_ptr cannot be nullptr.");
464
465 std::vector<MsRunDataSetTreeNode *> product_nodes;
466
467 // As a first step, find all the nodes that hold a mass spectrum that was
468 // acquired as a fragmentation spectrum of an ion of mz, that is, search all
469 // the product ion nodes for which precursor was mz.
470
471 for(auto &&node : m_rootNodes)
472 {
473 node->productNodesByPrecursorMz(mz, precision_ptr, product_nodes);
474 }
475
476 // Now, for each node found get the precursor node
477
478 std::vector<MsRunDataSetTreeNode *> precursor_nodes;
479
480 for(auto &&node : product_nodes)
481 {
482 precursor_nodes.push_back(findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex()));
483 }
484
485 return precursor_nodes;
486}
487
488
489bool
491 MsRunDataSetTreeNode *node_p,
492 Enums::DataKind data_kind)
493{
494 // qDebug();
495
496 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
497 using DoubleNodeVectorMap = std::map<double, NodeVector>;
498 using MapPair = std::pair<double, NodeVector>;
499 using MapIterator = DoubleNodeVectorMap::iterator;
500
501 DoubleNodeVectorMap *map_p;
502
503 if(data_kind == Enums::DataKind::rt)
504 {
506 }
507 else if(data_kind == Enums::DataKind::dt)
508 {
510 }
511 else
512 qFatal("Programming error.");
513
514 // There are two possibilities:
515 //
516 // 1. The time was never encountered yet. We won't find it. We need to
517 // allocate a vector of Node's and set it associated to time in the map.
518 //
519 // 2. The time was encountered already, we will find it in the maps, we'll
520 // just push_back the Node in the vector of nodes.
521
522 MapIterator found_iterator = map_p->find(time);
523
524 if(found_iterator != map_p->end())
525 {
526 // The time value was encountered already.
527
528 found_iterator->second.push_back(node_p);
529
530 // qDebug() << "Found iterator for time:" << time;
531 }
532 else
533 {
534 // We need to create a new vector with the node.
535
536 NodeVector node_vector = {node_p};
537
538 map_p->insert(MapPair(time, node_vector));
539
540 // qDebug() << "Inserted new time:node_vector pair.";
541 }
542
543 return true;
544}
545
546
549 MsRunDataSetTreeNode *parent_p)
550{
551 // qDebug();
552
553 // We want to add a mass spectrum. Either the parent_p argument is nullptr or
554 // not. If it is nullptr, then we just append the mass spectrum to the vector
555 // of root nodes. If it is not nullptr, we need to append the mass spectrum to
556 // that node.
557
558 MsRunDataSetTreeNode *new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_p);
559
560 if(parent_p == nullptr)
561 {
562 m_rootNodes.push_back(new_node_p);
563
564 // qDebug() << "Pushed back" << new_node << "to root nodes:" <<
565 // &m_rootNodes;
566 }
567 else
568 {
569 parent_p->m_children.push_back(new_node_p);
570
571 // qDebug() << "Pushed back" << new_node << "with parent:" << parent_p;
572 }
573
575
576 // And now document that addition in the node index map.
577 m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
578 mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
579
580 // We also want to document the new node relating to the
581 // retention time.
582
583 documentNodeInDtRtMap(mass_spectrum_csp->getRtInMinutes(), new_node_p, Enums::DataKind::rt);
584
585 // Likewise for the ion mobility time.
586
587 double ion_mobility_value = -1;
588
589 Enums::MsDataFormat file_format = mcsp_msRunId->getMsDataFormat();
590 bool ok = false;
591
592 if(file_format == Enums::MsDataFormat::brukerTims)
593 {
594 // Start by looking if there is a OneOverK0 valid value, which
595 // means we have effectively handled a genuine mobility scan spectrum.
596 QVariant ion_mobility_variant_value =
597 mass_spectrum_csp->getParameterValue(QualifiedMassSpectrumParameter::IonMobOneOverK0);
598
599 if(ion_mobility_variant_value.isValid())
600 {
601 // Yes, genuine ion mobility scan handled here.
602
603 ion_mobility_value = ion_mobility_variant_value.toDouble(&ok);
604
605 if(!ok)
606 {
607 qFatal(
608 "The data are Bruker timsTOF data but failed to convert valid "
609 "QVariant 1/K0 value to double.");
610 }
611 }
612 else
613 {
614 // We are not handling a genuine single ion mobility scan here.
615 // We must be handling a mass spectrum that correspond to
616 // the combination of all the ion mobility scans for a single
617 // frame. This is when the user asks that ion mobility scans
618 // be flattended. In this case, the OneOverK0 value is not valid
619 // but there are two values that are set:
620 // TimsFrameInvKoBegin and TimsFrameInvKoEnd.
621 // See TimsFramesMsRunReader::readSpectrumCollection2.
622
623 // Test one of these values as a sanity check. But
624 // give the value of -1 for the ion mobility because we do not
625 // have ion mobility data here.
626
627 ion_mobility_variant_value = mass_spectrum_csp->getParameterValue(
629
630 if(!ion_mobility_variant_value.isValid())
631 {
632 qFatal(
633 "The data are Bruker timsTOF data but failed to get correct "
634 "ion mobility data. Inconsistency found.");
635 }
636 }
637 }
638 else
639 {
640 ion_mobility_value = mass_spectrum_csp->getDtInMilliSeconds();
641 }
642
643 if(ion_mobility_value != -1)
644 documentNodeInDtRtMap(ion_mobility_value, new_node_p, Enums::DataKind::dt);
645
646 // qDebug() << "New index/node map:"
647 //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
648 //<< new_node;
649
650 return new_node_p;
651}
652
653
656 std::size_t precursor_spectrum_index)
657{
658 // qDebug();
659
660 // First get the node containing the mass spectrum that was acquired at index
661 // precursor_spectrum_index.
662
663 // qDebug() << "Need to find the precursor's mass spectrum node for precursor
664 // "
665 //"spectrum index:"
666 //<< precursor_spectrum_index;
667
668 MsRunDataSetTreeNode *mass_spec_data_node_p = findNode(precursor_spectrum_index);
669
670 // qDebug() << "Found node" << mass_spec_data_node_p
671 //<< "for precursor index:" << precursor_spectrum_index;
672
673 if(mass_spec_data_node_p == nullptr)
674 {
676 "msrundatasettree.cpp -- ERROR could not find a a "
677 "tree node matching the index.");
678 }
679
680 // qDebug() << "Calling addMassSpectrum with parent node:"
681 //<< mass_spec_data_node_p;
682
683 return addMassSpectrum(mass_spectrum_csp, mass_spec_data_node_p);
684}
685
686
687std::size_t
689 double end,
690 NodeVector &nodes,
691 Enums::DataKind data_kind) const
692{
693 qDebug() << "Adding ms run data set tree nodes" << "inside of [" << start << "-" << end << "]"
694 << dataKindMap[data_kind] << "range";
695
696 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
697 using DoubleNodeVectorMap = std::map<double, NodeVector>;
698 using MapIterator = DoubleNodeVectorMap::const_iterator;
699
700 const DoubleNodeVectorMap *map_p;
701
702 if(data_kind == Enums::DataKind::rt)
703 {
705 }
706 else if(data_kind == Enums::DataKind::dt)
707 {
709 }
710 else
711 qFatal("Programming error.");
712
713 std::size_t added_nodes = 0;
714
715 // Get the iterator to the map item that has the key greater or equal to
716 // start.
717
718 MapIterator start_iterator = map_p->lower_bound(start);
719
720 if(start_iterator == map_p->end())
721 return 0;
722
723 // Now get the end of the map useful range of items.
724
725 MapIterator end_iterator = map_p->upper_bound(end);
726
727 // Now that we have the iterator range, iterate in it and get the mass spectra
728 // from each item's pair.second node vector.
729
730 for(MapIterator iterator = start_iterator; iterator != end_iterator; ++iterator)
731 {
732 // We are iterating in MapPair items.
733
734 NodeVector node_vector = iterator->second;
735
736 // All the nodes in the node vector need to be copied to the mass_spectra
737 // vector passed as parameter.
738
739 for(auto &&node_p : node_vector)
740 {
741 nodes.push_back(node_p);
742
743 ++added_nodes;
744 }
745 }
746
747 return added_nodes;
748}
749
750
751std::size_t
753 double end,
754 NodeVector &nodes,
755 Enums::DataKind data_kind) const
756{
757 qDebug() << "Removing ms run data set tree nodes" << "outside of [" << start << "-" << end << "]"
758 << dataKindMap[data_kind] << "range";
759
760 using NodeVector = std::vector<MsRunDataSetTreeNode *>;
761 using NodeVectorIterator = NodeVector::iterator;
762
763 using DoubleNodeVectorMap = std::map<double, NodeVector>;
764 using MapIterator = DoubleNodeVectorMap::const_iterator;
765
766 const DoubleNodeVectorMap *map_p;
767
768 if(data_kind == Enums::DataKind::rt)
769 {
771 }
772 else if(data_kind == Enums::DataKind::dt)
773 {
775 }
776 else
777 qFatal("Programming error.");
778
779 std::size_t removed_vector_items = 0;
780
781 // We want to remove from the nodes vector all the nodes that contain a mass
782 // spectrum acquired at a time range outside of [ start-end ], that is, the
783 // time values [begin() - start [ and ]end -- end()[.
784
785 // Get the iterator to the map item that has the key less to
786 // start (we want to keep the map item having key == start).
787
788 MapIterator first_end_iterator = (*map_p).upper_bound(start);
789
790 // Now that we have the first_end_iterator, we can iterate between [begin --
791 // first_end_iterator[
792
793 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator; ++iterator)
794 {
795 // Remove from the nodes vector the nodes.
796
797 // We are iterating in MapPair items.
798
799 NodeVector node_vector = iterator->second;
800
801 // All the nodes in the node vector need to be removed from the
802 // mass_spectra vector passed as parameter if found.
803
804 for(auto &&node_p : node_vector)
805 {
806 NodeVectorIterator iterator = std::find(nodes.begin(), nodes.end(), node_p);
807
808 if(iterator != nodes.end())
809 {
810 // We found the node: remove it.
811
812 nodes.erase(iterator);
813
814 ++removed_vector_items;
815 }
816 }
817 }
818
819 // Now the second begin iterator, so that we can remove all the items
820 // contained in the second range, that is, ]end--end()[.
821
822 // The second_first_iterator will point to the item having its time value less
823 // or equal to end. But we do not want to get items having their time equal to
824 // end, only < end. So, if the iterator is not begin(), we just need to
825 // decrement it once.
826 MapIterator second_first_iterator = map_p->upper_bound(end);
827 if(second_first_iterator != map_p->begin())
828 --second_first_iterator;
829
830 for(MapIterator iterator = second_first_iterator; iterator != map_p->end(); ++iterator)
831 {
832 // We are iterating in MapPair items.
833
834 NodeVector node_vector = iterator->second;
835
836 // All the nodes in the node vector need to be removed from the
837 // mass_spectra vector passed as parameter if found.
838
839 for(auto &&node_p : node_vector)
840 {
841 NodeVectorIterator iterator = std::find(nodes.begin(), nodes.end(), node_p);
842
843 if(iterator != nodes.end())
844 {
845 // We found the node: remove it.
846
847 nodes.erase(iterator);
848
849 ++removed_vector_items;
850 }
851 }
852 }
853
854 return removed_vector_items;
855}
856
857
858std::size_t
860 double end,
861 QualMassSpectraVector &mass_spectra,
862 Enums::DataKind data_kind) const
863{
864 qDebug() << "Adding spectra" << "inside of [" << start << "-" << end << "]"
865 << dataKindMap[data_kind] << "range";
866
867 QElapsedTimer timer;
868 timer.start();
869
870 // if(start == end)
871 // qDebug() << "Special case, start and end are equal:" << start;
872
873 // We will use the maps that relate rt | dt to a vector of data tree nodes.
874 // Indeed, we may have more than one mass spectrum acquired for a given rt, in
875 // case of ion mobility mass spectrometry. Same for dt: we will have as many
876 // spectra for each dt as there are retention time values...
877
878 using DoubleNodeVectorMap = std::map<double, NodeVector>;
879 using MapIterator = DoubleNodeVectorMap::const_iterator;
880
881 const DoubleNodeVectorMap *map_p;
882
883 if(data_kind == Enums::DataKind::rt)
884 {
886
887 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
888 // start
889 //<< "end:" << end;
890 }
891 else if(data_kind == Enums::DataKind::dt)
892 {
894
895 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
896 // start
897 //<< "end:" << end;
898 }
899 else
900 qFatal("Programming error.");
901
902 QString msg = QString("The %1/mz map has size: %2 with start : %3 and end : %4\n")
903 .arg(dataKindMap[data_kind])
904 .arg(map_p->size())
905 .arg(start)
906 .arg(end);
907
908 qDebug() << msg;
909
910 std::size_t added_mass_spectra = 0;
911
912 // Get the iterator to the map item that has the key greater or equal to
913 // start.
914
915 MapIterator start_iterator = map_p->lower_bound(start);
916
917 if(start_iterator == map_p->end())
918 {
919 // qDebug() << "The start iterator is end()!";
920 return 0;
921 }
922
923 qDebug() << "The start_iterator points to:" << start_iterator->first << "as a"
924 << dataKindMap[data_kind] << "time.";
925
926 // Now get the end of the map's useful range of items.
927
928 // Returns an iterator pointing to the first element in the container whose
929 // key is considered to go after 'end'.
930
931 MapIterator end_iterator = map_p->upper_bound(end);
932
933 // Immediately verify if there is no distance between start and end.
934 if(!std::distance(start_iterator, end_iterator))
935 {
936 // qDebug() << "No range of mass spectra could be selected.";
937 return 0;
938 }
939
940 if(end_iterator == map_p->end())
941 {
942 qDebug() << "The end_iterator points to the end of the map."
943 << "The last map item is prev() at" << dataKindMap[data_kind]
944 << "key value: " << std::prev(end_iterator)->first;
945 }
946 else
947 {
948 qDebug() << "The end_iterator points to:" << end_iterator->first << "as a"
949 << dataKindMap[data_kind] << "time and the accounted key value is actually"
950 << std::prev(end_iterator)->first;
951 }
952
953 qDebug() << "The number of time values to iterate through the" << dataKindMap[data_kind]
954 << "/mz map:" << std::distance(start_iterator, end_iterator)
955 << "with values: start: " << start_iterator->first
956 << "and end: " << std::prev(end_iterator)->first;
957
958 // Now that we have the iterator range, iterate in it and get the mass
959 // spectra from each item's pair.second node vector.
960
961 for(MapIterator iterator = start_iterator; iterator != end_iterator; ++iterator)
962 {
963 // We are iterating in DoubleNodeVectorMap MapPair items,
964 // with double = rt or dt and NodeVector being just that.
965
966 NodeVector node_vector = iterator->second;
967
968 // All the nodes' mass spectra in the node vector need to be copied to
969 // the mass_spectra vector passed as parameter.
970
971 for(auto &&node_p : node_vector)
972 {
973 QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp =
974 node_p->getQualifiedMassSpectrum();
975
976#if 0
977 // Sanity check only for deep debugging.
978
979 if(qualified_mass_spectrum_csp == nullptr ||
980 qualified_mass_spectrum_csp.get() == nullptr)
981 {
983 "The QualifiedMassSpectrumCstSPtr cannot be nullptr.");
984 }
985 else
986 {
987 //qDebug() << "Current mass spectrum is valid with rt:"
988 //<< qualified_mass_spectrum_csp->getRtInMinutes();
989 }
990#endif
991
992 mass_spectra.push_back(qualified_mass_spectrum_csp);
993
994 ++added_mass_spectra;
995 }
996 }
997
998 qint64 elapsed = timer.elapsed(); // milliseconds
999
1000 qDebug() << "Took" << elapsed << "ms to add" << added_mass_spectra << "mass spectra";
1001
1002 return added_mass_spectra;
1003}
1004
1005
1006std::size_t
1008 double start, double end, QualMassSpectraVector &mass_spectra, Enums::DataKind data_kind) const
1009{
1010 qDebug() << "Removing spectra" << "outside of [" << start << "-" << end << "]"
1011 << dataKindMap[data_kind] << "range";
1012
1013 QElapsedTimer timer;
1014 timer.start();
1015
1016 using DoubleNodeVectorMap = std::map<double, NodeVector>;
1017 using MapIterator = DoubleNodeVectorMap::const_iterator;
1018
1019 const DoubleNodeVectorMap *map_p;
1020
1021 if(data_kind == Enums::DataKind::rt)
1022 {
1023 map_p = &m_rtDoubleNodeVectorMap;
1024
1025 // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
1026 // start
1027 //<< "end:" << end;
1028 }
1029 else if(data_kind == Enums::DataKind::dt)
1030 {
1031 map_p = &m_dtDoubleNodeVectorMap;
1032
1033 // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
1034 // start
1035 //<< "end:" << end;
1036 }
1037 else
1038 qFatal("Programming error.");
1039
1040 std::size_t removed_vector_items = 0;
1041
1042 // We want to remove from the nodes vector all the nodes that contain a mass
1043 // spectrum acquired at a time range outside of [ start-end ], that is, the
1044 // time values [begin() - start [ and ]end -- end()[.
1045
1046 // Looking for an iterator that points to an item having a time < start.
1047
1048 // lower_bound returns an iterator pointing to the first element in the
1049 // range [first, last) that is not less than (i.e. greater or equal to)
1050 // value, or last if no such element is found.
1051
1052 MapIterator first_end_iterator = (*map_p).lower_bound(start);
1053
1054 // first_end_iterator points to the item that has the next time value with
1055 // respect to start. This is fine because we'll not remove that point
1056 // because the for loop below will stop one item short of
1057 // first_end_iterator. That means that we effectively remove all the items
1058 // [begin() -> start[ (start not include). Exactly what we want.
1059
1060 // qDebug() << "lower_bound for start:" << first_end_iterator->first;
1061
1062 // Now that we have the first_end_iterator, we can iterate between [begin --
1063 // first_end_iterator[ and remove all the mass spectra listed in these
1064 // iterated nodes.
1065
1066 QSet<QualifiedMassSpectrumCstSPtr> set_of_mass_spectra_to_remove;
1067
1068 for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator; ++iterator)
1069 {
1070 // We are iterating in MapPair items.
1071
1072 NodeVector node_vector = iterator->second;
1073
1074 for(std::size_t index = 0; index < node_vector.size(); ++index)
1075 {
1076 set_of_mass_spectra_to_remove.insert(node_vector.at(index)->getQualifiedMassSpectrum());
1077
1078 ++removed_vector_items;
1079 }
1080 }
1081
1082 // Now the second begin iterator, so that we can remove all the items
1083 // contained in the second range, that is, ]end--end()[.
1084
1085 // The second_first_iterator will point to the item having its time value
1086 // less or equal to end. But we do not want to get items having their time
1087 // equal to end, only < end. So, if the iterator is not begin(), we just
1088 // need to decrement it once.
1089
1090 MapIterator second_first_iterator = map_p->upper_bound(end);
1091
1092 // second_first_iterator now points to the item after the one having time
1093 // end. Which is exactly what we want: we want to remove ]end--end()[ and
1094 // this is exactly what the loop starting a the point after end below.
1095
1096 // qDebug() << "second_first_iterator for end:" <<
1097 // second_first_iterator->first;
1098
1099 for(MapIterator iterator = second_first_iterator; iterator != map_p->end(); ++iterator)
1100 {
1101 // We are iterating in MapPair items.
1102
1103 NodeVector node_vector = iterator->second;
1104
1105
1106 for(std::size_t index = 0; index < node_vector.size(); ++index)
1107 {
1108 set_of_mass_spectra_to_remove.insert(node_vector.at(index)->getQualifiedMassSpectrum());
1109
1110 ++removed_vector_items;
1111 }
1112 }
1113
1114 // Now that we have the set of all the mass spectra to be removed from mass_spectra...
1115
1116 mass_spectra.erase(
1117 std::remove_if(mass_spectra.begin(),
1118 mass_spectra.end(),
1119 [&set_of_mass_spectra_to_remove](QualifiedMassSpectrumCstSPtr &ptr) {
1120 return set_of_mass_spectra_to_remove.find(ptr) !=
1121 set_of_mass_spectra_to_remove.end();
1122 }),
1123 mass_spectra.end());
1124
1125 qint64 elapsed = timer.elapsed(); // milliseconds
1126
1127 qDebug() << "Took" << elapsed << "ms to remove" << removed_vector_items << "mass spectra";
1128
1129 return removed_vector_items;
1130}
1131
1132
1133std::size_t
1135{
1136 // We want to know what is the depth of the tree, that is the highest level
1137 // of MSn, that is, n.
1138
1139 if(!m_rootNodes.size())
1140 return 0;
1141
1142 // qDebug() << "There are" << m_rootNodes.size() << "root nodes";
1143
1144 // By essence, we are at MS0: only if we have at least one root node do we
1145 // know we have MS1 data. So we already know that we have at least one
1146 // child, so start with depth 1.
1147
1148 std::size_t depth = 1;
1149 std::size_t tmp_depth = 0;
1150 std::size_t greatest_depth = 0;
1151
1152 for(auto &node : m_rootNodes)
1153 {
1154 tmp_depth = node->depth(depth);
1155
1156 // qDebug() << "Returned depth:" << tmp_depth;
1157
1158 if(tmp_depth > greatest_depth)
1159 greatest_depth = tmp_depth;
1160 }
1161
1162 return greatest_depth;
1163}
1164
1165
1166std::size_t
1168{
1169
1170 std::size_t cumulative_node_count = 0;
1171
1172 for(auto &node : m_rootNodes)
1173 {
1174 node->size(cumulative_node_count);
1175
1176 // qDebug() << "Returned node_count:" << node_count;
1177 }
1178
1179 return cumulative_node_count;
1180}
1181
1182
1183std::size_t
1185{
1186 return m_indexNodeMap.size();
1187}
1188
1189
1190std::size_t
1195
1196
1197} // namespace pappso
virtual void setNodesToProcessCount(std::size_t)=0
QualifiedMassSpectrumCstSPtr mcsp_massSpectrum
MsRunDataSetTreeNode * findNode(std::size_t spectrum_index)
std::vector< MsRunDataSetTreeNode * > m_children
MsRunDataSetTreeNode * findNode(QualifiedMassSpectrumCstSPtr mass_spectrum_csp) const
const std::vector< MsRunDataSetTreeNode * > & getRootNodes() const
std::vector< QualifiedMassSpectrumCstSPtr > QualMassSpectraVector
std::vector< MsRunDataSetTreeNode * > flattenedViewMsLevel(std::size_t ms_level, bool with_descendants=false)
std::size_t indexNodeMapSize() const
void accept(MsRunDataSetTreeNodeVisitorInterface &visitor)
MsRunDataSetTree(MsRunIdCstSPtr ms_run_id_csp)
std::vector< MsRunDataSetTreeNode * > flattenedView()
std::size_t addDataSetQualMassSpectraInsideDtOrRtRange(double start, double end, QualMassSpectraVector &mass_spectra, Enums::DataKind data_kind) const
bool documentNodeInDtRtMap(double time, MsRunDataSetTreeNode *node_p, Enums::DataKind data_kind)
std::size_t removeDataSetTreeNodesOutsideDtOrRtRange(double start, double end, NodeVector &nodes, Enums::DataKind data_kind) const
std::size_t getSpectrumCount() const
std::map< std::size_t, MsRunDataSetTreeNode * > m_indexNodeMap
std::vector< MsRunDataSetTreeNode * > m_rootNodes
std::map< double, NodeVector > DoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > precursorNodesByPrecursorMz(pappso_double mz, PrecisionPtr precision_ptr)
std::vector< MsRunDataSetTreeNode * > NodeVector
MsRunDataSetTreeNode * precursorNodeByProductSpectrumIndex(std::size_t product_spectrum_index)
const std::map< std::size_t, MsRunDataSetTreeNode * > & getIndexNodeMap() const
MsRunDataSetTreeNode * addMassSpectrum(QualifiedMassSpectrumCstSPtr mass_spectrum)
std::size_t massSpectrumIndex(const MsRunDataSetTreeNode *node) const
DoubleNodeVectorMap m_rtDoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > productNodesByPrecursorSpectrumIndex(std::size_t precursor_spectrum_index)
DoubleNodeVectorMap m_dtDoubleNodeVectorMap
std::size_t addDataSetTreeNodesInsideDtOrRtRange(double start, double end, NodeVector &nodes, Enums::DataKind data_kind) const
std::size_t removeDataSetQualMassSpectraOutsideDtOrRtRange(double start, double end, QualMassSpectraVector &mass_spectra, Enums::DataKind data_kind) const
@ dt
Drift time.
Definition types.h:252
@ rt
Retention time.
Definition types.h:251
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::map< Enums::DataKind, QString > dataKindMap
Definition types.cpp:8
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:61
std::shared_ptr< const QualifiedMassSpectrum > QualifiedMassSpectrumCstSPtr
const PrecisionBase * PrecisionPtr
Definition precision.h:122