libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
13
19
26
28
29
30namespace pappso
31{
32
33
/**
 * Stores file_name and xml_prefix in m_fileName and m_xmlPrefix, tests that
 * file_name exists, and initialises m_oboPsiModTermNativeIDFormat with the
 * "no nativeID format" term (accession MS:1000824).
 *
 * NOTE(review): listing lines 39 and 45 are absent from this view. Line 39
 * presumably opens the throw that carries the "File %1 not found." message,
 * and line 45 presumably starts the assignment of the definition text that
 * follows the m_name assignment — confirm against the original source.
 */
34MsFileAccessor::MsFileAccessor(const QString &file_name, const QString &xml_prefix)
35 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
36{
37 QFile file(file_name);
38 if(!file.exists())
40 QObject::tr("File %1 not found.").arg(QFileInfo(file_name).absoluteFilePath())));
41
42
 // Default nativeID term, used until a reader supplies its own.
43 m_oboPsiModTermNativeIDFormat.setAccession("MS:1000824");
44 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
46 "No nativeID format indicates that the file tagged with this term does not "
47 "contain spectra that can have a nativeID format.";
48}
49
50
59
63
64
65const QString &
67{
68 return m_fileName;
69}
70
71
77
/**
 * Builds the OboPsiModTerm (accession, name, definition) describing the file
 * format currently stored in m_fileFormat and returns it by value.
 *
 * The term starts default-constructed; branches that consist of a bare
 * break, and the default branch, return it untouched.
 *
 * NOTE(review): the case labels of this switch are absent from this listing,
 * so the mapping from m_fileFormat values to branches cannot be confirmed
 * here; each branch is identifiable only by the name it assigns.
 * NOTE(review): the last two branches ("BafAscii text format" and
 * "text format") both set accession "MS:1001369" — confirm this is intended.
 */
78const OboPsiModTerm
80{
81 OboPsiModTerm term;
82
83 // is_a: MS:1000560 ! mass spectrometer file format
84 switch(m_fileFormat)
85 {
87 term.setAccession("MS:1001560");
88 term.m_name = "SCIEX TOF/TOF T2D format";
89 term.m_definition =
90 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
91 "export format.";
92 break;
94 term.setAccession("MS:1000562");
95 term.m_name = "ABI WIFF format";
96 term.m_definition = "Applied Biosystems WIFF file format.";
97 break;
99 term.setAccession("MS:1001509");
100 term.m_name = "Agilent MassHunter format";
101 term.m_definition =
102 "A data file format found in an Agilent MassHunter directory which "
103 "contains raw data acquired by an Agilent mass spectrometer.";
104 break;
 // Branch without a term: the default-constructed value is returned.
106 break;
108 term.setAccession("MS:1000825");
109 term.m_name = "Bruker FID format";
110 term.m_definition = "Bruker FID file format.";
111 break;
113 term.setAccession("MS:1002817");
114 term.m_name = "Bruker TDF format";
115 term.m_definition = "Bruker TDF raw file format.";
116 break;
118 term.setAccession("MS:1000567");
119 term.m_name = "Bruker/Agilent YEP format";
120 term.m_definition = "Bruker/Agilent YEP file format.";
121 break;
123 term.setAccession("MS:1001062");
124 term.m_name = "Mascot MGF format";
125 term.m_definition = "Mascot MGF file format.";
126 break;
 // Branch without a term.
128 break;
130 term.setAccession("MS:1001881");
131 term.m_name = "mz5 format";
132 term.m_definition = "mz5 file format, modelled after mzML.";
133 break;
135 term.setAccession("MS:1000584");
136 term.m_name = "mzML format";
137 term.m_definition = "Proteomics Standards Inititative mzML file format.";
138 break;
140 term.setAccession("MS:1000566");
141 term.m_name = "ISB mzXML format";
142 term.m_definition = "Institute of Systems Biology mzXML file format.";
143 break;
 // Branch without a term.
145 break;
147
148 term.setAccession("MS:1000563");
149 term.m_name = "Thermo RAW format";
150 term.m_definition = "Thermo Scientific RAW file format.";
151 break;
 // Branch without a term.
153 break;
155 term.setAccession("MS:1000526");
156 term.m_name = "Waters raw format";
157 term.m_definition =
158 "Waters data file format found in a Waters RAW directory, generated "
159 "from an MS acquisition.";
160 break;
 // Same accession as the plain text branch that follows (see NOTE above).
162 term.setAccession("MS:1001369");
163 term.m_name = "BafAscii text format";
164 term.m_definition =
165 "Simple text file format obtained by exporting Bruker Baf to ascii "
166 "using Bruker software";
167 break;
169 term.setAccession("MS:1001369");
170 term.m_name = "text format";
171 term.m_definition =
172 "Simple text file format of \"m/z<separator>intensity\" value pairs "
173 "for a single mass spectrum, a PMF (or single MS2) search.";
174 break;
175 default:
176 break;
177 }
178
179 return term;
180}
181
182
183const OboPsiModTerm &
190
191
/**
 * Probes m_fileName with several readers in turn and returns the MS run ids
 * reported by the first reader that yields a non-empty list.
 *
 * Order of the attempts, as coded below:
 *  1. PwizMsFileReader on m_fileName;
 *  2. TimsMsFileReader on m_fileName if it is a directory, otherwise on the
 *     directory that contains it (on success m_fileName is replaced by that
 *     directory);
 *  3. BafAsciiFileReader on m_fileName (a pappso::PappsoException raised
 *     during this attempt is caught and only logged);
 *  4. XyMsFileReader on m_fileName.
 * On each success m_fileFormat is set from the reader's getFileFormat().
 * When no reader reports any run id, the empty list obtained from the last
 * attempt is returned.
 *
 * NOTE(review): several statements are absent from this listing (listing
 * lines 193, 207, 211, 240, 242, 269, 273 and 301). In particular the
 * declarations of `pref` and the condition that guards the clear() in the
 * BafAscii branch are not visible; `pref` is presumably the result of a
 * lookup in m_preferredFileReaderTypeMap — confirm against the original.
 */
192std::vector<MsRunIdCstSPtr>
194{
195 // qDebug();
196
197 // Try the PwizMsFileReader
198
199 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
200
201 std::vector<MsRunIdCstSPtr> ms_run_ids = pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
202 if(ms_run_ids.size())
203 {
204 qDebug() << "Might well be handled using the Pwiz code.";
205
206 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
208
209 // But the user might have configured one preferred reader type.
210
212 if(pref != m_preferredFileReaderTypeMap.end())
213 {
214 m_fileReaderType = pref->second;
215 }
216
217 return ms_run_ids;
218 }
219
220 qDebug() << "The Pwiz reader did not work.";
221
222 // Try the TimsData reader
223
 // The Tims reader is given a directory: m_fileName itself when it is one,
 // otherwise the directory that contains it.
224 QString tims_dir = m_fileName;
225 if(!QFileInfo(tims_dir).isDir())
226 {
227 tims_dir = QFileInfo(m_fileName).absolutePath();
228 }
229
230 TimsMsFileReader tims_file_reader(tims_dir);
231
232 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
233
234 if(ms_run_ids.size())
235 {
236 qDebug() << "Might well be handled using the Bruker code";
237
 // From here on the accessor refers to the directory, not the file.
238 m_fileName = tims_dir;
239 m_fileFormat = tims_file_reader.getFileFormat();
241
243 if(pref != m_preferredFileReaderTypeMap.end())
244 {
245 m_fileReaderType = pref->second;
246 }
247
248 qDebug() << "Returning Bruker::tims ms run(s)."
249 << "with preferred reader type:" << Utils::fileReaderTypeAsString(m_fileReaderType);
250
251 return ms_run_ids;
252 }
253
254 qDebug() << "The Tims reader did not work.";
255
256 // Try the Baf->ascii export format from Bruker Compass
257
258 try
259 {
260 ms_run_ids.clear();
261 BafAsciiFileReader baf_ascii_ms_file_reader(m_fileName);
262
263 ms_run_ids = baf_ascii_ms_file_reader.getMsRunIds(m_xmlPrefix);
264
265 if(ms_run_ids.size())
266 {
267 qDebug() << "Might well be handled using the BafAscii code";
268
270
271 m_fileFormat = baf_ascii_ms_file_reader.getFileFormat();
272
 // NOTE(review): the condition of this if/else is not visible here; when
 // it holds the ids are discarded and the XY reader is tried next.
274 {
275 ms_run_ids.clear();
276 }
277 else
278 {
279 return ms_run_ids;
280 }
281 }
282 }
283 catch(const pappso::PappsoException &error)
284 {
285 qDebug() << "This is not a BafAscii code file" << error.qwhat();
286 }
287
288
289 qDebug() << "The BafAscii reader did not work.";
290
291 // At this point try the XyMsFileReader
292
293 XyMsFileReader xy_ms_file_reader(m_fileName);
294
295 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
296
297 if(ms_run_ids.size())
298 {
299 qDebug() << "Might well be handled using the XY code";
300
302
303 m_fileFormat = xy_ms_file_reader.getFileFormat();
304
305 return ms_run_ids;
306 }
307
308 qDebug() << "The XY reader did not work.";
309
 // Nothing matched: ms_run_ids is empty at this point.
310 return ms_run_ids;
311}
312
313
314void
316{
317 // qDebug();
318
319 auto ret = m_preferredFileReaderTypeMap.insert(
320 std::pair<Enums::MsDataFormat, Enums::FileReaderType>(format, reader_type));
321
322 if(!ret.second)
323 {
324 // replace
325 ret.first->second = reader_type;
326 }
327}
328
329
332{
333 // qDebug();
334
335 auto ret = m_preferredFileReaderTypeMap.find(format);
336
337 if(ret != m_preferredFileReaderTypeMap.end())
338 {
339 return ret->second;
340 }
341
342 return m_fileReaderType;
343}
344
345
351
352
353void
355{
356 mcsp_selectedMsRunId = ms_run_id_csp;
357}
358
359
365
// Body of a member that builds a TimsMsRunReaderMs2 directly from the Tims
// data directory: m_fileName itself when it is a directory, otherwise the
// directory that contains it. On success m_fileFormat and m_fileName are
// updated and a reader for the first reported run id is returned.
//
// NOTE(review): the signature (listing lines 366-367) and the statement that
// opens the error path (listing line 390) are absent from this view. The
// page's member index suggests this is
// `TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()` — confirm.
368{
369 // try TimsData reader
370 QString tims_dir = m_fileName;
371 if(!QFileInfo(tims_dir).isDir())
372 {
373 tims_dir = QFileInfo(m_fileName).absolutePath();
374 }
375 TimsMsFileReader tims_file_reader(tims_dir);
376
377 std::vector<MsRunIdCstSPtr> ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
378
379 if(ms_run_ids.size())
380 {
381 // qDebug() << "Might well be handled using the Bruker code";
383 m_fileFormat = tims_file_reader.getFileFormat();
 // From here on the accessor refers to the directory, not the file.
384 m_fileName = tims_dir;
385
 // Only the first run id is used.
386 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
387 }
388 else
389 {
 // Error path: the message names the directory that could not be read.
391 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.").arg(tims_dir)));
392 }
393}
394
395
// Body of the member that returns a run reader for ms_run_id. It first
// requires that ms_run_id refers to the same file name as this accessor
// (ExceptionNotPossible otherwise), then selects one reader class per branch
// of the chain below. The Pwiz branches also copy the reader's nativeID
// format term into m_oboPsiModTermNativeIDFormat. When no branch applies a
// PappsoException("No file format was found.") is thrown.
//
// NOTE(review): the signature (listing lines 396-397) and the condition of
// every branch (listing lines 409, 416, 422, 428, 434, 440, 447, 453) are
// absent from this view, so what selects each branch cannot be confirmed
// here. The member index suggests the signature is
// `MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)` — confirm.
// NOTE(review): the error text below reads "must have the name file name";
// this looks like a typo for "same file name" — confirm before changing a
// user-visible string.
398{
399 // qDebug();
400
401 // We want to return a MsRunReader that accounts for the configuration that
402 // the user might have set.
403
404 if(m_fileName != ms_run_id->getFileName())
405 throw(
406 ExceptionNotPossible(QObject::tr("The MsRunId instance must have the name file name as the "
407 "MsFileAccessor.")));
408
410 {
411 // qDebug() << "Returning a PwizMsRunReader.";
412 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
413 m_oboPsiModTermNativeIDFormat = pwiz_reader->getOboPsiModTermNativeIDFormat();
414 return pwiz_reader;
415 }
417 {
418 // qDebug() << "Returning a XyMsRunReader.";
419
420 return std::make_shared<XyMsRunReader>(ms_run_id);
421 }
423 {
424 // qDebug() << "Returning a TimsMsRunReader.";
425
426 return std::make_shared<TimsMsRunReader>(ms_run_id);
427 }
429 {
430 // qDebug() << "Returning a TimsFramesMsRunReader.";
431
432 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
433 }
435 {
436 // qDebug() << "Returning a TimsMsRunReaderMs2.";
437
438 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
439 }
441 {
442 // qDebug() << "Returning a TimsMsRunReaderDia.";
443
444 // qInfo() << "std::make_shared<TimsMsRunReaderDia>(ms_run_id);";
445 return std::make_shared<TimsMsRunReaderDia>(ms_run_id);
446 }
448 {
449 // qDebug() << "Returning a BafAsciiMsRunReader.";
450
451 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
452 }
454 {
 // Within this branch the run id's own data format decides between the
 // XY reader and the Pwiz reader.
455 if(ms_run_id.get()->getMsDataFormat() == Enums::MsDataFormat::xy)
456 {
457 return std::make_shared<XyMsRunReader>(ms_run_id);
458 }
459 else
460 {
461 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
462 m_oboPsiModTermNativeIDFormat = pwiz_reader->getOboPsiModTermNativeIDFormat();
463 return pwiz_reader;
464 }
465 }
466 else
467 {
468 throw PappsoException(QObject::tr("No file format was found."));
469 }
470
 // Not reached: every branch above returns or throws.
471 return nullptr;
472}
473
474
476MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
477{
478 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
479 if(ms_run_id_index >= ms_run_ids.size())
480 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
481
482 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
483}
484
485
493
494
500
// Tail of the signature, and body, of a builder that returns a run reader
// chosen from the data format carried by ms_run_id:
//  - xy                -> XyMsRunReader
//  - brukerBafAscii    -> BafAsciiMsRunReader
//  - unknown           -> PappsoException
//  - brukerTims        -> reader chosen from preferred_file_reader_type
//                         (tims, tims_ms2, tims_frames; TimsMsRunReader for
//                         any other value)
//  - any other format  -> PwizMsRunReader
// ExceptionNotFound is thrown first when the file named by ms_run_id does
// not exist.
//
// NOTE(review): the first lines of the signature (listing lines 501-502) are
// absent from this view; the function name and the declaration of ms_run_id
// are therefore not visible here — confirm against the original source.
503 Enums::FileReaderType preferred_file_reader_type)
504{
505 QFile file(ms_run_id.get()->getFileName());
506 if(!file.exists())
507 throw(ExceptionNotFound(QObject::tr("unable to build a reader : file %1 not found.")
508 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
509
510 Enums::MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
511
512 if(file_format == Enums::MsDataFormat::xy)
513 {
514 // qDebug() << "Returning a XyMsRunReader.";
515
516 return std::make_shared<XyMsRunReader>(ms_run_id);
517 }
518 else if(file_format == Enums::MsDataFormat::brukerBafAscii)
519 {
520 // qDebug() << "Returning a BafAsciiMsRunReader.";
521
522 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
523 }
524 else if(file_format == Enums::MsDataFormat::unknown)
525 {
526 throw(PappsoException(QObject::tr("unable to build a reader for %1 : unknown file format")
527 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
528 }
529
530 else if(file_format == Enums::MsDataFormat::brukerTims)
531 {
 // For Tims data the caller's preference selects the reader class.
532 if(preferred_file_reader_type == Enums::FileReaderType::tims)
533 {
534 return std::make_shared<TimsMsRunReader>(ms_run_id);
535 }
536 else if(preferred_file_reader_type == Enums::FileReaderType::tims_ms2)
537 {
538 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
539 }
540 else if(preferred_file_reader_type == Enums::FileReaderType::tims_frames)
541 {
542 qDebug() << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
543 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
544 }
545 // qDebug() << "by default, build a TimsMsRunReader.";
546 return std::make_shared<TimsMsRunReader>(ms_run_id);
547 }
548 else
549 {
550 // qDebug() << "Returning a PwizMsRunReader .";
551 return std::make_shared<PwizMsRunReader>(ms_run_id);
552 }
553}
554
555
557MsFileAccessor::getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
558{
559 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
560 MsRunReaderSPtr reader_sp;
561 for(MsRunIdCstSPtr &original_run_id : run_list)
562 {
563 if(original_run_id.get()->getRunId() == run_id)
564 {
565 MsRunId new_run_id(*original_run_id.get());
566 new_run_id.setXmlId(xml_id);
567
568 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
569 }
570 }
571
572 if((run_id.isEmpty()) && (run_list.size() == 1))
573 {
574 MsRunId new_run_id(*run_list[0].get());
575 new_run_id.setXmlId(xml_id);
576
577 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
578 }
579
580
581 if(reader_sp == nullptr)
582 {
583 throw(ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
584 .arg(run_id)
585 .arg(QFileInfo(m_fileName).absoluteFilePath())));
586 }
587 return reader_sp;
588}
589
590
591} // namespace pappso
virtual Enums::MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
MsRunIdCstSPtr getSelectedMsRunId() const
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
Enums::FileReaderType m_fileReaderType
Enums::MsDataFormat m_fileFormat
void setPreferredFileReaderType(Enums::MsDataFormat format, Enums::FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
Enums::MsDataFormat getFileFormat() const
get the raw format of mz data
Enums::FileReaderType getFileReaderType() const
get the file reader type
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunId()
MsRunIdCstSPtr mcsp_selectedMsRunId
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
void setSelectedMsRunId(MsRunIdCstSPtr ms_run_id_csp)
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an msrun reader by finding the run_id in file
Enums::FileReaderType getpreferredFileReaderType(Enums::MsDataFormat format)
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
std::map< Enums::MsDataFormat, Enums::FileReaderType > m_preferredFileReaderTypeMap
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, builds directly a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition msrunid.cpp:137
void setAccession(const QString &accession)
virtual const QString & qwhat() const
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
virtual Enums::MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
static QString fileReaderTypeAsString(Enums::FileReaderType file_reader_type)
Definition utils.cpp:501
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual Enums::MsDataFormat getFileFormat() override
@ unknown
unknown format
Definition types.h:151
@ SQLite3
SQLite3 format.
Definition types.h:155
@ MGF
Mascot format.
Definition types.h:154
@ pwiz
using libpwizlite
Definition types.h:178
@ tims
TimsMsRunReader : each scan is returned as a mass spectrum.
Definition types.h:181
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition msrunreader.h:57
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46