Skip to content

Commit

Permalink
use infer_from_path to determine filetype
Browse files Browse the repository at this point in the history
This should add support for gzipped files, MzMLb and Thermo raw files.
  • Loading branch information
paretje committed Aug 13, 2024
1 parent 86da407 commit 503e0c9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 18 deletions.
37 changes: 23 additions & 14 deletions src/file_types.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
use mzdata::io::MassSpectrometryFormat;

/// Spectrum file formats distinguished by `match_file_type`.
///
/// In the callers visible alongside this definition, the MGF, mzML, mzMLb and
/// Thermo variants are handed to `parse_mzdata`, the Bruker variant to
/// `parse_timsrust`, and `Unknown` is reported as an error.
pub enum SpectrumFileType {
/// Selected when mzdata reports `MassSpectrometryFormat::MGF`.
MascotGenericFormat,
/// Selected when mzdata reports `MassSpectrometryFormat::MzML`.
MzML,
/// Selected when mzdata reports `MassSpectrometryFormat::MzMLb`.
MzMLb,
/// Selected for a `d` or `ms2` extension, or when `folder_contains_extension`
/// is true for both `bin` and `parquet` on the given path.
BrukerRaw,
// ThermoRaw,
/// Selected when mzdata reports `MassSpectrometryFormat::ThermoRaw`.
ThermoRaw,
/// No supported format was recognised; the visible callers turn this into an
/// "Unsupported file type" error.
Unknown,
}

/// Determines the [`SpectrumFileType`] for `spectrum_path`.
///
/// NOTE(review): this listing appears to interleave the pre- and post-change
/// bodies of the function (the diff markers were lost), so it is not
/// compilable as shown. The extension-only lookup at the top looks like the
/// removed code and the `infer_from_path` match below it like the
/// replacement — confirm against the repository before relying on either.
///
/// In the `infer_from_path` version, the format reported by
/// `mzdata::io::infer_from_path` is mapped onto the corresponding variant.
/// When mzdata reports `Unknown`, the lower-cased text after the last `.` of
/// the path is checked for `d` / `ms2`; failing that, the path is treated as
/// Bruker raw data only if `folder_contains_extension` is true for both `bin`
/// and `parquet`. Any other mzdata format yields `SpectrumFileType::Unknown`.
pub fn match_file_type(spectrum_path: &str) -> SpectrumFileType {
// NOTE(review): presumably the removed, extension-only implementation.
let extension = spectrum_path.split('.').last().unwrap_or("").to_lowercase();
match extension.as_str() {
"mgf" => SpectrumFileType::MascotGenericFormat,
"mzml" => SpectrumFileType::MzML,
"d" | "ms2" => SpectrumFileType::BrukerRaw,
// "raw" => SpectrumFileType::ThermoRaw,
_ => match (
folder_contains_extension(spectrum_path, "bin"),
folder_contains_extension(spectrum_path, "parquet"),
) {
(true, true) => SpectrumFileType::BrukerRaw,
_ => SpectrumFileType::Unknown,
},
// NOTE(review): presumably the replacement implementation starts here.
// Only the first element of the value returned by `infer_from_path` is used.
match mzdata::io::infer_from_path(spectrum_path).0 {
MassSpectrometryFormat::MGF => SpectrumFileType::MascotGenericFormat,
MassSpectrometryFormat::MzML => SpectrumFileType::MzML,
MassSpectrometryFormat::MzMLb => SpectrumFileType::MzMLb,
MassSpectrometryFormat::ThermoRaw => SpectrumFileType::ThermoRaw,
MassSpectrometryFormat::Unknown => {
// mzdata did not recognise the path: fall back to the Bruker checks.
let extension = spectrum_path.split('.').last().unwrap_or("").to_lowercase();
match extension.as_str() {
"d" | "ms2" => SpectrumFileType::BrukerRaw,
// No matching extension: require both `bin` and `parquet` to be
// reported by `folder_contains_extension`.
_ => match (
folder_contains_extension(spectrum_path, "bin"),
folder_contains_extension(spectrum_path, "parquet"),
) {
(true, true) => SpectrumFileType::BrukerRaw,
_ => SpectrumFileType::Unknown,
},
}
}
// Any other format mzdata can report is treated as unsupported.
_ => SpectrumFileType::Unknown
}
}

Expand Down
6 changes: 2 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ pub fn get_precursor_info(spectrum_path: String) -> PyResult<HashMap<String, Pre
let file_type = match_file_type(&spectrum_path);

let precursors = match file_type {
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML => {
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML | SpectrumFileType::MzMLb | SpectrumFileType:: ThermoRaw => {
parse_mzdata::parse_precursor_info(&spectrum_path)
}
SpectrumFileType::BrukerRaw => parse_timsrust::parse_precursor_info(&spectrum_path),
// SpectrumFileType::ThermoRaw => parse_with_mzdata_thermo(&spectrum_path, file_type),
SpectrumFileType::Unknown => return Err(PyOSError::new_err("Unsupported file type")),
};

Expand All @@ -39,11 +38,10 @@ pub fn get_ms2_spectra(spectrum_path: String) -> PyResult<Vec<ms2_spectrum::MS2S
let file_type = match_file_type(&spectrum_path);

let spectra = match file_type {
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML => {
SpectrumFileType::MascotGenericFormat | SpectrumFileType::MzML | SpectrumFileType::MzMLb | SpectrumFileType:: ThermoRaw => {
parse_mzdata::read_ms2_spectra(&spectrum_path)
}
SpectrumFileType::BrukerRaw => parse_timsrust::read_ms2_spectra(&spectrum_path),
// SpectrumFileType::ThermoRaw => parse_with_mzdata_thermo(&spectrum_path, file_type),
SpectrumFileType::Unknown => return Err(PyOSError::new_err("Unsupported file type")),
};

Expand Down

0 comments on commit 503e0c9

Please sign in to comment.