Toffee C++ API¶
Contents
Creating a Toffee File¶
The following API documentation gives a flavour for how you can create a
toffee
file. However, it is best explained by way of example, such as the
following python code in the unit testing framework. As mentioned elsewhere,
the python and C++ APIs are laregly equivalent.
class DummyToffeeFile():
def __init__(
self,
name,
num_rt_points=1626,
scan_cycle_time=3.32,
ms1_rt_offset=0.17,
ms2_rt_offset=0.506,
num_mz_points=1000,
ms2_name=None,
ims_type=toffee.IntrinsicMassSpacingType.TOF,
fraction_not_zero=0.01,
):
self.name = name
self.hash = sum(ord(ch) for ch in name)
self.seed = self.hash % ((np.iinfo(np.int32).max + 1) * 2)
logger.info('%s: hash=%s, seed=%s', self.name, self.hash, self.seed)
np.random.seed(self.seed)
self.header = self._header(ims_type)
# create retention time vectors for each window
self.scan_cycle_time = scan_cycle_time
self.ms1_rt_offset = ms1_rt_offset
self.ms2_rt_offset = ms2_rt_offset
self.ms1_rt_vector = np.arange(num_rt_points) * self.scan_cycle_time + self.ms1_rt_offset
self.ms2_rt_vector = np.arange(num_rt_points) * self.scan_cycle_time + self.ms2_rt_offset
# set the mass over charge bounds of the data
lower = 90
upper = 2100
(
self.ims_alpha,
self.ims_beta,
self.ims_gamma,
self.ims_coord_vector,
self.mz_vector,
) = self.create_mz_vector(ims_type, lower, upper, num_mz_points)
# we're going to create 1x MS1 window and 1x MS2 window
self.ms1_name = toffee.ToffeeWriter.MS1_NAME
if ms2_name is None:
ms2_name = toffee.ToffeeWriter.ms2Name(41)
self.ms2_name = ms2_name
# window boundaries match ProCan90-M06-07.tof/ms2-041
self.window_lower, self.window_center, self.window_upper = (608.5, 611.5, 614.5)
# create a dense matrix of fake data
ms1_mz, ms1_rt = np.meshgrid(self.mz_vector, self.ms1_rt_vector)
self.expected_intensities_ms1 = (ms1_rt * ms1_mz).astype(np.uint32)
ms2_mz, ms2_rt = np.meshgrid(self.mz_vector, self.ms2_rt_vector)
self.expected_intensities_ms2 = (ms2_rt * ms2_mz).astype(np.uint32)
# generate our toffee writer so we can add windows to it
self.tof_fname = name + '.tof'
self.ims_type = ims_type
writer = toffee.ToffeeWriter(self.tof_fname, self.ims_type)
writer.addHeaderMetadata(self.header)
# loop through the two windows, modify their intensities by setting
# a random seclectin to zero, convert this into a point cloud and
# then add it to the toffee writer
self.scan_data = {}
for map_name, rt_vector, expected_intensities in zip(
[self.ms1_name, self.ms2_name],
[self.ms1_rt_vector, self.ms2_rt_vector],
[self.expected_intensities_ms1, self.expected_intensities_ms2],
):
# create the raw data container
scan_data = toffee.RawSwathScanData(map_name, ims_type)
if map_name == self.ms2_name:
scan_data.windowLower = self.window_lower
scan_data.windowCenter = self.window_center
scan_data.windowUpper = self.window_upper
else:
scan_data.windowLower = -1
scan_data.windowCenter = -1
scan_data.windowUpper = -1
# slice the data as if we were a mass spectrometer creating 'scans'
for r, rt_value in enumerate(rt_vector):
intensity_vector = expected_intensities[r, :]
# add some zeros by generating a mask and assigning a random
# fraction to be True - then set anything in the intensity
# vector, masked by True, to be zero. In one case, set all
# intensities to be zero (mimicking a scan with no data)
if r % 10 == 0:
tmp_fraction_not_zero = 0.0
else:
tmp_fraction_not_zero = fraction_not_zero
mask = np.random.uniform(0, 1, size=intensity_vector.shape)
mask[mask < tmp_fraction_not_zero] = 0
mask[mask != 0] = 1
mask = mask.astype(bool)
intensity_vector[mask] = 0
# double check the masking worked as expected
if (~mask).any():
assert (intensity_vector > 0).any()
mz = self.mz_vector[~mask]
intens = intensity_vector[~mask]
# add to the scan
scan_data.addScan(rt_value, mz, intens)
# create our point cloud
pcl = scan_data.toPointCloud()
# save scan data for later
self.scan_data[map_name] = scan_data
# assert that the point cloud properties were set correctly
assert pcl.name == map_name
assert pcl.imsType == ims_type
assert pcl.windowLower == scan_data.windowLower
assert pcl.windowCenter == scan_data.windowCenter
assert pcl.windowUpper == scan_data.windowUpper
# assert that the retention time was correctly calculated
np.testing.assert_allclose(pcl.scanCycleTime, self.scan_cycle_time)
if map_name == self.ms2_name:
np.testing.assert_allclose(pcl.firstScanRetentionTimeOffset, self.ms2_rt_offset)
else:
np.testing.assert_allclose(pcl.firstScanRetentionTimeOffset, self.ms1_rt_offset)
ims_props = pcl.imsProperties
assert len(ims_props.sliceIndex) == num_rt_points, \
'{} {}\n{}'.format(len(ims_props.sliceIndex), num_rt_points, ims_props.sliceIndex)
# assert that the Intrinsic Mass Spacing parameters were calculated
# correctly
np.testing.assert_allclose(ims_props.medianAlpha, self.ims_alpha)
np.testing.assert_allclose(ims_props.medianBeta, self.ims_beta)
np.testing.assert_allclose(ims_props.gamma, self.ims_gamma)
# add to the file
writer.addPointCloud(pcl)
@classmethod
def create_mz_vector(cls, ims_type, lower, upper, num_mz_points):
"""
Create a m/z vector that respects the IMS spacings for a Sciex (TOF)
type instrument
"""
if ims_type == toffee.IntrinsicMassSpacingType.TOF:
return cls._create_mz_vector_tof(lower, upper, num_mz_points)
elif ims_type == toffee.IntrinsicMassSpacingType.ORBITRAP:
return cls._create_mz_vector_orbitrap(lower, upper, num_mz_points)
raise ValueError('Upsupported IMS type: {}'.format(ims_type))
@classmethod
def _create_mz_vector_tof(cls, lower, upper, num_mz_points):
"""
Create a m/z vector that respects the IMS spacings for a Sciex (TOF)
type instrument
"""
sqrt_lower = np.sqrt(lower)
sqrt_upper = np.sqrt(upper)
# and use these to create dummy Intrinsic Mass Spacing properties
ims_alpha = (sqrt_upper - sqrt_lower) / num_mz_points
ims_gamma = int(np.round(sqrt_lower / ims_alpha, decimals=0))
ims_beta = sqrt_lower - ims_gamma * ims_alpha
if ims_beta > 0:
ims_gamma += 1
ims_beta -= ims_alpha
# from these create an mass over charge vector with known properties
ims_coord = np.arange(num_mz_points + 1).astype(int)
mz = ((ims_coord + ims_gamma) * ims_alpha + ims_beta) ** 2
# add some padding to ims
ims_gamma -= 10
ims_coord += 10
return ims_alpha, ims_beta, ims_gamma, ims_coord, mz
@classmethod
def _create_mz_vector_orbitrap(cls, lower, upper, num_mz_points):
"""
Create a m/z vector that respects the IMS spacings for a Thermo (Orbitrap)
type instrument
"""
inverse_sqrt_lower = 1 / np.sqrt(lower)
inverse_sqrt_upper = 1 / np.sqrt(upper)
# and use these to create dummy Intrinsic Mass Spacing properties
ims_alpha = (inverse_sqrt_upper - inverse_sqrt_lower) / num_mz_points
ims_gamma = int(np.round(inverse_sqrt_lower / ims_alpha, decimals=0))
ims_beta = inverse_sqrt_lower - ims_gamma * ims_alpha
if ims_beta > 0:
ims_gamma -= 1
ims_beta += ims_alpha
# from these create an mass over charge vector with known properties
ims_coord = np.arange(num_mz_points + 1).astype(int)
mz = ((ims_coord + ims_gamma) * ims_alpha + ims_beta) ** -2
# add some padding to ims
ims_gamma -= 10
ims_coord += 10
return ims_alpha, ims_beta, ims_gamma, ims_coord, mz
@classmethod
def _header(cls, ims_type):
if ims_type == toffee.IntrinsicMassSpacingType.TOF:
header_fname = 'Header-TOF.mzML'
elif ims_type == toffee.IntrinsicMassSpacingType.ORBITRAP:
header_fname = 'Header-Orbitrap.mzML'
else:
raise ValueError('Upsupported IMS type: {}'.format(ims_type))
curdir = os.path.dirname(os.path.abspath(__file__))
header_fname = os.path.join(curdir, header_fname)
with open(header_fname, 'r') as f:
return f.read()
ScanDataPointCloud¶
-
class ScanDataPointCloud¶
The data from a single MS1 or MS2 window collected as a point cloud of
X
points discovered fromN
scansPublic Members
-
std::string name¶
The name of the window. This should match one of the formats specified by toffee::ToffeeWriter
-
double windowLower¶
The lower (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
double windowCenter¶
The center (m/z) of the MS selection window - should be
-1
for MS1 scans
-
double windowUpper¶
The upper (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
double scanCycleTime¶
The time (in seconds) between each scan for this window. In a typical DIA workflow, we anticipate that this is constant for all windows in a SwathRun
-
double firstScanRetentionTimeOffset¶
The time (in seconds) from the start of the experiment to the first scan for this window.
-
IntrinsicMassSpacingType imsType¶
The Intrinsic Mass Spacing
type
(i.e. ‘TOF’, ‘Orbitrap’)
-
IMSProperties imsProperties¶
The Intrinsic Mass Spacing properties.
-
std::vector<uint32_t> intensity¶
The intensity value for each of the
X
points in the point cloud.
-
std::string name¶
RawSwathScanData¶
-
class RawSwathScanData¶
The raw data from a single MS1 or MS2 window collected as a vector of
N
individual scans at discrete retention timesPublic Functions
-
explicit RawSwathScanData(const std::string &name, IntrinsicMassSpacingType imsType)¶
- Parameters
name – the name of the window. This should match one of the formats specified by toffee::ToffeeWriter
-
void addScan(double rt, const std::vector<double> &mz, const std::vector<uint32_t> &intensity)¶
Add a scan to the raw data
- Parameters
rt – the retention time of this scan
mz – the mass over charge data collected during this scan
intensity – the intensity data collected during this scan
The mass accuracy can be used as a debugging tool to keep track of the actual and converted mass over charge values during conversion to point cloud (index space) using the Intrinsic Mass Spacing.
- Parameters
if – this is not null, then a debugging step will be taken during the conversion to a point cloud
-
ScanDataPointCloud toPointCloud() const¶
Convert the raw data into the point cloud format. Included in this is the conversion of mass over charge to √(m/z) index space using the Intrinsic Mass Spacing properties (c.f. toffee::IMassOverChargeRange)
Public Members
-
const std::string name¶
The name of the window. This should match one of the formats specified by toffee::ToffeeWriter
-
double windowLower¶
The lower (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
double windowCenter¶
The center (m/z) of the MS selection window - should be
-1
for MS1 scans
-
double windowUpper¶
The upper (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
IntrinsicMassSpacingType imsType¶
The type of mass analyzer in terms of the Intrinsic Mass Spacing.
-
size_t numberOfScans = 0¶
The number (
N
) of scans in this window.
-
std::vector<double> retentionTime¶
A vector (of size
N
) retention times.
-
std::vector<std::vector<double>> mzData¶
A vector (of size
N
) of vectors where the nested vector contains the mass over charge data collected during then-th
scan
-
std::vector<std::vector<uint32_t>> intensityData¶
A vector (of size
N
) of vectors where the nested vector contains the intensity data collected during then-th
scan
-
explicit RawSwathScanData(const std::string &name, IntrinsicMassSpacingType imsType)¶
ToffeeWriter¶
-
class ToffeeWriter¶
If we consider the raw data collected within a given window across a full gradient during a SWATH-MS run, each data point represents:
Mass over charge The abscissa refers to the mass over charge ratio of the peptide ion
Retention time: The ordinate refers to the time at which the peptide ion is elutes from the Liquid Chromatography column
Intensity: The applicate is a count of the number of times a peptide ion is detected on the sensor
Through the Intrinsic Mass Spacing, mass over charge can be represented by an unsigned integer, as can intensity. Furthermore, the retention time for each data point can be considered as an index into an associated (but much smaller) vector of doubles. In this manner, we can save the raw data into a file format as 3 vectors of unsigned 32-bit integers with some associated metadata.
toffee
takes this exact approach, saving each SWATH-MS window as a group in an HDF5 file; where each group has:double
attributeslower
,center
, andupper
to describe the properties of the window;double
attributesscanCycleTime
($t_c$) andfirstScanRetentionTimeOffset
($t_0$) that describe the retention time vector for each window such that $t(i) = t_c i + t_0$ in seconds;a
uint32
dataset ofretentionTimeIdx
, of equal length as the retention time vector, that represents indices into theimsCoord
andintensity
between which the corresponding value of $t(i)$ is applied;double
attributesimsAlpha
(${IMS}$) andimsBeta
(${trunc}$) for the Intrinsic Mass Spacing m/z conversion such that ${m}{z} = ( {IMS} i_{{m/z}} + {trunc} )^2$; anduint32
datasets ofimsCoord
andintensity
for the point cloud, with one entry for each point in the cloud.
Thus, through this ToffeeWriter, we can generate a file with lossless compression that can represent a SWATH-MS
mzML
file with a ten-fold decrease in the file size - making it on par with the file size of the original vendor format.Public Functions
-
explicit ToffeeWriter(const std::string &h5FilePath, IntrinsicMassSpacingType imsType)¶
- Parameters
h5FilePath – the output path of the toffee file. If a file already exists at that path, it will be over-written
-
void addPointCloud(const ScanDataPointCloud &pcl) const¶
Add a point cloud for a given window to the toffee file. If a window with the same name already exists in the file, it will throw an exception
- Parameters
pcl – the point cloud to be added
-
void addHeaderMetadata(const std::string &header) const¶
Add a header string to the HDF5 file. In normal circumstances, this will be a PSI-formatted XML string matching the header for mzML files
Public Static Functions
-
static std::string ms2Name(const size_t &idx)¶
Name of the MS2 HDF5 group corresponding to the specified index.
-
static std::string imsTypeAsStr(const IntrinsicMassSpacingType &imsType)¶
Name of the IMS Type in string format.
-
static IntrinsicMassSpacingType imsTypeFromStr(const std::string &imsTypeAsStr)¶
Convert the name of the IMS Type in string format to the enum.
Public Static Attributes
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_MAJOR_VERSION = "FILE_FORMAT_MAJOR_VERSION"
Name of the file major version HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_MINOR_VERSION = "FILE_FORMAT_MINOR_VERSION"
Name of the file minor version HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_LIBRARY_VERSION = "CREATED_BY_LIBRARY_VERSION"
Name of the softare version used to create the file HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_EXPERIMENT_METADATA = "metadataXML"
Name of the run’s header HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_TYPE = "IMSType"
Name of a window’s Intrinsic Mass Spacing
type
HDF5 attribute Valid values are: TOF, Orbitrap
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_ALPHA = "IMSAlpha"
Name of a window’s Intrinsic Mass Spacing
alpha
HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_BETA = "IMSBeta"
Name of a window’s Intrinsic Mass Spacing
beta
HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_GAMMA = "IMSGamma"
Name of a window’s Intrinsic Mass Spacing
gamma
HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_BOUNDS_LOWER = "precursorLower"
Name of a window’s mass over charge lower bounds HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_BOUNDS_UPPER = "precursorUpper"
Name of a window’s center mass over charge HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_CENTER = "precursorCenter"
Name of a window’s mass over charge upper bounds HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_SCAN_CYCLE_TIME = "scanCycleTime"
Name of a window’s scan cycle time HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_RETENTION_TIME_OFFSET = "firstScanRetentionTimeOffset"
Name of a window’s first retention time offset HDF5 attribute.
- static TOF_WIN_DLL_DECLSPEC const std::string DATASET_RETENTION_TIME_IDX = "retentionTimeIdx"
Name of a window’s retention time index HDF5 dataset.
- static TOF_WIN_DLL_DECLSPEC const std::string DATASET_IMS_ALPHA_PER_SCAN = "IMSAlphaPerScan"
Name of a window’s Intrinsic Mass Spacing
alpha per scan
HDF5 dataset.
- static TOF_WIN_DLL_DECLSPEC const std::string DATASET_IMS_BETA_PER_SCAN = "IMSBetaPerScan"
Name of a window’s Intrinsic Mass Spacing
beta per scan
HDF5 dataset.
- static TOF_WIN_DLL_DECLSPEC const std::string DATASET_MZ_IMS_IDX = "imsCoord"
Name of a window’s √(m/z) index HDF5 dataset.
- static TOF_WIN_DLL_DECLSPEC const std::string DATASET_INTENSITY = "intensity"
Name of a window’s intensity HDF5 dataset.
- static TOF_WIN_DLL_DECLSPEC const std::string MS1_NAME = std::string{"ms1"}
Name of the MS1 HDF5 group.
- static TOF_WIN_DLL_DECLSPEC const std::string MS2_PREFIX = std::string{"ms2-"}
Prefix for the MS2 HDF5 groups.
Accessing Data in a Toffee File¶
SwathRun¶
-
class SwathRun¶
A class that enables access to properties of a full toffee file.
Public Functions
-
explicit SwathRun(const std::string &fname)¶
- Parameters
fname – path to the toffee file of interest
-
inline std::string header() const¶
Extract the metadata header from the specified file. In usual circumstances one could expect this to look like a PSI-formatter XML description of the run
-
inline int formatMajorVersion() const¶
The file format major version.
-
inline int formatMinorVersion() const¶
The file format minor version.
-
inline IntrinsicMassSpacingType imsType() const¶
The type of mass analyzer Intrinsic Mass Spacing.
-
inline bool hasMS1Data() const¶
Return true if this toffee file contains data for the MS1 window.
-
inline std::vector<MS2WindowDescriptor> ms2Windows() const¶
Return a vector of descriptions of each the MS2 windows contained in the toffee file
-
std::map<double, std::string> mapPrecursorsToMS2Names(const std::vector<double> &precursorMzValues, double lowerMzOverlap, double upperMzOverlap) const¶
It is often useful to know which precursors map to specific MS2 windows in the toffee file. For instance, this can be useful for performance reasons when iterating through a spectral library
- Parameters
precursorMzValues – A vector of mass over charge values that correspond to the precursors that we wish to map to MS2 windows
lowerMzOverlap – It is usual to have MS2 windows overlap in an experiment, this allows us to exclude precursors with this offest from the lower boundary of an MS2 window
upperMzOverlap – It is usual to have MS2 windows overlap in an experiment, this allows us to exclude precursors with this offest from the upper boundary of an MS2 window
-
SwathMap loadSwathMap(const std::string &mapName, bool lossyAdjustIMSCoords = false) const¶
Load a SwathMap for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.This function is primarily for the python wrapping, when calling from C++, we recommend using
SwathRun::loadSwathMapPtr
- Parameters
mapName – name of the window of interest (must match the naming convention to
ToffeeWriter
)lossyAdjustIMSCoords – if true, the IMS coords are translated from the scan specific alpha and beta to the window alpha and beta in a way that minimizes the ppm error. Note, this will minimize the mass error of the transform, but this is a lossy operation that cannot be undone!
-
std::shared_ptr<SwathMap> loadSwathMapPtr(const std::string &mapName, bool lossyAdjustIMSCoords = false) const¶
Load a SwathMap for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.- Parameters
mapName – name of the window of interest (must match the naming convention to
ToffeeWriter
)lossyAdjustIMSCoords – if true, the IMS coords are translated from the scan specific alpha and beta to the window alpha and beta in a way that minimizes the ppm error. Note, this will minimize the mass error of the transform, but this is a lossy operation that cannot be undone!
-
SwathMapInMemorySpectrumAccess loadSwathMapInMemorySpectrumAccess(const std::string &mapName) const¶
Load a SwathMapInMemorySpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.This function is primarily for the python wrapping, when calling from C++, we recommend using
SwathRun::loadSwathMapPtr
- Parameters
mapName – name of the window of interest (must match the naming convetion to
ToffeeWriter
)
-
std::shared_ptr<SwathMapInMemorySpectrumAccess> loadSwathMapInMemorySpectrumAccessPtr(const std::string &mapName) const¶
Load a SwathMapInMemorySpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.- Parameters
mapName – name of the window of interest (must match the naming convetion to
ToffeeWriter
)
-
SwathMapSpectrumAccess loadSwathMapSpectrumAccess(const std::string &mapName) const¶
Load a SwathMapSpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.This function is primarily for the python wrapping, when calling from C++, we recommend using
SwathRun::loadSwathMapPtr
- Parameters
mapName – name of the window of interest (must match the naming convetion to
ToffeeWriter
)
-
std::shared_ptr<SwathMapSpectrumAccess> loadSwathMapSpectrumAccessPtr(const std::string &mapName) const¶
Load a SwathMapSpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.- Parameters
mapName – name of the window of interest (must match the naming convetion to
ToffeeWriter
)
-
explicit SwathRun(const std::string &fname)¶
-
class MS2WindowDescriptor¶
A simple data class for describing an MS2 window.
Public Functions
-
inline bool operator==(const MS2WindowDescriptor &other) const¶
Windows are considered equal if and only if they have the same name, and upper and lower mass over charge ranges
Public Members
-
std::string name¶
The name of the window in the toffee file.
-
double lower¶
The lower bound of the range of precursor mass over charge values that are selected to be fragmented in this MS2 window
-
double upper¶
The upper bound of the range of precursor mass over charge values that are selected to be fragmented in this MS2 window
-
inline bool operator==(const MS2WindowDescriptor &other) const¶
SwathMap¶
-
class SwathMap¶
The SwathMap class is one of the key elements of the the
toffee
library. It allows access raw Time of Flight mass spectrometry data in near constant time, with relatively a small memory footprint.Public Types
-
using RTreeIndex = int32_t¶
Defines the type used to index the mass over charge and retention time values in the RTree. For search performance and memory optimisation, we use
int32_t
as our index type — if the resolution of the mass spectrometer means greater than 4e9 mass over charge values can be recorded using the intrinsic mass spacing method of toffee, then this will need to change
-
using RtImsCoord = boost::geometry::model::point<RTreeIndex, 2, boost::geometry::cs::cartesian>¶
A coordinate in the Point Cloud with retention time as the ‘x’ value and √(m/z) index as the ‘y’ value.
-
using RTreeIntensity = uint32_t¶
Defines the type used to record intensity values in the RTree. For memory optimisation, intensity values remain as
uint32_t
— if the sensitivity of the mass spectrometer means signals greater than 4e9 can be recorded, then this will need to change
-
using PCLPoint = std::pair<RtImsCoord, RTreeIntensity>¶
A point in the point cloud, with the signal intesity as the ‘value’.
-
using RStar = boost::geometry::index::rstar<32>¶
The integer value passed here defines the number of points per leaf node of the RTree. It is selected based on a balance between search performance and memory. Using 4 is analogous to a quad-tree.
-
using RTree = boost::geometry::index::rtree<PCLPoint, RStar>¶
A spatial search tree from boost that can be used for high performance spatial searching of the data contained within a SwathMap
-
using Box = boost::geometry::model::box<RtImsCoord>¶
A type used for defining axis aligned box queries of the RTree.
-
using Segment = boost::geometry::model::segment<RtImsCoord>¶
A type used for defining segment queries of the RTree.
Public Functions
Primary constructor of a SwathMap. In general operation, it is unlikely that a user of the library will need to call this directly. Instead, prefer functions on
SwathRun
that load in windows.- Parameters
name – name of the MS selection window that is being represented by this data. This must take the form of ToffeeWriter::MS1_NAME or ToffeeWriter::ms2Name
lower – lower (m/z) bound of the MS selection window
center – center (m/z) of the MS selection window
upper – upper (m/z) bound of the MS selection window
isMS1 – if true, this SwathMap represents data from the MS1 precursor ions, otherwise, the data is from MS2 fragment ions
scanCycleTime – the time (in seconds) between each scan for this window
firstScanRetentionTimeOffset – the time (in seconds) from the start of the experiment to the first scan for this window
numberOfScans – the number of entries in the retention time vector
mzTransformer – transforms to and from the mass over charge world and IMS index space
points – A vector of toffee::SwathMap::PCLPoint representing the signal intensities detected by the mass spectrometer
-
SwathMap()¶
-
inline std::string name() const¶
Return the name of the SwathMap as it was recorded in the original toffee file
-
inline bool isMS1() const¶
Return true if this SwathMap represents data from the MS1 precursor ions, otherwise false if the data is from MS2 fragment ions
-
inline double precursorLowerMz() const¶
Lower √(m/z) bound of the MS selection window.
-
inline double precursorCenterMz() const¶
Center √(m/z) of the MS selection window.
-
inline double precursorUpperMz() const¶
Upper √(m/z) bound of the MS selection window.
-
inline double scanCycleTime() const¶
The time (in seconds) between each scan for this window. In a typical DIA workflow, we anticipate that this is constant for all windows in a SwathRun
-
inline double firstScanRetentionTimeOffset() const¶
The time (in seconds) from the start of the experiment to the first scan for this window.
-
inline const Eigen::VectorXd &retentionTime() const¶
A vector (of size
N
) retention times whereN
is the number of scans performed, or spectra, for this window
-
inline ptrdiff_t numberOfSpectra() const¶
The number of scans performed, or spectra, for this window.
-
inline const Eigen::VectorXd &massOverCharge() const¶
A vector (of size
M
) mass over charge (m/z) values, whereM
represents all possible (m/z) values that could be detected by the mass spectrometer in its current configuration
-
inline std::shared_ptr<const MassOverChargeTransformer> getMzTransformer() const¶
Get a transformer that can report the bounds of the mass over charge for this SwathMap, as well as perform conversions between (m/z) and √(m/z) index
-
Chromatogram2DDense extractedIonChromatogram(const IMassOverChargeRange &mzRange) const¶
Extract a dense two-dimensional chromatogram from the SwathMap based on the specified mass over change range. In this instance, the returned chromatogram will span the full retention time range — if you only require a subset of retention times, see toffee::SwathMap::filteredExtractedIonChromatogram
Warning
If the mass range is large, you can expect a large memory requirement for this function. In this case, you may wish to use toffee::SwathMap::extractedIonChromatogramSparse
- Parameters
mzRange – specifies the upper and lower bounds of mass over charge for querying the SwathMap
-
Chromatogram2DSparse extractedIonChromatogramSparse(const IMassOverChargeRange &mzRange) const¶
Extract a sparse two-dimensional chromatogram from the SwathMap based on the specified mass over change range. In this instance, the returned chromatogram will span the full retention time range — if you only require a subset of retention times, see toffee::SwathMap::filteredExtractedIonChromatogramSparse
- Parameters
mzRange – specifies the upper and lower bounds of mass over charge for querying the SwathMap
-
Chromatogram2DDense filteredExtractedIonChromatogram(const IMassOverChargeRange &mzRange, const IRetentionTimeRange &rtRange) const¶
Extract a dense two-dimensional chromatogram from the SwathMap based on the specified mass over change and retention time ranges.
Warning
If the ranges are large, you can expect a large memory requirement for this function. In this case, you may wish to use toffee::SwathMap::filteredExtractedIonChromatogramSparse
-
Chromatogram2DSparse filteredExtractedIonChromatogramSparse(const IMassOverChargeRange &mzRange, const IRetentionTimeRange &rtRange) const¶
Extract a sparse two-dimensional chromatogram from the SwathMap based on the specified mass over change and retention time ranges.
-
using RTreeIndex = int32_t¶
-
class MassOverChargeTransformer¶
A simple class for converting between (m/z) space and √(m/z) index space. Furthermore, it is typically used to define the full range of allowable mass over charge of a SwathMap. Importantly, this class and its concrete children allow a user to convert between mass over charge space and √(m/z) index space using the Intrinsic Mass Spacing parameters
alpha
andbeta
where:\[ \frac{m}{z} = \left( \alpha_{IMS} i_{\sqrt{m/z}} + \beta_{trunc} \right)^2 \]where \(\alpha_{IMS}\) is the IMS value, \(i_{\sqrt{m/z}}\) is the unsigned integer index of the \(m/z\) value in \(\sqrt{m/z}\) space, and \(\beta_{trunc}\) is to control for any truncation error that may be introduced from the conversion process.
The principle use of the ranges is to enable multiple different ways of defining a range, and then converting between world and index space. In the instance of converting from world to index, the range classes will ‘snap’ to the closest index. The upper and lower bounds are inclusive, meaning that a range with lowerIMSCoord, upperIMSCoord = 154, 154 would return data with a single row (the data contained along index 154). Similarly, a range with lowerIMSCoord, upperIMSCoord = 153, 155 would return data with three rows, centered at imsCoord 154.
Subclassed by toffee::MassOverChargeTransformerOrbitrap, toffee::MassOverChargeTransformerTOF
Public Functions
-
MassOverChargeTransformer() = default¶
-
virtual ~MassOverChargeTransformer() = default¶
-
virtual double lowerMzInWorldCoords() const = 0¶
The lower bound of mass over charge range in “normal” space.
-
virtual double upperMzInWorldCoords() const = 0¶
The upper bound of mass over charge range in “normal” space.
-
virtual int32_t lowerMzInIMSCoords() const = 0¶
The lower bound of mass over charge range in √(m/z) index space.
-
virtual int32_t upperMzInIMSCoords() const = 0¶
The upper bound of mass over charge range in √(m/z) index space.
-
inline IntrinsicMassSpacingType imsType() const¶
The Intrinsic Mass Spacing type See toffee::IMassOverChargeRange for more details
-
inline double imsAlpha() const¶
The Intrinsic Mass Spacing
alpha
parameter. See toffee::IMassOverChargeRange for more details
-
inline double imsBeta() const¶
The Intrinsic Mass Spacing
beta
parameter See toffee::IMassOverChargeRange for more details
-
inline int32_t imsGamma() const¶
The Intrinsic Mass Spacing
gamma
parameter See toffee::IMassOverChargeRange for more details
-
int32_t toIMSCoords(double mz) const¶
Convert the specified mass over charge into Intrinsic Mass Spacing index space
- Parameters
mz – mass over charge value to be converted
-
virtual int32_t toIMSCoordsPreGammaAdjustment(double mz) const = 0¶
Convert the specified mass over charge into Intrinsic Mass Spacing index space before applying IMS gamma. This function allows the coord to be negative, and should only every be used when first creating the the point cloud
- Parameters
mz – mass over charge value to be converted
-
virtual double toWorldCoords(int32_t imsIdx) const = 0¶
Convert the specified Intrinsic Mass Spacing index into mass over charge space
- Parameters
imsCoord – IMS index to be converted
-
MassOverChargeTransformer() = default¶
-
class MassOverChargeTransformerTOF : public toffee::MassOverChargeTransformer¶
A simple class for converting between (m/z) space and √(m/z) index space. Furthermore, it is typically used to define the full range of allowable mass over charge of a SwathMap.
See toffee::IMassOverChargeRange for how this conversion takes place
Public Functions
-
explicit MassOverChargeTransformerTOF()¶
-
explicit MassOverChargeTransformerTOF(double imsAlpha, double imsBeta, int32_t imsGamma)¶
- Parameters
imsAlpha – the Intrinsic Mass Spacing
alpha
parameterimsBeta – the Intrinsic Mass Spacing
beta
parameter
-
explicit MassOverChargeTransformerTOF(double imsAlpha, double imsBeta, int32_t imsGamma, int32_t lowerMzInIMSCoords, int32_t upperMzInIMSCoords)¶
- Parameters
imsAlpha – the Intrinsic Mass Spacing
alpha
parameterimsBeta – the Intrinsic Mass Spacing
beta
parameterlowerMzInIMSCoords – the lower bound of mass over charge range in IMS index space
upperMzInIMSCoords – the upper bound of mass over charge range in IMS index space
-
inline virtual double lowerMzInWorldCoords() const final¶
The lower bound of mass over charge range in “normal” space.
-
inline virtual double upperMzInWorldCoords() const final¶
The upper bound of mass over charge range in “normal” space.
-
inline virtual int32_t lowerMzInIMSCoords() const final¶
The lower bound of mass over charge range in √(m/z) index space.
-
inline virtual int32_t upperMzInIMSCoords() const final¶
The upper bound of mass over charge range in √(m/z) index space.
-
virtual int32_t toIMSCoordsPreGammaAdjustment(double mz) const final¶
Convert the specified mass over charge into Intrinsic Mass Spacing index space before applying IMS gamma. This function allows the coord to be negative, and should only every be used when first creating the the point cloud
- Parameters
mz – mass over charge value to be converted
-
virtual double toWorldCoords(int32_t imsCoord) const final¶
Convert the specified Intrinsic Mass Spacing index into mass over charge space
- Parameters
imsCoord – IMS index to be converted
-
explicit MassOverChargeTransformerTOF()¶
Spectra & Chromatograms¶
-
class Spectrum1D¶
A simple data holding class primarily used to return results from querying a SwathMap.
Public Functions
-
inline Spectrum1D(const Eigen::VectorXd &mz)¶
Initialise the Spectrum1D with a given set of mass over charge values and setting a correspondingly sized intensity vector to zeroes
- Parameters
mz – a vector (of size
M
) mass over charge values captured by this spectrum
-
inline Spectrum1D(const std::vector<double> &mz, const std::vector<int> &intens)¶
Initialise the Spectrum1D with a given set of mass over charge values and a correspondingly sized intensity vector
- Parameters
mz – a vector (of size
M
) mass over charge values captured by this spectrumintens – a vector (of size
M
) intensity values captured by this spectrum
Public Members
-
Eigen::VectorXd massOverCharge¶
An Eigen::VectorXd (of size
M
) mass over charge values captured by this spectrum
-
Eigen::VectorXi intensities¶
An Eigen::VectorXi (of size
M
) intensity values captured by this spectrum
-
inline Spectrum1D(const Eigen::VectorXd &mz)¶
-
class Chromatogram1D¶
A simple data holding class primarily used to return results from querying a SwathMap.
Public Functions
-
inline Chromatogram1D(const Eigen::VectorXd &rt)¶
Initialise the Chromatogram1D with a given set of retention times and setting a correspondingly sized intensity vector to zeroes
- Parameters
rt – a vector (of size
N
) retention times captured by this chromatogram
Public Members
-
Eigen::VectorXd retentionTime¶
An Eigen::VectorXd (of size
N
) retention times captured by this chromatogram
-
Eigen::VectorXi intensities¶
An Eigen::VectorXi (of size
N
) intensity values captured by this chromatogram
-
inline Chromatogram1D(const Eigen::VectorXd &rt)¶
-
class Chromatogram2DSparse¶
A simple data holding class primarily used to return results from querying a SwathMap. The sparse moiniker refers to the fact that zero intensities are excluded from the intensity matrix to improve performance
Public Types
-
using Value = int¶
The type used for representing the intensity values.
-
using Triplet = Eigen::Triplet<Value>¶
A (retention time index, √(m/z) index, intensity) triple that corresponds to a point in the point cloud. These are primarily used for loading the toffee::Chromatogram2DSparse::Martix
-
using Matrix = Eigen::SparseMatrix<Value>¶
An Eigen::SparseMatrix in column major layout that is used to represent the intensities
Public Functions
-
inline Chromatogram2DSparse(size_t retentionTimeSize, size_t massOverChargeSize, const std::vector<Triplet> &triplets)¶
- Parameters
retentionTimeSize – the number of retention times captured by this chromatogram
massOverChargeSize – the number of mass over charge values captured by this chromatogram
triplets – a vector of triplets that represent points in the point cloud that are used to initialise the Chromatogram2DSparse::intensities sparse matrix
-
inline Chromatogram2DSparse(Chromatogram2DSparse &&other) noexcept¶
-
inline Chromatogram2DSparse &operator=(Chromatogram2DSparse &&other) noexcept¶
Public Members
-
Eigen::VectorXd retentionTime¶
An Eigen::VectorXd (of size
N
) retention times captured by this chromatogram
-
Eigen::VectorXd massOverCharge¶
An Eigen::VectorXd (of size
M
) mass over charge values captured by this chromatogram
-
Matrix intensities¶
An Eigen::SparseMatrix (of shape
N
xM
) in column major layout, of intensity values captured by this chromatogram
-
using Value = int¶
-
class Chromatogram2DDense¶
A simple data holding class primarily used to return results from querying a SwathMap. The dense moniker refers to the fact that zero intensities are included in the intensity matrix and may result in a large memory footprint.
Warning
Careless useage of this class may result in very large memory requirements of the calling function. In these cases, Chromatogram2DSparse may be useful.
Public Functions
-
inline explicit Chromatogram2DDense(const Chromatogram2DSparse &chromatogram2DSparse)¶
Convert a two-dimensional chromatogram with sparse intensities into a two-dimensional chromatogram with intensities in a dense matrix format.
- Parameters
chromatogram2DSparse – the sparse representation of the chromatogram
Public Members
-
Eigen::VectorXd retentionTime¶
An Eigen::VectorXd (of size
N
) retention times captured by this chromatogram
-
Eigen::VectorXd massOverCharge¶
An Eigen::VectorXd (of size
M
) mass over charge values captured by this chromatogram
-
Eigen::MatrixXi intensities¶
An Eigen::MatrixXi (of shape
N
xM
) in column major layout, of intensity values captured by this chromatogram
-
inline explicit Chromatogram2DDense(const Chromatogram2DSparse &chromatogram2DSparse)¶
MassOverChargeRange¶
-
class IMassOverChargeRange¶
Subclassed by toffee::MassOverChargeRange, toffee::MassOverChargeRangeIMSCoords, toffee::MassOverChargeRangeWithPixelHalfWidth, toffee::MassOverChargeRangeWithPPMFullWidth
Public Functions
-
virtual ~IMassOverChargeRange() = default¶
Using the Intrinsic Mass Spacing parameters
alpha
andbeta
, convert the lower bound of the concrete child class into its corresponding √(m/z) index
Using the Intrinsic Mass Spacing parameters
alpha
andbeta
, convert the upper bound of the concrete child class into its corresponding √(m/z) index
-
virtual ~IMassOverChargeRange() = default¶
Warning
doxygenclass: Cannot find class “toffee::MassOverChargeRangeWithPixelWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/
Warning
doxygenclass: Cannot find class “toffee::MassOverChargeRangeWithPPMWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/
-
class MassOverChargeRange : public toffee::IMassOverChargeRange¶
An implementation of IMassOverChargeRange that allows the user to explicitly set the upper and lower bounds of the mass over charge range
Public Functions
-
explicit MassOverChargeRange(double lowerMz, double upperMz)¶
- Parameters
lowerMz – the lower bound of mass over charge for this range
upperMz – the upper bound of mass over charge for this range
-
inline double massOverChargeLower() const¶
The lower bound of mass over charge for this range.
-
inline double massOverChargeUpper() const¶
The upper bound of mass over charge for this range.
-
explicit MassOverChargeRange(double lowerMz, double upperMz)¶
-
class MassOverChargeRangeIMSCoords : public toffee::IMassOverChargeRange¶
An implementation of IMassOverChargeRange that allows the user to explicitly set the upper and lower bounds of the mass over charge range in terms of the √(m/z) index space
Public Functions
-
explicit MassOverChargeRangeIMSCoords(int32_t lowerIMSCoord, int32_t upperIMSCoord)¶
- Parameters
lowerIMSCoord – the lower bound of mass over charge for this range in √(m/z) index space
upperIMSCoord – the upper bound of mass over charge for this range in √(m/z) index space
-
explicit MassOverChargeRangeIMSCoords(int32_t lowerIMSCoord, int32_t upperIMSCoord)¶
RetentionTimeRange¶
-
class IRetentionTimeRange¶
An interface class that allows users to define retention time ranges using various methods. These ranges are used by toffee::SwathMap when querying the intensity data.
The principle use of the ranges is to enable multiple different ways of defining a range, and then converting between world and index space. In the instance of converting from world to index, the range classes will ‘snap’ to the closest index. The upper and lower bounds are inclusive, meaning that a range with lowerRTIdx, upperRTIdx = 154, 154 would return data with a single row (the data contained along index 154). Similarly, a range with lowerRTIdx, upperRTIdx = 153, 155 would return data with three rows, centered at retentionTimeIdx 154.
Subclassed by toffee::RetentionTimeRange, toffee::RetentionTimeRangeWithPixelHalfWidth
Public Functions
-
virtual ~IRetentionTimeRange() = default¶
-
virtual int32_t lowerRTIdx(const Eigen::VectorXd &retentionTimes) const = 0¶
Find the index in the specified retention time vector that contains the value that is closest to the lower bound specified in the construction of a concrete child class
Warning
This function assumes, but does not check, that the retention time vector is sorted in ascending order
- Parameters
retentionTimes – a vector of retention times in ascending order
-
virtual int32_t upperRTIdx(const Eigen::VectorXd &retentionTimes) const = 0¶
Find the index in the specified retention time vector that contains the value that is closest to the upper bound specified in the construction of a concrete child class
Warning
This function assumes, but does not check, that the retention time vector is sorted in ascending order
- Parameters
retentionTimes – a vector of retention times in ascending order
Public Static Functions
-
static int32_t toRTIdx(double retentionTime, const Eigen::VectorXd &retentionTimes)¶
Find the index in the specified retention time vector that contains the value that is closest to the specified retention time we are searching for. If there are two values equidistant, arbitrarily preference the higher index.
Warning
This function assumes, but does not check, that the retention time vector is sorted in ascending order
- Parameters
retentionTime – the retention time value that we wish to search for
retentionTimes – a vector of retention times in ascending order
-
virtual ~IRetentionTimeRange() = default¶
Warning
doxygenclass: Cannot find class “toffee::RetentionTimeRangeWithPixelWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/
-
class RetentionTimeRange : public toffee::IRetentionTimeRange¶
An implementation of IRetentionTimeRange that allows the user to explicitly set the upper and lower bounds of the retention time range
Public Functions
-
RetentionTimeRange(double lowerRetentionTime, double upperRetentionTime)¶
- Parameters
lowerRetentionTime – the lower bound of retention time for this range
upperRetentionTime – the upper bound of retention time for this range
-
inline double retentionTimeLower() const¶
The lower bound of retention time for this range.
-
inline double retentionTimeUpper() const¶
The upper bound of retention time for this range.
-
virtual int32_t lowerRTIdx(const Eigen::VectorXd &retentionTimes) const final¶
-
virtual int32_t upperRTIdx(const Eigen::VectorXd &retentionTimes) const final¶
-
RetentionTimeRange(double lowerRetentionTime, double upperRetentionTime)¶
Versioning¶
-
class Version¶
Public Static Functions
-
static inline int combineFileFormatVersions(int majorVersion, int minorVersion)¶
A helper function to combine major and minor version numbers into a form that can be easily compared
Public Static Attributes
- static TOF_WIN_DLL_DECLSPEC const std::string LIBRARY_VERSION = "dev"
The version of the library.
- static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MAJOR_VERSION_CURRENT = 1
The latest major version of the
toffee
file format.
- static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MINOR_VERSION_CURRENT = 2
The latest minor version of the
toffee
file format.
- static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MAJOR_VERSION_COMPATIBILITY = 0
The version of the major
toffee
file that we are backwards compatible with
- static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MINOR_VERSION_COMPATIBILITY = 2
The version of the minor
toffee
file that we are backwards compatible with
- static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_NUM_ALLOWED_MINOR_VERSIONS = 1000
We do not support minor versions greater than or equal to this number.
-
static inline int combineFileFormatVersions(int majorVersion, int minorVersion)¶