Toffee C++ API¶
Contents
Creating a Toffee File¶
The following API documentation gives a flavour for how you can create a
toffee
file. However, it is best explained by way of example, such as the
following python code in the unit testing framework. As mentioned elsewhere,
the python and C++ APIs are laregly equivalent.
class DummyToffeeFile():
def __init__(
self,
name,
num_rt_points=1626,
scan_cycle_time=3.32,
ms1_rt_offset=0.17,
ms2_rt_offset=0.506,
num_mz_points=1000,
ms2_name=None,
ims_type=toffee.IntrinsicMassSpacingType.TOF,
fraction_not_zero=0.01,
):
self.name = name
self.hash = sum(ord(ch) for ch in name)
self.seed = self.hash % ((np.iinfo(np.int32).max + 1) * 2)
logger.info('%s: hash=%s, seed=%s', self.name, self.hash, self.seed)
np.random.seed(self.seed)
self.header = self._header(ims_type)
# create retention time vectors for each window
self.scan_cycle_time = scan_cycle_time
self.ms1_rt_offset = ms1_rt_offset
self.ms2_rt_offset = ms2_rt_offset
self.ms1_rt_vector = np.arange(num_rt_points) * self.scan_cycle_time + self.ms1_rt_offset
self.ms2_rt_vector = np.arange(num_rt_points) * self.scan_cycle_time + self.ms2_rt_offset
# set the mass over charge bounds of the data
lower = 90
upper = 2100
(
self.ims_alpha,
self.ims_beta,
self.ims_gamma,
self.ims_coord_vector,
self.mz_vector,
) = self.create_mz_vector(ims_type, lower, upper, num_mz_points)
# we're going to create 1x MS1 window and 1x MS2 window
self.ms1_name = toffee.ToffeeWriter.MS1_NAME
if ms2_name is None:
ms2_name = toffee.ToffeeWriter.ms2Name(41)
self.ms2_name = ms2_name
# window boundaries match ProCan90-M06-07.tof/ms2-041
self.window_lower, self.window_center, self.window_upper = (608.5, 611.5, 614.5)
# create a dense matrix of fake data
ms1_mz, ms1_rt = np.meshgrid(self.mz_vector, self.ms1_rt_vector)
self.expected_intensities_ms1 = (ms1_rt * ms1_mz).astype(np.uint32)
ms2_mz, ms2_rt = np.meshgrid(self.mz_vector, self.ms2_rt_vector)
self.expected_intensities_ms2 = (ms2_rt * ms2_mz).astype(np.uint32)
# generate our toffee writer so we can add windows to it
self.tof_fname = name + '.tof'
self.ims_type = ims_type
writer = toffee.ToffeeWriter(self.tof_fname, self.ims_type)
writer.addHeaderMetadata(self.header)
# loop through the two windows, modify their intensities by setting
# a random seclectin to zero, convert this into a point cloud and
# then add it to the toffee writer
self.scan_data = {}
for map_name, rt_vector, expected_intensities in zip(
[self.ms1_name, self.ms2_name],
[self.ms1_rt_vector, self.ms2_rt_vector],
[self.expected_intensities_ms1, self.expected_intensities_ms2],
):
# create the raw data container
scan_data = toffee.RawSwathScanData(map_name, ims_type)
if map_name == self.ms2_name:
scan_data.windowLower = self.window_lower
scan_data.windowCenter = self.window_center
scan_data.windowUpper = self.window_upper
else:
scan_data.windowLower = -1
scan_data.windowCenter = -1
scan_data.windowUpper = -1
# slice the data as if we were a mass spectrometer creating 'scans'
for r, rt_value in enumerate(rt_vector):
intensity_vector = expected_intensities[r, :]
# add some zeros by generating a mask and assigning a random
# fraction to be True - then set anything in the intensity
# vector, masked by True, to be zero. In one case, set all
# intensities to be zero (mimicking a scan with no data)
if r % 10 == 0:
tmp_fraction_not_zero = 0.0
else:
tmp_fraction_not_zero = fraction_not_zero
mask = np.random.uniform(0, 1, size=intensity_vector.shape)
mask[mask < tmp_fraction_not_zero] = 0
mask[mask != 0] = 1
mask = mask.astype(bool)
intensity_vector[mask] = 0
# double check the masking worked as expected
if (~mask).any():
assert (intensity_vector > 0).any()
mz = self.mz_vector[~mask]
intens = intensity_vector[~mask]
# add to the scan
scan_data.addScan(rt_value, mz, intens)
# create our point cloud
pcl = scan_data.toPointCloud()
# save scan data for later
self.scan_data[map_name] = scan_data
# assert that the point cloud properties were set correctly
assert pcl.name == map_name
assert pcl.imsType == ims_type
assert pcl.windowLower == scan_data.windowLower
assert pcl.windowCenter == scan_data.windowCenter
assert pcl.windowUpper == scan_data.windowUpper
# assert that the retention time was correctly calculated
np.testing.assert_allclose(pcl.scanCycleTime, self.scan_cycle_time)
if map_name == self.ms2_name:
np.testing.assert_allclose(pcl.firstScanRetentionTimeOffset, self.ms2_rt_offset)
else:
np.testing.assert_allclose(pcl.firstScanRetentionTimeOffset, self.ms1_rt_offset)
ims_props = pcl.imsProperties
assert len(ims_props.sliceIndex) == num_rt_points, \
'{} {}\n{}'.format(len(ims_props.sliceIndex), num_rt_points, ims_props.sliceIndex)
# assert that the Intrinsic Mass Spacing parameters were calculated
# correctly
np.testing.assert_allclose(ims_props.medianAlpha, self.ims_alpha)
np.testing.assert_allclose(ims_props.medianBeta, self.ims_beta)
np.testing.assert_allclose(ims_props.gamma, self.ims_gamma)
# add to the file
writer.addPointCloud(pcl)
@classmethod
def create_mz_vector(cls, ims_type, lower, upper, num_mz_points):
"""
Create a m/z vector that respects the IMS spacings for a Sciex (TOF)
type instrument
"""
if ims_type == toffee.IntrinsicMassSpacingType.TOF:
return cls._create_mz_vector_tof(lower, upper, num_mz_points)
elif ims_type == toffee.IntrinsicMassSpacingType.ORBITRAP:
return cls._create_mz_vector_orbitrap(lower, upper, num_mz_points)
raise ValueError('Upsupported IMS type: {}'.format(ims_type))
@classmethod
def _create_mz_vector_tof(cls, lower, upper, num_mz_points):
"""
Create a m/z vector that respects the IMS spacings for a Sciex (TOF)
type instrument
"""
sqrt_lower = np.sqrt(lower)
sqrt_upper = np.sqrt(upper)
# and use these to create dummy Intrinsic Mass Spacing properties
ims_alpha = (sqrt_upper - sqrt_lower) / num_mz_points
ims_gamma = int(np.round(sqrt_lower / ims_alpha, decimals=0))
ims_beta = sqrt_lower - ims_gamma * ims_alpha
if ims_beta > 0:
ims_gamma += 1
ims_beta -= ims_alpha
# from these create an mass over charge vector with known properties
ims_coord = np.arange(num_mz_points + 1).astype(int)
mz = ((ims_coord + ims_gamma) * ims_alpha + ims_beta) ** 2
# add some padding to ims
ims_gamma -= 10
ims_coord += 10
return ims_alpha, ims_beta, ims_gamma, ims_coord, mz
@classmethod
def _create_mz_vector_orbitrap(cls, lower, upper, num_mz_points):
"""
Create a m/z vector that respects the IMS spacings for a Thermo (Orbitrap)
type instrument
"""
inverse_sqrt_lower = 1 / np.sqrt(lower)
inverse_sqrt_upper = 1 / np.sqrt(upper)
# and use these to create dummy Intrinsic Mass Spacing properties
ims_alpha = (inverse_sqrt_upper - inverse_sqrt_lower) / num_mz_points
ims_gamma = int(np.round(inverse_sqrt_lower / ims_alpha, decimals=0))
ims_beta = inverse_sqrt_lower - ims_gamma * ims_alpha
if ims_beta > 0:
ims_gamma -= 1
ims_beta += ims_alpha
# from these create an mass over charge vector with known properties
ims_coord = np.arange(num_mz_points + 1).astype(int)
mz = ((ims_coord + ims_gamma) * ims_alpha + ims_beta) ** -2
# add some padding to ims
ims_gamma -= 10
ims_coord += 10
return ims_alpha, ims_beta, ims_gamma, ims_coord, mz
@classmethod
def _header(cls, ims_type):
if ims_type == toffee.IntrinsicMassSpacingType.TOF:
header_fname = 'Header-TOF.mzML'
elif ims_type == toffee.IntrinsicMassSpacingType.ORBITRAP:
header_fname = 'Header-Orbitrap.mzML'
else:
raise ValueError('Upsupported IMS type: {}'.format(ims_type))
curdir = os.path.dirname(os.path.abspath(__file__))
header_fname = os.path.join(curdir, header_fname)
with open(header_fname, 'r') as f:
return f.read()
ScanDataPointCloud¶
-
class
ScanDataPointCloud
¶ The data from a single MS1 or MS2 window collected as a point cloud of
X
points discovered fromN
scansPublic Members
-
std::string
name
¶ The name of the window. This should match one of the formats specified by toffee::ToffeeWriter
-
double
windowLower
¶ The lower (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
double
windowCenter
¶ The center (m/z) of the MS selection window - should be
-1
for MS1 scans
-
double
windowUpper
¶ The upper (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
double
scanCycleTime
¶ The time (in seconds) between each scan for this window. In a typical DIA workflow, we anticipate that this is constant for all windows in a SwathRun
-
double
firstScanRetentionTimeOffset
¶ The time (in seconds) from the start of the experiment to the first scan for this window.
-
IntrinsicMassSpacingType
imsType
¶ The Intrinsic Mass Spacing
type
(i.e. ‘TOF’, ‘Orbitrap’)
-
IMSProperties
imsProperties
¶ The Intrinsic Mass Spacing properties.
-
std::vector<uint32_t>
intensity
¶ The intensity value for each of the
X
points in the point cloud.
-
std::string
RawSwathScanData¶
-
class
RawSwathScanData
¶ The raw data from a single MS1 or MS2 window collected as a vector of
N
individual scans at discrete retention timesPublic Functions
-
RawSwathScanData
(const std::string &name, IntrinsicMassSpacingType imsType)¶ - Parameters
name
: the name of the window. This should match one of the formats specified by toffee::ToffeeWriter
-
void
addScan
(double rt, const std::vector<double> &mz, const std::vector<uint32_t> &intensity)¶ Add a scan to the raw data
- Parameters
rt
: the retention time of this scanmz
: the mass over charge data collected during this scanintensity
: the intensity data collected during this scan
The mass accuracy can be used as a debugging tool to keep track of the actual and converted mass over charge values during conversion to point cloud (index space) using the Intrinsic Mass Spacing.
- Parameters
if
: this is not null, then a debugging step will be taken during the conversion to a point cloud
-
ScanDataPointCloud
toPointCloud
() const¶ Convert the raw data into the point cloud format. Included in this is the conversion of mass over charge to √(m/z) index space using the Intrinsic Mass Spacing properties (c.f. toffee::IMassOverChargeRange)
Public Members
-
const std::string
name
¶ The name of the window. This should match one of the formats specified by toffee::ToffeeWriter
-
double
windowLower
¶ The lower (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
double
windowCenter
¶ The center (m/z) of the MS selection window - should be
-1
for MS1 scans
-
double
windowUpper
¶ The upper (m/z) bound of the MS selection window - should be
-1
for MS1 scans
-
IntrinsicMassSpacingType
imsType
¶ The type of mass analyzer in terms of the Intrinsic Mass Spacing.
-
size_t
numberOfScans
= 0¶ The number (
N
) of scans in this window.
-
std::vector<double>
retentionTime
¶ A vector (of size
N
) retention times.
-
std::vector<std::vector<double>>
mzData
¶ A vector (of size
N
) of vectors where the nested vector contains the mass over charge data collected during then-th
scan
-
std::vector<std::vector<uint32_t>>
intensityData
¶ A vector (of size
N
) of vectors where the nested vector contains the intensity data collected during then-th
scan
-
ToffeeWriter¶
-
class
ToffeeWriter
¶ If we consider the raw data collected within a given window across a full gradient during a SWATH-MS run, each data point represents:
Mass over charge The abscissa refers to the mass over charge ratio of the peptide ion
Retention time: The ordinate refers to the time at which the peptide ion is elutes from the Liquid Chromatography column
Intensity: The applicate is a count of the number of times a peptide ion is detected on the sensor
Through the Intrinsic Mass Spacing, mass over charge can be represented by an unsigned integer, as can intensity. Furthermore, the retention time for each data point can be considered as an index into an associated (but much smaller) vector of doubles. In this manner, we can save the raw data into a file format as 3 vectors of unsigned 32-bit integers with some associated metadata.
toffee
takes this exact approach, saving each SWATH-MS window as a group in an HDF5 file; where each group has:double
attributeslower
,center
, andupper
to describe the properties of the window;double
attributesscanCycleTime
($t_c$) andfirstScanRetentionTimeOffset
($t_0$) that describe the retention time vector for each window such that $t(i) = t_c i + t_0$ in seconds;a
uint32
dataset ofretentionTimeIdx
, of equal length as the retention time vector, that represents indices into theimsCoord
andintensity
between which the corresponding value of $t(i)$ is applied;double
attributesimsAlpha
(${IMS}$) andimsBeta
(${trunc}$) for the Intrinsic Mass Spacing m/z conversion such that ${m}{z} = ( {IMS} i_{{m/z}} + {trunc} )^2$; anduint32
datasets ofimsCoord
andintensity
for the point cloud, with one entry for each point in the cloud.
Thus, through this ToffeeWriter, we can generate a file with lossless compression that can represent a SWATH-MS
mzML
file with a ten-fold decrease in the file size - making it on par with the file size of the original vendor format.Public Functions
-
ToffeeWriter
(const std::string &h5FilePath, IntrinsicMassSpacingType imsType)¶ - Parameters
h5FilePath
: the output path of the toffee file. If a file already exists at that path, it will be over-written
-
void
addPointCloud
(const ScanDataPointCloud &pcl) const¶ Add a point cloud for a given window to the toffee file. If a window with the same name already exists in the file, it will throw an exception
- Parameters
pcl
: the point cloud to be added
-
void
addHeaderMetadata
(const std::string &header) const¶ Add a header string to the HDF5 file. In normal circumstances, this will be a PSI-formatted XML string matching the header for mzML files
Public Static Functions
-
std::string
ms2Name
(const size_t &idx)¶ Name of the MS2 HDF5 group corresponding to the specified index.
-
std::string
imsTypeAsStr
(const IntrinsicMassSpacingType &imsType)¶ Name of the IMS Type in string format.
-
IntrinsicMassSpacingType
imsTypeFromStr
(const std::string &imsTypeAsStr)¶ Convert the name of the IMS Type in string format to the enum.
Public Static Attributes
-
const std::string
ATTR_MAJOR_VERSION
= "FILE_FORMAT_MAJOR_VERSION"¶ Name of the file major version HDF5 attribute.
-
const std::string
ATTR_MINOR_VERSION
= "FILE_FORMAT_MINOR_VERSION"¶ Name of the file minor version HDF5 attribute.
-
const std::string
ATTR_LIBRARY_VERSION
= "CREATED_BY_LIBRARY_VERSION"¶ Name of the softare version used to create the file HDF5 attribute.
-
const std::string
ATTR_EXPERIMENT_METADATA
= "metadataXML"¶ Name of the run’s header HDF5 attribute.
-
const std::string
ATTR_IMS_TYPE
= "IMSType"¶ Name of a window’s Intrinsic Mass Spacing
type
HDF5 attribute Valid values are: TOF, Orbitrap
-
const std::string
ATTR_IMS_ALPHA
= "IMSAlpha"¶ Name of a window’s Intrinsic Mass Spacing
alpha
HDF5 attribute.
-
const std::string
ATTR_IMS_BETA
= "IMSBeta"¶ Name of a window’s Intrinsic Mass Spacing
beta
HDF5 attribute.
-
const std::string
ATTR_IMS_GAMMA
= "IMSGamma"¶ Name of a window’s Intrinsic Mass Spacing
gamma
HDF5 attribute.
-
const std::string
ATTR_WINDOW_BOUNDS_LOWER
= "precursorLower"¶ Name of a window’s mass over charge lower bounds HDF5 attribute.
-
const std::string
ATTR_WINDOW_BOUNDS_UPPER
= "precursorUpper"¶ Name of a window’s center mass over charge HDF5 attribute.
-
const std::string
ATTR_WINDOW_CENTER
= "precursorCenter"¶ Name of a window’s mass over charge upper bounds HDF5 attribute.
-
const std::string
ATTR_SCAN_CYCLE_TIME
= "scanCycleTime"¶ Name of a window’s scan cycle time HDF5 attribute.
-
const std::string
ATTR_WINDOW_RETENTION_TIME_OFFSET
= "firstScanRetentionTimeOffset"¶ Name of a window’s first retention time offset HDF5 attribute.
-
const std::string
DATASET_RETENTION_TIME_IDX
= "retentionTimeIdx"¶ Name of a window’s retention time index HDF5 dataset.
-
const std::string
DATASET_IMS_ALPHA_PER_SCAN
= "IMSAlphaPerScan"¶ Name of a window’s Intrinsic Mass Spacing
alpha per scan
HDF5 dataset.
-
const std::string
DATASET_IMS_BETA_PER_SCAN
= "IMSBetaPerScan"¶ Name of a window’s Intrinsic Mass Spacing
beta per scan
HDF5 dataset.
-
const std::string
DATASET_MZ_IMS_IDX
= "imsCoord"¶ Name of a window’s √(m/z) index HDF5 dataset.
-
const std::string
DATASET_INTENSITY
= "intensity"¶ Name of a window’s intensity HDF5 dataset.
-
const std::string
MS1_NAME
= std::string{"ms1"}¶ Name of the MS1 HDF5 group.
-
const std::string
MS2_PREFIX
= std::string{"ms2-"}¶ Prefix for the MS2 HDF5 groups.
Accessing Data in a Toffee File¶
SwathRun¶
-
class
SwathRun
¶ A class that enables access to properties of a full toffee file.
Public Functions
-
SwathRun
(const std::string &fname)¶ - Parameters
fname
: path to the toffee file of interest
-
std::string
header
() const¶ Extract the metadata header from the specified file. In usual circumstances one could expect this to look like a PSI-formatter XML description of the run
-
int
formatMajorVersion
() const¶ The file format major version.
-
int
formatMinorVersion
() const¶ The file format minor version.
-
IntrinsicMassSpacingType
imsType
() const¶ The type of mass analyzer Intrinsic Mass Spacing.
-
bool
hasMS1Data
() const¶ Return true if this toffee file contains data for the MS1 window.
-
std::vector<MS2WindowDescriptor>
ms2Windows
() const¶ Return a vector of descriptions of each the MS2 windows contained in the toffee file
-
std::map<double, std::string>
mapPrecursorsToMS2Names
(const std::vector<double> &precursorMzValues, double lowerMzOverlap, double upperMzOverlap) const¶ It is often useful to know which precursors map to specific MS2 windows in the toffee file. For instance, this can be useful for performance reasons when iterating through a spectral library
- Parameters
precursorMzValues
: A vector of mass over charge values that correspond to the precursors that we wish to map to MS2 windowslowerMzOverlap
: It is usual to have MS2 windows overlap in an experiment, this allows us to exclude precursors with this offest from the lower boundary of an MS2 windowupperMzOverlap
: It is usual to have MS2 windows overlap in an experiment, this allows us to exclude precursors with this offest from the upper boundary of an MS2 window
-
SwathMap
loadSwathMap
(const std::string &mapName, bool lossyAdjustIMSCoords = false) const¶ Load a SwathMap for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.This function is primarily for the python wrapping, when calling from C++, we recommend using
SwathRun::loadSwathMapPtr
- Parameters
mapName
: name of the window of interest (must match the naming convention toToffeeWriter
)lossyAdjustIMSCoords
: if true, the IMS coords are translated from the scan specific alpha and beta to the window alpha and beta in a way that minimizes the ppm error. Note, this will minimize the mass error of the transform, but this is a lossy operation that cannot be undone!
-
std::shared_ptr<SwathMap>
loadSwathMapPtr
(const std::string &mapName, bool lossyAdjustIMSCoords = false) const¶ Load a SwathMap for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.- Parameters
mapName
: name of the window of interest (must match the naming convention toToffeeWriter
)lossyAdjustIMSCoords
: if true, the IMS coords are translated from the scan specific alpha and beta to the window alpha and beta in a way that minimizes the ppm error. Note, this will minimize the mass error of the transform, but this is a lossy operation that cannot be undone!
-
SwathMapInMemorySpectrumAccess
loadSwathMapInMemorySpectrumAccess
(const std::string &mapName) const¶ Load a SwathMapInMemorySpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.This function is primarily for the python wrapping, when calling from C++, we recommend using
SwathRun::loadSwathMapPtr
- Parameters
mapName
: name of the window of interest (must match the naming convetion toToffeeWriter
)
-
std::shared_ptr<SwathMapInMemorySpectrumAccess>
loadSwathMapInMemorySpectrumAccessPtr
(const std::string &mapName) const¶ Load a SwathMapInMemorySpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.- Parameters
mapName
: name of the window of interest (must match the naming convetion toToffeeWriter
)
-
SwathMapSpectrumAccess
loadSwathMapSpectrumAccess
(const std::string &mapName) const¶ Load a SwathMapSpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.This function is primarily for the python wrapping, when calling from C++, we recommend using
SwathRun::loadSwathMapPtr
- Parameters
mapName
: name of the window of interest (must match the naming convetion toToffeeWriter
)
-
std::shared_ptr<SwathMapSpectrumAccess>
loadSwathMapSpectrumAccessPtr
(const std::string &mapName) const¶ Load a SwathMapSpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same
SwathRun
concurrently.- Parameters
mapName
: name of the window of interest (must match the naming convetion toToffeeWriter
)
-
-
class
MS2WindowDescriptor
¶ A simple data class for describing an MS2 window.
Public Functions
-
bool
operator==
(const MS2WindowDescriptor &other) const¶ Windows are considered equal if and only if they have the same name, and upper and lower mass over charge ranges
Public Members
-
std::string
name
¶ The name of the window in the toffee file.
-
double
lower
¶ The lower bound of the range of precursor mass over charge values that are selected to be fragmented in this MS2 window
-
double
upper
¶ The upper bound of the range of precursor mass over charge values that are selected to be fragmented in this MS2 window
-
bool
SwathMap¶
-
class
SwathMap
¶ The SwathMap class is one of the key elements of the the
toffee
library. It allows access raw Time of Flight mass spectrometry data in near constant time, with relatively a small memory footprint.Public Types
-
using
RTreeIndex
= int32_t¶ Defines the type used to index the mass over charge and retention time values in the RTree. For search performance and memory optimisation, we use
int32_t
as our index type if the resolution of the mass spectrometer means greater than 4e9 mass over charge values can be recorded using the intrinsic mass spacing method of toffee, then this will need to change
-
using
RtImsCoord
= boost::geometry::model::point<RTreeIndex, 2, boost::geometry::cs::cartesian>¶ A coordinate in the Point Cloud with retention time as the ‘x’ value and √(m/z) index as the ‘y’ value.
-
using
RTreeIntensity
= uint32_t¶ Defines the type used to record intensity values in the RTree. For memory optimisation, intensity values remain as
uint32_t
if the sensitivity of the mass spectrometer means signals greater than 4e9 can be recorded, then this will need to change
-
using
PCLPoint
= std::pair<RtImsCoord, RTreeIntensity>¶ A point in the point cloud, with the signal intesity as the ‘value’.
-
using
RStar
= boost::geometry::index::rstar<32>¶ The integer value passed here defines the number of points per leaf node of the RTree. It is selected based on a balance between search performance and memory. Using 4 is analogous to a quad-tree.
-
using
RTree
= boost::geometry::index::rtree<PCLPoint, RStar>¶ A spatial search tree from boost that can be used for high performance spatial searching of the data contained within a SwathMap
-
using
Box
= boost::geometry::model::box<RtImsCoord>¶ A type used for defining axis aligned box queries of the RTree.
-
using
Segment
= boost::geometry::model::segment<RtImsCoord>¶ A type used for defining segment queries of the RTree.
Public Functions
Primary constructor of a SwathMap. In general operation, it is unlikely that a user of the library will need to call this directly. Instead, prefer functions on
SwathRun
that load in windows.- Parameters
name
: name of the MS selection window that is being represented by this data. This must take the form of ToffeeWriter::MS1_NAME or ToffeeWriter::ms2Namelower
: lower (m/z) bound of the MS selection windowcenter
: center (m/z) of the MS selection windowupper
: upper (m/z) bound of the MS selection windowisMS1
: if true, this SwathMap represents data from the MS1 precursor ions, otherwise, the data is from MS2 fragment ionsscanCycleTime
: the time (in seconds) between each scan for this windowfirstScanRetentionTimeOffset
: the time (in seconds) from the start of the experiment to the first scan for this windownumberOfScans
: the number of entries in the retention time vectormzTransformer
: transforms to and from the mass over charge world and IMS index spacepoints
: A vector of toffee::SwathMap::PCLPoint representing the signal intensities detected by the mass spectrometer
-
SwathMap
()¶
-
std::string
name
() const¶ Return the name of the SwathMap as it was recorded in the original toffee file
-
bool
isMS1
() const¶ Return true if this SwathMap represents data from the MS1 precursor ions, otherwise false if the data is from MS2 fragment ions
-
double
precursorLowerMz
() const¶ Lower √(m/z) bound of the MS selection window.
-
double
precursorCenterMz
() const¶ Center √(m/z) of the MS selection window.
-
double
precursorUpperMz
() const¶ Upper √(m/z) bound of the MS selection window.
-
double
scanCycleTime
() const¶ The time (in seconds) between each scan for this window. In a typical DIA workflow, we anticipate that this is constant for all windows in a SwathRun
-
double
firstScanRetentionTimeOffset
() const¶ The time (in seconds) from the start of the experiment to the first scan for this window.
-
const Eigen::VectorXd &
retentionTime
() const¶ A vector (of size
N
) retention times whereN
is the number of scans performed, or spectra, for this window
-
ptrdiff_t
numberOfSpectra
() const¶ The number of scans performed, or spectra, for this window.
-
const Eigen::VectorXd &
massOverCharge
() const¶ A vector (of size
M
) mass over charge (m/z) values, whereM
represents all possible (m/z) values that could be detected by the mass spectrometer in its current configuration
-
std::shared_ptr<const MassOverChargeTransformer>
getMzTransformer
() const¶ Get a transformer that can report the bounds of the mass over charge for this SwathMap, as well as perform conversions between (m/z) and √(m/z) index
-
Chromatogram2DDense
extractedIonChromatogram
(const IMassOverChargeRange &mzRange) const¶ Extract a dense two-dimensional chromatogram from the SwathMap based on the specified mass over change range. In this instance, the returned chromatogram will span the full retention time range if you only require a subset of retention times, see toffee::SwathMap::filteredExtractedIonChromatogram
- Warning
If the mass range is large, you can expect a large memory requirement for this function. In this case, you may wish to use toffee::SwathMap::extractedIonChromatogramSparse
- Parameters
mzRange
: specifies the upper and lower bounds of mass over charge for querying the SwathMap
-
Chromatogram2DSparse
extractedIonChromatogramSparse
(const IMassOverChargeRange &mzRange) const¶ Extract a sparse two-dimensional chromatogram from the SwathMap based on the specified mass over change range. In this instance, the returned chromatogram will span the full retention time range if you only require a subset of retention times, see toffee::SwathMap::filteredExtractedIonChromatogramSparse
- Parameters
mzRange
: specifies the upper and lower bounds of mass over charge for querying the SwathMap
-
Chromatogram2DDense
filteredExtractedIonChromatogram
(const IMassOverChargeRange &mzRange, const IRetentionTimeRange &rtRange) const¶ Extract a dense two-dimensional chromatogram from the SwathMap based on the specified mass over change and retention time ranges.
- Warning
If the ranges are large, you can expect a large memory requirement for this function. In this case, you may wish to use toffee::SwathMap::filteredExtractedIonChromatogramSparse
- Parameters
-
Chromatogram2DSparse
filteredExtractedIonChromatogramSparse
(const IMassOverChargeRange &mzRange, const IRetentionTimeRange &rtRange) const¶ Extract a sparse two-dimensional chromatogram from the SwathMap based on the specified mass over change and retention time ranges.
-
using
-
class
MassOverChargeTransformer
¶ A simple class for converting between (m/z) space and √(m/z) index space. Furthermore, it is typically used to define the full range of allowable mass over charge of a SwathMap. Importantly, this class and its concrete children allow a user to convert between mass over charge space and √(m/z) index space using the Intrinsic Mass Spacing parameters
alpha
andbeta
where:\[ \frac{m}{z} = \left( \alpha_{IMS} i_{\sqrt{m/z}} + \beta_{trunc} \right)^2 \]where \(\alpha_{IMS}\) is the IMS value, \(i_{\sqrt{m/z}}\) is the unsigned integer index of the \(m/z\) value in \(\sqrt{m/z}\) space, and \(\beta_{trunc}\) is to control for any truncation error that may be introduced from the conversion process.
The principle use of the ranges is to enable multiple different ways of defining a range, and then converting between world and index space. In the instance of converting from world to index, the range classes will ‘snap’ to the closest index. The upper and lower bounds are inclusive, meaning that a range with lowerIMSCoord, upperIMSCoord = 154, 154 would return data with a single row (the data contained along index 154). Similarly, a range with lowerIMSCoord, upperIMSCoord = 153, 155 would return data with three rows, centered at imsCoord 154.
Subclassed by toffee::MassOverChargeTransformerOrbitrap, toffee::MassOverChargeTransformerTOF
Public Functions
-
MassOverChargeTransformer
()¶
-
virtual
~MassOverChargeTransformer
()¶
-
virtual double
lowerMzInWorldCoords
() const = 0¶ The lower bound of mass over charge range in “normal” space.
-
virtual double
upperMzInWorldCoords
() const = 0¶ The upper bound of mass over charge range in “normal” space.
-
virtual int32_t
lowerMzInIMSCoords
() const = 0¶ The lower bound of mass over charge range in √(m/z) index space.
-
virtual int32_t
upperMzInIMSCoords
() const = 0¶ The upper bound of mass over charge range in √(m/z) index space.
-
IntrinsicMassSpacingType
imsType
() const¶ The Intrinsic Mass Spacing type See toffee::IMassOverChargeRange for more details
-
double
imsAlpha
() const¶ The Intrinsic Mass Spacing
alpha
parameter. See toffee::IMassOverChargeRange for more details
-
double
imsBeta
() const¶ The Intrinsic Mass Spacing
beta
parameter See toffee::IMassOverChargeRange for more details
-
int32_t
imsGamma
() const¶ The Intrinsic Mass Spacing
gamma
parameter See toffee::IMassOverChargeRange for more details
-
int32_t
toIMSCoords
(double mz) const¶ Convert the specified mass over charge into Intrinsic Mass Spacing index space
- Parameters
mz
: mass over charge value to be converted
-
virtual int32_t
toIMSCoordsPreGammaAdjustment
(double mz) const = 0¶ Convert the specified mass over charge into Intrinsic Mass Spacing index space before applying IMS gamma. This function allows the coord to be negative, and should only every be used when first creating the the point cloud
- Parameters
mz
: mass over charge value to be converted
-
virtual double
toWorldCoords
(int32_t imsIdx) const = 0¶ Convert the specified Intrinsic Mass Spacing index into mass over charge space
- Parameters
imsCoord
: IMS index to be converted
-
-
class
MassOverChargeTransformerTOF
: public toffee::MassOverChargeTransformer¶ A simple class for converting between (m/z) space and √(m/z) index space. Furthermore, it is typically used to define the full range of allowable mass over charge of a SwathMap.
See toffee::IMassOverChargeRange for how this conversion takes place
Public Functions
-
MassOverChargeTransformerTOF
()¶
-
MassOverChargeTransformerTOF
(double imsAlpha, double imsBeta, int32_t imsGamma)¶ - Parameters
imsAlpha
: the Intrinsic Mass Spacingalpha
parameterimsBeta
: the Intrinsic Mass Spacingbeta
parameter
-
MassOverChargeTransformerTOF
(double imsAlpha, double imsBeta, int32_t imsGamma, int32_t lowerMzInIMSCoords, int32_t upperMzInIMSCoords)¶ - Parameters
imsAlpha
: the Intrinsic Mass Spacingalpha
parameterimsBeta
: the Intrinsic Mass Spacingbeta
parameterlowerMzInIMSCoords
: the lower bound of mass over charge range in IMS index spaceupperMzInIMSCoords
: the upper bound of mass over charge range in IMS index space
-
double
lowerMzInWorldCoords
() const¶ The lower bound of mass over charge range in “normal” space.
-
double
upperMzInWorldCoords
() const¶ The upper bound of mass over charge range in “normal” space.
-
int32_t
lowerMzInIMSCoords
() const¶ The lower bound of mass over charge range in √(m/z) index space.
-
int32_t
upperMzInIMSCoords
() const¶ The upper bound of mass over charge range in √(m/z) index space.
-
int32_t
toIMSCoordsPreGammaAdjustment
(double mz) const¶ Convert the specified mass over charge into Intrinsic Mass Spacing index space before applying IMS gamma. This function allows the coord to be negative, and should only every be used when first creating the the point cloud
- Parameters
mz
: mass over charge value to be converted
-
double
toWorldCoords
(int32_t imsIdx) const¶ Convert the specified Intrinsic Mass Spacing index into mass over charge space
- Parameters
imsCoord
: IMS index to be converted
-
Spectra & Chromatograms¶
-
class
Spectrum1D
¶ A simple data holding class primarily used to return results from querying a SwathMap.
Public Functions
-
Spectrum1D
(const Eigen::VectorXd &mz)¶ Initialise the Spectrum1D with a given set of mass over charge values and setting a correspondingly sized intensity vector to zeroes
- Parameters
mz
: a vector (of sizeM
) mass over charge values captured by this spectrum
-
Spectrum1D
(const std::vector<double> &mz, const std::vector<int> &intens)¶ Initialise the Spectrum1D with a given set of mass over charge values and a correspondingly sized intensity vector
- Parameters
mz
: a vector (of sizeM
) mass over charge values captured by this spectrumintens
: a vector (of sizeM
) intensity values captured by this spectrum
Public Members
-
Eigen::VectorXd
massOverCharge
¶ An Eigen::VectorXd (of size
M
) mass over charge values captured by this spectrum
-
Eigen::VectorXi
intensities
¶ An Eigen::VectorXi (of size
M
) intensity values captured by this spectrum
-
-
class
Chromatogram1D
¶ A simple data holding class primarily used to return results from querying a SwathMap.
Public Functions
-
Chromatogram1D
(const Eigen::VectorXd &rt)¶ Initialise the Chromatogram1D with a given set of retention times and setting a correspondingly sized intensity vector to zeroes
- Parameters
rt
: a vector (of sizeN
) retention times captured by this chromatogram
Public Members
-
Eigen::VectorXd
retentionTime
¶ An Eigen::VectorXd (of size
N
) retention times captured by this chromatogram
-
Eigen::VectorXi
intensities
¶ An Eigen::VectorXi (of size
N
) intensity values captured by this chromatogram
-
-
class
Chromatogram2DSparse
¶ A simple data holding class primarily used to return results from querying a SwathMap. The sparse moiniker refers to the fact that zero intensities are excluded from the intensity matrix to improve performance
Public Types
-
using
Value
= int¶ The type used for representing the intensity values.
-
using
Triplet
= Eigen::Triplet<Value>¶ A (retention time index, √(m/z) index, intensity) triple that corresponds to a point in the point cloud. These are primarily used for loading the toffee::Chromatogram2DSparse::Martix
-
using
Matrix
= Eigen::SparseMatrix<Value>¶ An Eigen::SparseMatrix in column major layout that is used to represent the intensities
Public Functions
-
Chromatogram2DSparse
(size_t retentionTimeSize, size_t massOverChargeSize, const std::vector<Triplet> &triplets)¶ - Parameters
retentionTimeSize
: the number of retention times captured by this chromatogrammassOverChargeSize
: the number of mass over charge values captured by this chromatogramtriplets
: a vector of triplets that represent points in the point cloud that are used to initialise the Chromatogram2DSparse::intensities sparse matrix
-
Chromatogram2DSparse
(Chromatogram2DSparse &&other)¶
-
Chromatogram2DSparse &
operator=
(Chromatogram2DSparse &&other)¶
Public Members
-
Eigen::VectorXd
retentionTime
¶ An Eigen::VectorXd (of size
N
) retention times captured by this chromatogram
-
Eigen::VectorXd
massOverCharge
¶ An Eigen::VectorXd (of size
M
) mass over charge values captured by this chromatogram
-
Matrix
intensities
¶ An Eigen::SparseMatrix (of shape
N
xM
) in column major layout, of intensity values captured by this chromatogram
-
using
-
class
Chromatogram2DDense
¶ A simple data holding class primarily used to return results from querying a SwathMap. The dense moniker refers to the fact that zero intensities are included in the intensity matrix and may result in a large memory footprint.
- Warning
Careless useage of this class may result in very large memory requirements of the calling function. In these cases, Chromatogram2DSparse may be useful.
Public Functions
-
Chromatogram2DDense
(const Chromatogram2DSparse &chromatogram2DSparse)¶ Convert a two-dimensional chromatogram with sparse intensities into a two-dimensional chromatogram with intensities in a dense matrix format.
- Parameters
chromatogram2DSparse
: the sparse representation of the chromatogram
Public Members
-
Eigen::VectorXd
retentionTime
¶ An Eigen::VectorXd (of size
N
) retention times captured by this chromatogram
-
Eigen::VectorXd
massOverCharge
¶ An Eigen::VectorXd (of size
M
) mass over charge values captured by this chromatogram
-
Eigen::MatrixXi
intensities
¶ An Eigen::MatrixXi (of shape
N
xM
) in column major layout, of intensity values captured by this chromatogram
MassOverChargeRange¶
-
class
IMassOverChargeRange
¶ Subclassed by toffee::MassOverChargeRange, toffee::MassOverChargeRangeIMSCoords, toffee::MassOverChargeRangeWithPixelHalfWidth, toffee::MassOverChargeRangeWithPPMFullWidth
Public Functions
-
virtual
~IMassOverChargeRange
()¶
Using the Intrinsic Mass Spacing parameters
alpha
andbeta
, convert the lower bound of the concrete child class into its corresponding √(m/z) index
Using the Intrinsic Mass Spacing parameters
alpha
andbeta
, convert the upper bound of the concrete child class into its corresponding √(m/z) index
-
virtual
Warning
doxygenclass: Cannot find class “toffee::MassOverChargeRangeWithPixelWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/
Warning
doxygenclass: Cannot find class “toffee::MassOverChargeRangeWithPPMWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/
-
class
MassOverChargeRange
: public toffee::IMassOverChargeRange¶ An implementation of IMassOverChargeRange that allows the user to explicitly set the upper and lower bounds of the mass over charge range
Public Functions
-
MassOverChargeRange
(double lowerMz, double upperMz)¶ - Parameters
lowerMz
: the lower bound of mass over charge for this rangeupperMz
: the upper bound of mass over charge for this range
-
double
massOverChargeLower
() const¶ The lower bound of mass over charge for this range.
-
double
massOverChargeUpper
() const¶ The upper bound of mass over charge for this range.
-
-
class
MassOverChargeRangeIMSCoords
: public toffee::IMassOverChargeRange¶ An implementation of IMassOverChargeRange that allows the user to explicitly set the upper and lower bounds of the mass over charge range in terms of the √(m/z) index space
Public Functions
-
MassOverChargeRangeIMSCoords
(int32_t lowerIMSCoord, int32_t upperIMSCoord)¶ - Parameters
lowerIMSCoord
: the lower bound of mass over charge for this range in √(m/z) index spaceupperIMSCoord
: the upper bound of mass over charge for this range in √(m/z) index space
-
RetentionTimeRange¶
-
class
IRetentionTimeRange
¶ An interface class that allows users to define retention time ranges using various methods. These ranges are used by toffee::SwathMap when querying the intensity data.
The principle use of the ranges is to enable multiple different ways of defining a range, and then converting between world and index space. In the instance of converting from world to index, the range classes will ‘snap’ to the closest index. The upper and lower bounds are inclusive, meaning that a range with lowerRTIdx, upperRTIdx = 154, 154 would return data with a single row (the data contained along index 154). Similarly, a range with lowerRTIdx, upperRTIdx = 153, 155 would return data with three rows, centered at retentionTimeIdx 154.
Subclassed by toffee::RetentionTimeRange, toffee::RetentionTimeRangeWithPixelHalfWidth
Public Functions
-
virtual
~IRetentionTimeRange
()¶
-
virtual int32_t
lowerRTIdx
(const Eigen::VectorXd &retentionTimes) const = 0¶ Find the index in the specified retention time vector that contains the value that is closest to the lower bound specified in the construction of a concrete child class
- Warning
This function assumes, but does not check, that the retention time vector is sorted in ascending order
- Parameters
retentionTimes
: a vector of retention times in ascending order
-
virtual int32_t
upperRTIdx
(const Eigen::VectorXd &retentionTimes) const = 0¶ Find the index in the specified retention time vector that contains the value that is closest to the upper bound specified in the construction of a concrete child class
- Warning
This function assumes, but does not check, that the retention time vector is sorted in ascending order
- Parameters
retentionTimes
: a vector of retention times in ascending order
Public Static Functions
-
int32_t
toRTIdx
(double retentionTime, const Eigen::VectorXd &retentionTimes)¶ Find the index in the specified retention time vector that contains the value that is closest to the specified retention time we are searching for. If there are two values equidistant, arbitrarily preference the higher index.
- Warning
This function assumes, but does not check, that the retention time vector is sorted in ascending order
- Parameters
retentionTime
: the retention time value that we wish to search forretentionTimes
: a vector of retention times in ascending order
-
virtual
Warning
doxygenclass: Cannot find class “toffee::RetentionTimeRangeWithPixelWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/
-
class
RetentionTimeRange
: public toffee::IRetentionTimeRange¶ An implementation of IRetentionTimeRange that allows the user to explicitly set the upper and lower bounds of the retention time range
Public Functions
-
RetentionTimeRange
(double lowerRetentionTime, double upperRetentionTime)¶ - Parameters
lowerRetentionTime
: the lower bound of retention time for this rangeupperRetentionTime
: the upper bound of retention time for this range
-
double
retentionTimeLower
() const¶ The lower bound of retention time for this range.
-
double
retentionTimeUpper
() const¶ The upper bound of retention time for this range.
-
int32_t
lowerRTIdx
(const Eigen::VectorXd &retentionTimes) const¶
-
int32_t
upperRTIdx
(const Eigen::VectorXd &retentionTimes) const¶
-
Versioning¶
-
class
Version
¶ Public Static Functions
-
static int
combineFileFormatVersions
(int majorVersion, int minorVersion)¶ A helper function to combine major and minor version numbers into a form that can be easily compared
Public Static Attributes
-
const std::string
LIBRARY_VERSION
= "dev"¶ The version of the library.
-
const int
FILE_FORMAT_MAJOR_VERSION_CURRENT
= 1¶ The latest major version of the
toffee
file format.
-
const int
FILE_FORMAT_MINOR_VERSION_CURRENT
= 2¶ The latest minor version of the
toffee
file format.
-
const int
FILE_FORMAT_MAJOR_VERSION_COMPATIBILITY
= 0¶ The version of the major
toffee
file that we are backwards compatible with
-
const int
FILE_FORMAT_MINOR_VERSION_COMPATIBILITY
= 2¶ The version of the minor
toffee
file that we are backwards compatible with
-
const int
FILE_FORMAT_NUM_ALLOWED_MINOR_VERSIONS
= 1000¶ We do not support minor versions greater than or equal to this number.
-
static int