Toffee C++ API

Creating a Toffee File

The following API documentation gives a flavour for how you can create a toffee file. However, it is best explained by way of example, such as the following python code in the unit testing framework. As mentioned elsewhere, the python and C++ APIs are laregly equivalent.

class DummyToffeeFile():
    def __init__(
        self,
        name,
        num_rt_points=1626,
        scan_cycle_time=3.32,
        ms1_rt_offset=0.17,
        ms2_rt_offset=0.506,
        num_mz_points=1000,
        ms2_name=None,
        ims_type=toffee.IntrinsicMassSpacingType.TOF,
        fraction_not_zero=0.01,
    ):
        self.name = name
        self.hash = sum(ord(ch) for ch in name)
        self.seed = self.hash % ((np.iinfo(np.int32).max + 1) * 2)
        logger.info('%s: hash=%s, seed=%s', self.name, self.hash, self.seed)
        np.random.seed(self.seed)

        self.header = self._header(ims_type)

        # create retention time vectors for each window
        self.scan_cycle_time = scan_cycle_time
        self.ms1_rt_offset = ms1_rt_offset
        self.ms2_rt_offset = ms2_rt_offset
        self.ms1_rt_vector = np.arange(num_rt_points) * self.scan_cycle_time + self.ms1_rt_offset
        self.ms2_rt_vector = np.arange(num_rt_points) * self.scan_cycle_time + self.ms2_rt_offset

        # set the mass over charge bounds of the data
        lower = 90
        upper = 2100
        (
            self.ims_alpha,
            self.ims_beta,
            self.ims_gamma,
            self.ims_coord_vector,
            self.mz_vector,
        ) = self.create_mz_vector(ims_type, lower, upper, num_mz_points)

        # we're going to create 1x MS1 window and 1x MS2 window
        self.ms1_name = toffee.ToffeeWriter.MS1_NAME
        if ms2_name is None:
            ms2_name = toffee.ToffeeWriter.ms2Name(41)
        self.ms2_name = ms2_name
        # window boundaries match ProCan90-M06-07.tof/ms2-041
        self.window_lower, self.window_center, self.window_upper = (608.5, 611.5, 614.5)

        # create a dense matrix of fake data
        ms1_mz, ms1_rt = np.meshgrid(self.mz_vector, self.ms1_rt_vector)
        self.expected_intensities_ms1 = (ms1_rt * ms1_mz).astype(np.uint32)
        ms2_mz, ms2_rt = np.meshgrid(self.mz_vector, self.ms2_rt_vector)
        self.expected_intensities_ms2 = (ms2_rt * ms2_mz).astype(np.uint32)

        # generate our toffee writer so we can add windows to it
        self.tof_fname = name + '.tof'
        self.ims_type = ims_type
        writer = toffee.ToffeeWriter(self.tof_fname, self.ims_type)
        writer.addHeaderMetadata(self.header)

        # loop through the two windows, modify their intensities by setting
        # a random seclectin to zero, convert this into a point cloud and
        # then add it to the toffee writer
        self.scan_data = {}
        for map_name, rt_vector, expected_intensities in zip(
            [self.ms1_name, self.ms2_name],
            [self.ms1_rt_vector, self.ms2_rt_vector],
            [self.expected_intensities_ms1, self.expected_intensities_ms2],
        ):
            # create the raw data container
            scan_data = toffee.RawSwathScanData(map_name, ims_type)
            if map_name == self.ms2_name:
                scan_data.windowLower = self.window_lower
                scan_data.windowCenter = self.window_center
                scan_data.windowUpper = self.window_upper
            else:
                scan_data.windowLower = -1
                scan_data.windowCenter = -1
                scan_data.windowUpper = -1

            # slice the data as if we were a mass spectrometer creating 'scans'
            for r, rt_value in enumerate(rt_vector):
                intensity_vector = expected_intensities[r, :]

                # add some zeros by generating a mask and assigning a random
                # fraction to be True - then set anything in the intensity
                # vector, masked by True, to be zero. In one case, set all
                # intensities to be zero (mimicking a scan with no data)
                if r % 10 == 0:
                    tmp_fraction_not_zero = 0.0
                else:
                    tmp_fraction_not_zero = fraction_not_zero
                mask = np.random.uniform(0, 1, size=intensity_vector.shape)
                mask[mask < tmp_fraction_not_zero] = 0
                mask[mask != 0] = 1
                mask = mask.astype(bool)
                intensity_vector[mask] = 0
                # double check the masking worked as expected
                if (~mask).any():
                    assert (intensity_vector > 0).any()
                mz = self.mz_vector[~mask]
                intens = intensity_vector[~mask]

                # add to the scan
                scan_data.addScan(rt_value, mz, intens)

            # create our point cloud
            pcl = scan_data.toPointCloud()

            # save scan data for later
            self.scan_data[map_name] = scan_data

            # assert that the point cloud properties were set correctly
            assert pcl.name == map_name
            assert pcl.imsType == ims_type
            assert pcl.windowLower == scan_data.windowLower
            assert pcl.windowCenter == scan_data.windowCenter
            assert pcl.windowUpper == scan_data.windowUpper

            # assert that the retention time was correctly calculated
            np.testing.assert_allclose(pcl.scanCycleTime, self.scan_cycle_time)
            if map_name == self.ms2_name:
                np.testing.assert_allclose(pcl.firstScanRetentionTimeOffset, self.ms2_rt_offset)
            else:
                np.testing.assert_allclose(pcl.firstScanRetentionTimeOffset, self.ms1_rt_offset)

            ims_props = pcl.imsProperties
            assert len(ims_props.sliceIndex) == num_rt_points, \
                '{} {}\n{}'.format(len(ims_props.sliceIndex), num_rt_points, ims_props.sliceIndex)

            # assert that the Intrinsic Mass Spacing parameters were calculated
            # correctly
            np.testing.assert_allclose(ims_props.medianAlpha, self.ims_alpha)
            np.testing.assert_allclose(ims_props.medianBeta, self.ims_beta)
            np.testing.assert_allclose(ims_props.gamma, self.ims_gamma)

            # add to the file
            writer.addPointCloud(pcl)

    @classmethod
    def create_mz_vector(cls, ims_type, lower, upper, num_mz_points):
        """
        Create a m/z vector that respects the IMS spacings for a Sciex (TOF)
        type instrument
        """
        if ims_type == toffee.IntrinsicMassSpacingType.TOF:
            return cls._create_mz_vector_tof(lower, upper, num_mz_points)
        elif ims_type == toffee.IntrinsicMassSpacingType.ORBITRAP:
            return cls._create_mz_vector_orbitrap(lower, upper, num_mz_points)
        raise ValueError('Upsupported IMS type: {}'.format(ims_type))

    @classmethod
    def _create_mz_vector_tof(cls, lower, upper, num_mz_points):
        """
        Create a m/z vector that respects the IMS spacings for a Sciex (TOF)
        type instrument
        """
        sqrt_lower = np.sqrt(lower)
        sqrt_upper = np.sqrt(upper)

        # and use these to create dummy Intrinsic Mass Spacing properties
        ims_alpha = (sqrt_upper - sqrt_lower) / num_mz_points
        ims_gamma = int(np.round(sqrt_lower / ims_alpha, decimals=0))
        ims_beta = sqrt_lower - ims_gamma * ims_alpha
        if ims_beta > 0:
            ims_gamma += 1
            ims_beta -= ims_alpha

        # from these create an mass over charge vector with known properties
        ims_coord = np.arange(num_mz_points + 1).astype(int)
        mz = ((ims_coord + ims_gamma) * ims_alpha + ims_beta) ** 2

        # add some padding to ims
        ims_gamma -= 10
        ims_coord += 10

        return ims_alpha, ims_beta, ims_gamma, ims_coord, mz

    @classmethod
    def _create_mz_vector_orbitrap(cls, lower, upper, num_mz_points):
        """
        Create a m/z vector that respects the IMS spacings for a Thermo (Orbitrap)
        type instrument
        """
        inverse_sqrt_lower = 1 / np.sqrt(lower)
        inverse_sqrt_upper = 1 / np.sqrt(upper)

        # and use these to create dummy Intrinsic Mass Spacing properties
        ims_alpha = (inverse_sqrt_upper - inverse_sqrt_lower) / num_mz_points
        ims_gamma = int(np.round(inverse_sqrt_lower / ims_alpha, decimals=0))
        ims_beta = inverse_sqrt_lower - ims_gamma * ims_alpha
        if ims_beta > 0:
            ims_gamma -= 1
            ims_beta += ims_alpha

        # from these create an mass over charge vector with known properties
        ims_coord = np.arange(num_mz_points + 1).astype(int)
        mz = ((ims_coord + ims_gamma) * ims_alpha + ims_beta) ** -2

        # add some padding to ims
        ims_gamma -= 10
        ims_coord += 10

        return ims_alpha, ims_beta, ims_gamma, ims_coord, mz

    @classmethod
    def _header(cls, ims_type):
        if ims_type == toffee.IntrinsicMassSpacingType.TOF:
            header_fname = 'Header-TOF.mzML'
        elif ims_type == toffee.IntrinsicMassSpacingType.ORBITRAP:
            header_fname = 'Header-Orbitrap.mzML'
        else:
            raise ValueError('Upsupported IMS type: {}'.format(ims_type))

        curdir = os.path.dirname(os.path.abspath(__file__))
        header_fname = os.path.join(curdir, header_fname)
        with open(header_fname, 'r') as f:
            return f.read()

ScanDataPointCloud

class ScanDataPointCloud

The data from a single MS1 or MS2 window collected as a point cloud of X points discovered from N scans

Public Members

std::string name

The name of the window. This should match one of the formats specified by toffee::ToffeeWriter

double windowLower

The lower (m/z) bound of the MS selection window - should be -1 for MS1 scans

double windowCenter

The center (m/z) of the MS selection window - should be -1 for MS1 scans

double windowUpper

The upper (m/z) bound of the MS selection window - should be -1 for MS1 scans

double scanCycleTime

The time (in seconds) between each scan for this window. In a typical DIA workflow, we anticipate that this is constant for all windows in a SwathRun

double firstScanRetentionTimeOffset

The time (in seconds) from the start of the experiment to the first scan for this window.

IntrinsicMassSpacingType imsType

The Intrinsic Mass Spacing type (i.e. ‘TOF’, ‘Orbitrap’)

IMSProperties imsProperties

The Intrinsic Mass Spacing properties.

std::vector<uint32_t> intensity

The intensity value for each of the X points in the point cloud.

RawSwathScanData

class RawSwathScanData

The raw data from a single MS1 or MS2 window collected as a vector of N individual scans at discrete retention times

Public Functions

explicit RawSwathScanData(const std::string &name, IntrinsicMassSpacingType imsType)
Parameters

name – the name of the window. This should match one of the formats specified by toffee::ToffeeWriter

void addScan(double rt, const std::vector<double> &mz, const std::vector<uint32_t> &intensity)

Add a scan to the raw data

Parameters
  • rt – the retention time of this scan

  • mz – the mass over charge data collected during this scan

  • intensity – the intensity data collected during this scan

void addMassAccuracyWriter(const std::shared_ptr<const MassAccuracyWriter> &massAccuracyWriter)

The mass accuracy can be used as a debugging tool to keep track of the actual and converted mass over charge values during conversion to point cloud (index space) using the Intrinsic Mass Spacing.

Parameters

if – this is not null, then a debugging step will be taken during the conversion to a point cloud

ScanDataPointCloud toPointCloud() const

Convert the raw data into the point cloud format. Included in this is the conversion of mass over charge to √(m/z) index space using the Intrinsic Mass Spacing properties (c.f. toffee::IMassOverChargeRange)

Public Members

const std::string name

The name of the window. This should match one of the formats specified by toffee::ToffeeWriter

double windowLower

The lower (m/z) bound of the MS selection window - should be -1 for MS1 scans

double windowCenter

The center (m/z) of the MS selection window - should be -1 for MS1 scans

double windowUpper

The upper (m/z) bound of the MS selection window - should be -1 for MS1 scans

IntrinsicMassSpacingType imsType

The type of mass analyzer in terms of the Intrinsic Mass Spacing.

size_t numberOfScans = 0

The number (N) of scans in this window.

std::vector<double> retentionTime

A vector (of size N) retention times.

std::vector<std::vector<double>> mzData

A vector (of size N) of vectors where the nested vector contains the mass over charge data collected during the n-th scan

std::vector<std::vector<uint32_t>> intensityData

A vector (of size N) of vectors where the nested vector contains the intensity data collected during the n-th scan

ToffeeWriter

class ToffeeWriter

If we consider the raw data collected within a given window across a full gradient during a SWATH-MS run, each data point represents:

  • Mass over charge The abscissa refers to the mass over charge ratio of the peptide ion

  • Retention time: The ordinate refers to the time at which the peptide ion is elutes from the Liquid Chromatography column

  • Intensity: The applicate is a count of the number of times a peptide ion is detected on the sensor

Through the Intrinsic Mass Spacing, mass over charge can be represented by an unsigned integer, as can intensity. Furthermore, the retention time for each data point can be considered as an index into an associated (but much smaller) vector of doubles. In this manner, we can save the raw data into a file format as 3 vectors of unsigned 32-bit integers with some associated metadata.

toffee takes this exact approach, saving each SWATH-MS window as a group in an HDF5 file; where each group has:

  • double attributes lower, center, and upper to describe the properties of the window;

  • double attributes scanCycleTime ($t_c$) and firstScanRetentionTimeOffset ($t_0$) that describe the retention time vector for each window such that $t(i) = t_c i + t_0$ in seconds;

  • a uint32 dataset of retentionTimeIdx, of equal length as the retention time vector, that represents indices into the imsCoord and intensity between which the corresponding value of $t(i)$ is applied;

  • double attributes imsAlpha (${IMS}$) and imsBeta (${trunc}$) for the Intrinsic Mass Spacing m/z conversion such that ${m}{z} = ( {IMS} i_{{m/z}} + {trunc} )^2$; and

  • uint32 datasets of imsCoord and intensity for the point cloud, with one entry for each point in the cloud.

Thus, through this ToffeeWriter, we can generate a file with lossless compression that can represent a SWATH-MS mzML file with a ten-fold decrease in the file size - making it on par with the file size of the original vendor format.

Public Functions

explicit ToffeeWriter(const std::string &h5FilePath, IntrinsicMassSpacingType imsType)
Parameters

h5FilePath – the output path of the toffee file. If a file already exists at that path, it will be over-written

void addPointCloud(const ScanDataPointCloud &pcl) const

Add a point cloud for a given window to the toffee file. If a window with the same name already exists in the file, it will throw an exception

Parameters

pcl – the point cloud to be added

void addHeaderMetadata(const std::string &header) const

Add a header string to the HDF5 file. In normal circumstances, this will be a PSI-formatted XML string matching the header for mzML files

Public Static Functions

static std::string ms2Name(const size_t &idx)

Name of the MS2 HDF5 group corresponding to the specified index.

static std::string imsTypeAsStr(const IntrinsicMassSpacingType &imsType)

Name of the IMS Type in string format.

static IntrinsicMassSpacingType imsTypeFromStr(const std::string &imsTypeAsStr)

Convert the name of the IMS Type in string format to the enum.

Public Static Attributes

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_MAJOR_VERSION   = "FILE_FORMAT_MAJOR_VERSION"

Name of the file major version HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_MINOR_VERSION   = "FILE_FORMAT_MINOR_VERSION"

Name of the file minor version HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_LIBRARY_VERSION   = "CREATED_BY_LIBRARY_VERSION"

Name of the softare version used to create the file HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_EXPERIMENT_METADATA   = "metadataXML"

Name of the run’s header HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_TYPE   = "IMSType"

Name of a window’s Intrinsic Mass Spacing type HDF5 attribute Valid values are: TOF, Orbitrap

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_ALPHA   = "IMSAlpha"

Name of a window’s Intrinsic Mass Spacing alpha HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_BETA   = "IMSBeta"

Name of a window’s Intrinsic Mass Spacing beta HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_IMS_GAMMA   = "IMSGamma"

Name of a window’s Intrinsic Mass Spacing gamma HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_BOUNDS_LOWER   = "precursorLower"

Name of a window’s mass over charge lower bounds HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_BOUNDS_UPPER   = "precursorUpper"

Name of a window’s center mass over charge HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_CENTER   = "precursorCenter"

Name of a window’s mass over charge upper bounds HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_SCAN_CYCLE_TIME   = "scanCycleTime"

Name of a window’s scan cycle time HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string ATTR_WINDOW_RETENTION_TIME_OFFSET   = "firstScanRetentionTimeOffset"

Name of a window’s first retention time offset HDF5 attribute.

static TOF_WIN_DLL_DECLSPEC const std::string DATASET_RETENTION_TIME_IDX   = "retentionTimeIdx"

Name of a window’s retention time index HDF5 dataset.

static TOF_WIN_DLL_DECLSPEC const std::string DATASET_IMS_ALPHA_PER_SCAN   = "IMSAlphaPerScan"

Name of a window’s Intrinsic Mass Spacing alpha per scan HDF5 dataset.

static TOF_WIN_DLL_DECLSPEC const std::string DATASET_IMS_BETA_PER_SCAN   = "IMSBetaPerScan"

Name of a window’s Intrinsic Mass Spacing beta per scan HDF5 dataset.

static TOF_WIN_DLL_DECLSPEC const std::string DATASET_MZ_IMS_IDX   = "imsCoord"

Name of a window’s √(m/z) index HDF5 dataset.

static TOF_WIN_DLL_DECLSPEC const std::string DATASET_INTENSITY   = "intensity"

Name of a window’s intensity HDF5 dataset.

static TOF_WIN_DLL_DECLSPEC const std::string MS1_NAME   = std::string{"ms1"}

Name of the MS1 HDF5 group.

static TOF_WIN_DLL_DECLSPEC const std::string MS2_PREFIX   = std::string{"ms2-"}

Prefix for the MS2 HDF5 groups.

Accessing Data in a Toffee File

SwathRun

class SwathRun

A class that enables access to properties of a full toffee file.

Public Functions

explicit SwathRun(const std::string &fname)
Parameters

fname – path to the toffee file of interest

inline std::string header() const

Extract the metadata header from the specified file. In usual circumstances one could expect this to look like a PSI-formatter XML description of the run

inline int formatMajorVersion() const

The file format major version.

inline int formatMinorVersion() const

The file format minor version.

inline IntrinsicMassSpacingType imsType() const

The type of mass analyzer Intrinsic Mass Spacing.

inline bool hasMS1Data() const

Return true if this toffee file contains data for the MS1 window.

inline std::vector<MS2WindowDescriptor> ms2Windows() const

Return a vector of descriptions of each the MS2 windows contained in the toffee file

std::map<double, std::string> mapPrecursorsToMS2Names(const std::vector<double> &precursorMzValues, double lowerMzOverlap, double upperMzOverlap) const

It is often useful to know which precursors map to specific MS2 windows in the toffee file. For instance, this can be useful for performance reasons when iterating through a spectral library

Parameters
  • precursorMzValues – A vector of mass over charge values that correspond to the precursors that we wish to map to MS2 windows

  • lowerMzOverlap – It is usual to have MS2 windows overlap in an experiment, this allows us to exclude precursors with this offest from the lower boundary of an MS2 window

  • upperMzOverlap – It is usual to have MS2 windows overlap in an experiment, this allows us to exclude precursors with this offest from the upper boundary of an MS2 window

SwathMap loadSwathMap(const std::string &mapName, bool lossyAdjustIMSCoords = false) const

Load a SwathMap for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same SwathRun concurrently.

This function is primarily for the python wrapping, when calling from C++, we recommend using SwathRun::loadSwathMapPtr

Parameters
  • mapName – name of the window of interest (must match the naming convention to ToffeeWriter)

  • lossyAdjustIMSCoords – if true, the IMS coords are translated from the scan specific alpha and beta to the window alpha and beta in a way that minimizes the ppm error. Note, this will minimize the mass error of the transform, but this is a lossy operation that cannot be undone!

std::shared_ptr<SwathMap> loadSwathMapPtr(const std::string &mapName, bool lossyAdjustIMSCoords = false) const

Load a SwathMap for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same SwathRun concurrently.

Parameters
  • mapName – name of the window of interest (must match the naming convention to ToffeeWriter)

  • lossyAdjustIMSCoords – if true, the IMS coords are translated from the scan specific alpha and beta to the window alpha and beta in a way that minimizes the ppm error. Note, this will minimize the mass error of the transform, but this is a lossy operation that cannot be undone!

SwathMapInMemorySpectrumAccess loadSwathMapInMemorySpectrumAccess(const std::string &mapName) const

Load a SwathMapInMemorySpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same SwathRun concurrently.

This function is primarily for the python wrapping, when calling from C++, we recommend using SwathRun::loadSwathMapPtr

Parameters

mapName – name of the window of interest (must match the naming convetion to ToffeeWriter)

std::shared_ptr<SwathMapInMemorySpectrumAccess> loadSwathMapInMemorySpectrumAccessPtr(const std::string &mapName) const

Load a SwathMapInMemorySpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same SwathRun concurrently.

Parameters

mapName – name of the window of interest (must match the naming convetion to ToffeeWriter)

SwathMapSpectrumAccess loadSwathMapSpectrumAccess(const std::string &mapName) const

Load a SwathMapSpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same SwathRun concurrently.

This function is primarily for the python wrapping, when calling from C++, we recommend using SwathRun::loadSwathMapPtr

Parameters

mapName – name of the window of interest (must match the naming convetion to ToffeeWriter)

std::shared_ptr<SwathMapSpectrumAccess> loadSwathMapSpectrumAccessPtr(const std::string &mapName) const

Load a SwathMapSpectrumAccess for a specific window from the toffee file. As this is a read-only operation, it is thread-safe and multiple threads can read from the same SwathRun concurrently.

Parameters

mapName – name of the window of interest (must match the naming convetion to ToffeeWriter)

class MS2WindowDescriptor

A simple data class for describing an MS2 window.

Public Functions

inline bool operator==(const MS2WindowDescriptor &other) const

Windows are considered equal if and only if they have the same name, and upper and lower mass over charge ranges

Public Members

std::string name

The name of the window in the toffee file.

double lower

The lower bound of the range of precursor mass over charge values that are selected to be fragmented in this MS2 window

double upper

The upper bound of the range of precursor mass over charge values that are selected to be fragmented in this MS2 window

SwathMap

class SwathMap

The SwathMap class is one of the key elements of the the toffee library. It allows access raw Time of Flight mass spectrometry data in near constant time, with relatively a small memory footprint.

Public Types

using RTreeIndex = int32_t

Defines the type used to index the mass over charge and retention time values in the RTree. For search performance and memory optimisation, we use int32_t as our index type &#8212; if the resolution of the mass spectrometer means greater than 4e9 mass over charge values can be recorded using the intrinsic mass spacing method of toffee, then this will need to change

using RtImsCoord = boost::geometry::model::point<RTreeIndex, 2, boost::geometry::cs::cartesian>

A coordinate in the Point Cloud with retention time as the ‘x’ value and √(m/z) index as the ‘y’ value.

using RTreeIntensity = uint32_t

Defines the type used to record intensity values in the RTree. For memory optimisation, intensity values remain as uint32_t &#8212; if the sensitivity of the mass spectrometer means signals greater than 4e9 can be recorded, then this will need to change

using PCLPoint = std::pair<RtImsCoord, RTreeIntensity>

A point in the point cloud, with the signal intesity as the ‘value’.

using RStar = boost::geometry::index::rstar<32>

The integer value passed here defines the number of points per leaf node of the RTree. It is selected based on a balance between search performance and memory. Using 4 is analogous to a quad-tree.

using RTree = boost::geometry::index::rtree<PCLPoint, RStar>

A spatial search tree from boost that can be used for high performance spatial searching of the data contained within a SwathMap

using Box = boost::geometry::model::box<RtImsCoord>

A type used for defining axis aligned box queries of the RTree.

using Segment = boost::geometry::model::segment<RtImsCoord>

A type used for defining segment queries of the RTree.

Public Functions

SwathMap(std::string name, double lower, double center, double upper, bool isMS1, double scanCycleTime, double firstScanRetentionTimeOffset, size_t numberOfScans, std::shared_ptr<const MassOverChargeTransformer> mzTransformer, const std::vector<PCLPoint> &points)

Primary constructor of a SwathMap. In general operation, it is unlikely that a user of the library will need to call this directly. Instead, prefer functions on SwathRun that load in windows.

Parameters
  • name – name of the MS selection window that is being represented by this data. This must take the form of ToffeeWriter::MS1_NAME or ToffeeWriter::ms2Name

  • lower – lower (m/z) bound of the MS selection window

  • center – center (m/z) of the MS selection window

  • upper – upper (m/z) bound of the MS selection window

  • isMS1 – if true, this SwathMap represents data from the MS1 precursor ions, otherwise, the data is from MS2 fragment ions

  • scanCycleTime – the time (in seconds) between each scan for this window

  • firstScanRetentionTimeOffset – the time (in seconds) from the start of the experiment to the first scan for this window

  • numberOfScans – the number of entries in the retention time vector

  • mzTransformer – transforms to and from the mass over charge world and IMS index space

  • points – A vector of toffee::SwathMap::PCLPoint representing the signal intensities detected by the mass spectrometer

SwathMap()
SwathMap(const SwathMap &other) = default
SwathMap &operator=(const SwathMap &other)
SwathMap(SwathMap &&other) noexcept
SwathMap &operator=(SwathMap &&other) noexcept
inline std::string name() const

Return the name of the SwathMap as it was recorded in the original toffee file

inline bool isMS1() const

Return true if this SwathMap represents data from the MS1 precursor ions, otherwise false if the data is from MS2 fragment ions

inline double precursorLowerMz() const

Lower √(m/z) bound of the MS selection window.

inline double precursorCenterMz() const

Center √(m/z) of the MS selection window.

inline double precursorUpperMz() const

Upper √(m/z) bound of the MS selection window.

inline double scanCycleTime() const

The time (in seconds) between each scan for this window. In a typical DIA workflow, we anticipate that this is constant for all windows in a SwathRun

inline double firstScanRetentionTimeOffset() const

The time (in seconds) from the start of the experiment to the first scan for this window.

inline const Eigen::VectorXd &retentionTime() const

A vector (of size N) retention times where N is the number of scans performed, or spectra, for this window

inline ptrdiff_t numberOfSpectra() const

The number of scans performed, or spectra, for this window.

inline const Eigen::VectorXd &massOverCharge() const

A vector (of size M) mass over charge (m/z) values, where M represents all possible (m/z) values that could be detected by the mass spectrometer in its current configuration

inline std::shared_ptr<const MassOverChargeTransformer> getMzTransformer() const

Get a transformer that can report the bounds of the mass over charge for this SwathMap, as well as perform conversions between (m/z) and √(m/z) index

Chromatogram2DDense extractedIonChromatogram(const IMassOverChargeRange &mzRange) const

Extract a dense two-dimensional chromatogram from the SwathMap based on the specified mass over change range. In this instance, the returned chromatogram will span the full retention time range &#8212; if you only require a subset of retention times, see toffee::SwathMap::filteredExtractedIonChromatogram

Warning

If the mass range is large, you can expect a large memory requirement for this function. In this case, you may wish to use toffee::SwathMap::extractedIonChromatogramSparse

Parameters

mzRange – specifies the upper and lower bounds of mass over charge for querying the SwathMap

Chromatogram2DSparse extractedIonChromatogramSparse(const IMassOverChargeRange &mzRange) const

Extract a sparse two-dimensional chromatogram from the SwathMap based on the specified mass over change range. In this instance, the returned chromatogram will span the full retention time range &#8212; if you only require a subset of retention times, see toffee::SwathMap::filteredExtractedIonChromatogramSparse

Parameters

mzRange – specifies the upper and lower bounds of mass over charge for querying the SwathMap

Chromatogram2DDense filteredExtractedIonChromatogram(const IMassOverChargeRange &mzRange, const IRetentionTimeRange &rtRange) const

Extract a dense two-dimensional chromatogram from the SwathMap based on the specified mass over change and retention time ranges.

Warning

If the ranges are large, you can expect a large memory requirement for this function. In this case, you may wish to use toffee::SwathMap::filteredExtractedIonChromatogramSparse

Parameters
  • mzRange – specifies the upper and lower bounds of mass over charge for querying the SwathMap

  • rtRange – specifies the upper and lower bounds of retention time for querying the SwathMap

Chromatogram2DSparse filteredExtractedIonChromatogramSparse(const IMassOverChargeRange &mzRange, const IRetentionTimeRange &rtRange) const

Extract a sparse two-dimensional chromatogram from the SwathMap based on the specified mass over change and retention time ranges.

Parameters
  • mzRange – specifies the upper and lower bounds of mass over charge for querying the SwathMap

  • rtRange – specifies the upper and lower bounds of retention time for querying the SwathMap

class MassOverChargeTransformer

A simple class for converting between (m/z) space and √(m/z) index space. Furthermore, it is typically used to define the full range of allowable mass over charge of a SwathMap. Importantly, this class and its concrete children allow a user to convert between mass over charge space and √(m/z) index space using the Intrinsic Mass Spacing parameters alpha and beta where:

\[ \frac{m}{z} = \left( \alpha_{IMS} i_{\sqrt{m/z}} + \beta_{trunc} \right)^2 \]

where \(\alpha_{IMS}\) is the IMS value, \(i_{\sqrt{m/z}}\) is the unsigned integer index of the \(m/z\) value in \(\sqrt{m/z}\) space, and \(\beta_{trunc}\) is to control for any truncation error that may be introduced from the conversion process.

The principle use of the ranges is to enable multiple different ways of defining a range, and then converting between world and index space. In the instance of converting from world to index, the range classes will ‘snap’ to the closest index. The upper and lower bounds are inclusive, meaning that a range with lowerIMSCoord, upperIMSCoord = 154, 154 would return data with a single row (the data contained along index 154). Similarly, a range with lowerIMSCoord, upperIMSCoord = 153, 155 would return data with three rows, centered at imsCoord 154.

Subclassed by toffee::MassOverChargeTransformerOrbitrap, toffee::MassOverChargeTransformerTOF

Public Functions

MassOverChargeTransformer() = default
virtual ~MassOverChargeTransformer() = default
virtual double lowerMzInWorldCoords() const = 0

The lower bound of mass over charge range in “normal” space.

virtual double upperMzInWorldCoords() const = 0

The upper bound of mass over charge range in “normal” space.

virtual int32_t lowerMzInIMSCoords() const = 0

The lower bound of mass over charge range in √(m/z) index space.

virtual int32_t upperMzInIMSCoords() const = 0

The upper bound of mass over charge range in √(m/z) index space.

inline IntrinsicMassSpacingType imsType() const

The Intrinsic Mass Spacing type See toffee::IMassOverChargeRange for more details

inline double imsAlpha() const

The Intrinsic Mass Spacing alpha parameter. See toffee::IMassOverChargeRange for more details

inline double imsBeta() const

The Intrinsic Mass Spacing beta parameter See toffee::IMassOverChargeRange for more details

inline int32_t imsGamma() const

The Intrinsic Mass Spacing gamma parameter See toffee::IMassOverChargeRange for more details

int32_t toIMSCoords(double mz) const

Convert the specified mass over charge into Intrinsic Mass Spacing index space

Parameters

mz – mass over charge value to be converted

virtual int32_t toIMSCoordsPreGammaAdjustment(double mz) const = 0

Convert the specified mass over charge into Intrinsic Mass Spacing index space before applying IMS gamma. This function allows the coord to be negative, and should only every be used when first creating the the point cloud

Parameters

mz – mass over charge value to be converted

virtual double toWorldCoords(int32_t imsIdx) const = 0

Convert the specified Intrinsic Mass Spacing index into mass over charge space

Parameters

imsCoord – IMS index to be converted

class MassOverChargeTransformerTOF : public toffee::MassOverChargeTransformer

A simple class for converting between (m/z) space and √(m/z) index space. Furthermore, it is typically used to define the full range of allowable mass over charge of a SwathMap.

See toffee::IMassOverChargeRange for how this conversion takes place

Public Functions

explicit MassOverChargeTransformerTOF()
explicit MassOverChargeTransformerTOF(double imsAlpha, double imsBeta, int32_t imsGamma)
Parameters
  • imsAlpha – the Intrinsic Mass Spacing alpha parameter

  • imsBeta – the Intrinsic Mass Spacing beta parameter

explicit MassOverChargeTransformerTOF(double imsAlpha, double imsBeta, int32_t imsGamma, int32_t lowerMzInIMSCoords, int32_t upperMzInIMSCoords)
Parameters
  • imsAlpha – the Intrinsic Mass Spacing alpha parameter

  • imsBeta – the Intrinsic Mass Spacing beta parameter

  • lowerMzInIMSCoords – the lower bound of mass over charge range in IMS index space

  • upperMzInIMSCoords – the upper bound of mass over charge range in IMS index space

inline virtual double lowerMzInWorldCoords() const final

The lower bound of mass over charge range in “normal” space.

inline virtual double upperMzInWorldCoords() const final

The upper bound of mass over charge range in “normal” space.

inline virtual int32_t lowerMzInIMSCoords() const final

The lower bound of mass over charge range in √(m/z) index space.

inline virtual int32_t upperMzInIMSCoords() const final

The upper bound of mass over charge range in √(m/z) index space.

virtual int32_t toIMSCoordsPreGammaAdjustment(double mz) const final

Convert the specified mass over charge into Intrinsic Mass Spacing index space before applying IMS gamma. This function allows the coord to be negative, and should only every be used when first creating the the point cloud

Parameters

mz – mass over charge value to be converted

virtual double toWorldCoords(int32_t imsCoord) const final

Convert the specified Intrinsic Mass Spacing index into mass over charge space

Parameters

imsCoord – IMS index to be converted

Spectra & Chromatograms

class Spectrum1D

A simple data holding class primarily used to return results from querying a SwathMap.

Public Functions

inline Spectrum1D(const Eigen::VectorXd &mz)

Initialise the Spectrum1D with a given set of mass over charge values and setting a correspondingly sized intensity vector to zeroes

Parameters

mz – a vector (of size M) mass over charge values captured by this spectrum

inline Spectrum1D(const std::vector<double> &mz, const std::vector<int> &intens)

Initialise the Spectrum1D with a given set of mass over charge values and a correspondingly sized intensity vector

Parameters
  • mz – a vector (of size M) mass over charge values captured by this spectrum

  • intens – a vector (of size M) intensity values captured by this spectrum

Public Members

Eigen::VectorXd massOverCharge

An Eigen::VectorXd (of size M) mass over charge values captured by this spectrum

Eigen::VectorXi intensities

An Eigen::VectorXi (of size M) intensity values captured by this spectrum

class Chromatogram1D

A simple data holding class primarily used to return results from querying a SwathMap.

Public Functions

inline Chromatogram1D(const Eigen::VectorXd &rt)

Initialise the Chromatogram1D with a given set of retention times and setting a correspondingly sized intensity vector to zeroes

Parameters

rt – a vector (of size N) retention times captured by this chromatogram

Public Members

Eigen::VectorXd retentionTime

An Eigen::VectorXd (of size N) retention times captured by this chromatogram

Eigen::VectorXi intensities

An Eigen::VectorXi (of size N) intensity values captured by this chromatogram

class Chromatogram2DSparse

A simple data holding class primarily used to return results from querying a SwathMap. The sparse moiniker refers to the fact that zero intensities are excluded from the intensity matrix to improve performance

Public Types

using Value = int

The type used for representing the intensity values.

using Triplet = Eigen::Triplet<Value>

A (retention time index, √(m/z) index, intensity) triple that corresponds to a point in the point cloud. These are primarily used for loading the toffee::Chromatogram2DSparse::Martix

using Matrix = Eigen::SparseMatrix<Value>

An Eigen::SparseMatrix in column major layout that is used to represent the intensities

Public Functions

inline Chromatogram2DSparse(size_t retentionTimeSize, size_t massOverChargeSize, const std::vector<Triplet> &triplets)
Parameters
  • retentionTimeSize – the number of retention times captured by this chromatogram

  • massOverChargeSize – the number of mass over charge values captured by this chromatogram

  • triplets – a vector of triplets that represent points in the point cloud that are used to initialise the Chromatogram2DSparse::intensities sparse matrix

inline Chromatogram2DSparse(Chromatogram2DSparse &&other) noexcept
inline Chromatogram2DSparse &operator=(Chromatogram2DSparse &&other) noexcept

Public Members

Eigen::VectorXd retentionTime

An Eigen::VectorXd (of size N) retention times captured by this chromatogram

Eigen::VectorXd massOverCharge

An Eigen::VectorXd (of size M) mass over charge values captured by this chromatogram

Matrix intensities

An Eigen::SparseMatrix (of shape NxM) in column major layout, of intensity values captured by this chromatogram

class Chromatogram2DDense

A simple data holding class primarily used to return results from querying a SwathMap. The dense moniker refers to the fact that zero intensities are included in the intensity matrix and may result in a large memory footprint.

Warning

Careless useage of this class may result in very large memory requirements of the calling function. In these cases, Chromatogram2DSparse may be useful.

Public Functions

inline explicit Chromatogram2DDense(const Chromatogram2DSparse &chromatogram2DSparse)

Convert a two-dimensional chromatogram with sparse intensities into a two-dimensional chromatogram with intensities in a dense matrix format.

Parameters

chromatogram2DSparse – the sparse representation of the chromatogram

Public Members

Eigen::VectorXd retentionTime

An Eigen::VectorXd (of size N) retention times captured by this chromatogram

Eigen::VectorXd massOverCharge

An Eigen::VectorXd (of size M) mass over charge values captured by this chromatogram

Eigen::MatrixXi intensities

An Eigen::MatrixXi (of shape NxM) in column major layout, of intensity values captured by this chromatogram

MassOverChargeRange

class IMassOverChargeRange

Subclassed by toffee::MassOverChargeRange, toffee::MassOverChargeRangeIMSCoords, toffee::MassOverChargeRangeWithPixelHalfWidth, toffee::MassOverChargeRangeWithPPMFullWidth

Public Functions

virtual ~IMassOverChargeRange() = default
virtual int32_t lowerMzInIMSCoords(const std::shared_ptr<const MassOverChargeTransformer> &mzTransformer) const = 0

Using the Intrinsic Mass Spacing parameters alpha and beta, convert the lower bound of the concrete child class into its corresponding √(m/z) index

virtual int32_t upperMzInIMSCoords(const std::shared_ptr<const MassOverChargeTransformer> &mzTransformer) const = 0

Using the Intrinsic Mass Spacing parameters alpha and beta, convert the upper bound of the concrete child class into its corresponding √(m/z) index

Warning

doxygenclass: Cannot find class “toffee::MassOverChargeRangeWithPixelWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/

Warning

doxygenclass: Cannot find class “toffee::MassOverChargeRangeWithPPMWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/

class MassOverChargeRange : public toffee::IMassOverChargeRange

An implementation of IMassOverChargeRange that allows the user to explicitly set the upper and lower bounds of the mass over charge range

Public Functions

explicit MassOverChargeRange(double lowerMz, double upperMz)
Parameters
  • lowerMz – the lower bound of mass over charge for this range

  • upperMz – the upper bound of mass over charge for this range

inline double massOverChargeLower() const

The lower bound of mass over charge for this range.

inline double massOverChargeUpper() const

The upper bound of mass over charge for this range.

virtual int32_t lowerMzInIMSCoords(const std::shared_ptr<const MassOverChargeTransformer> &mzTransformer) const final

See IMassOverChargeRange::lowerMzInIMSCoords.

virtual int32_t upperMzInIMSCoords(const std::shared_ptr<const MassOverChargeTransformer> &mzTransformer) const final

See IMassOverChargeRange::upperMzInIMSCoords.

class MassOverChargeRangeIMSCoords : public toffee::IMassOverChargeRange

An implementation of IMassOverChargeRange that allows the user to explicitly set the upper and lower bounds of the mass over charge range in terms of the √(m/z) index space

Public Functions

explicit MassOverChargeRangeIMSCoords(int32_t lowerIMSCoord, int32_t upperIMSCoord)
Parameters
  • lowerIMSCoord – the lower bound of mass over charge for this range in √(m/z) index space

  • upperIMSCoord – the upper bound of mass over charge for this range in √(m/z) index space

virtual int32_t lowerMzInIMSCoords(const std::shared_ptr<const MassOverChargeTransformer> &mzTransformer) const final

See IMassOverChargeRange::lowerMzInIMSCoords.

virtual int32_t upperMzInIMSCoords(const std::shared_ptr<const MassOverChargeTransformer> &mzTransformer) const final

See IMassOverChargeRange::upperMzInIMSCoords.

RetentionTimeRange

class IRetentionTimeRange

An interface class that allows users to define retention time ranges using various methods. These ranges are used by toffee::SwathMap when querying the intensity data.

The principle use of the ranges is to enable multiple different ways of defining a range, and then converting between world and index space. In the instance of converting from world to index, the range classes will ‘snap’ to the closest index. The upper and lower bounds are inclusive, meaning that a range with lowerRTIdx, upperRTIdx = 154, 154 would return data with a single row (the data contained along index 154). Similarly, a range with lowerRTIdx, upperRTIdx = 153, 155 would return data with three rows, centered at retentionTimeIdx 154.

Subclassed by toffee::RetentionTimeRange, toffee::RetentionTimeRangeWithPixelHalfWidth

Public Functions

virtual ~IRetentionTimeRange() = default
virtual int32_t lowerRTIdx(const Eigen::VectorXd &retentionTimes) const = 0

Find the index in the specified retention time vector that contains the value that is closest to the lower bound specified in the construction of a concrete child class

Warning

This function assumes, but does not check, that the retention time vector is sorted in ascending order

Parameters

retentionTimes – a vector of retention times in ascending order

virtual int32_t upperRTIdx(const Eigen::VectorXd &retentionTimes) const = 0

Find the index in the specified retention time vector that contains the value that is closest to the upper bound specified in the construction of a concrete child class

Warning

This function assumes, but does not check, that the retention time vector is sorted in ascending order

Parameters

retentionTimes – a vector of retention times in ascending order

Public Static Functions

static int32_t toRTIdx(double retentionTime, const Eigen::VectorXd &retentionTimes)

Find the index in the specified retention time vector that contains the value that is closest to the specified retention time we are searching for. If there are two values equidistant, arbitrarily preference the higher index.

Warning

This function assumes, but does not check, that the retention time vector is sorted in ascending order

Parameters
  • retentionTime – the retention time value that we wish to search for

  • retentionTimes – a vector of retention times in ascending order

Warning

doxygenclass: Cannot find class “toffee::RetentionTimeRangeWithPixelWidth” in doxygen xml output for project “toffee” from directory: _build/doxygen/xml/

class RetentionTimeRange : public toffee::IRetentionTimeRange

An implementation of IRetentionTimeRange that allows the user to explicitly set the upper and lower bounds of the retention time range

Public Functions

RetentionTimeRange(double lowerRetentionTime, double upperRetentionTime)
Parameters
  • lowerRetentionTime – the lower bound of retention time for this range

  • upperRetentionTime – the upper bound of retention time for this range

inline double retentionTimeLower() const

The lower bound of retention time for this range.

inline double retentionTimeUpper() const

The upper bound of retention time for this range.

virtual int32_t lowerRTIdx(const Eigen::VectorXd &retentionTimes) const final

See IRetentionTimeRange::lowerRTIdx.

virtual int32_t upperRTIdx(const Eigen::VectorXd &retentionTimes) const final

See IRetentionTimeRange::upperRTIdx.

Versioning

class Version

Public Static Functions

static inline int combineFileFormatVersions(int majorVersion, int minorVersion)

A helper function to combine major and minor version numbers into a form that can be easily compared

Public Static Attributes

static TOF_WIN_DLL_DECLSPEC const std::string LIBRARY_VERSION   = "dev"

The version of the library.

static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MAJOR_VERSION_CURRENT   = 1

The latest major version of the toffee file format.

static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MINOR_VERSION_CURRENT   = 2

The latest minor version of the toffee file format.

static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MAJOR_VERSION_COMPATIBILITY   = 0

The version of the major toffee file that we are backwards compatible with

static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_MINOR_VERSION_COMPATIBILITY   = 2

The version of the minor toffee file that we are backwards compatible with

static TOF_WIN_DLL_DECLSPEC const int FILE_FORMAT_NUM_ALLOWED_MINOR_VERSIONS   = 1000

We do not support minor versions greater than or equal to this number.