Source code for spec2vec.SpectrumDocument

from typing import Optional
from matchms.Spikes import Spikes
from .Document import Document


class SpectrumDocument(Document):
    """Create documents from spectra.

    Every peak (and loss) position (m/z value) will be converted into a string "word".
    The entire list of all peak words forms a spectrum document. Peak words have
    the form "peak@100.32" (for n_decimals=2), and losses have the format "loss@100.32".
    Peaks with identical resulting strings will not be merged, hence same words can
    exist multiple times in a document (e.g. peaks at 100.31 and 100.29 would lead to
    two words "peak@100.3" when using n_decimals=1).

    For example:

    .. testcode::

        import numpy as np
        from matchms import Spectrum
        from spec2vec import SpectrumDocument

        spectrum = Spectrum(mz=np.array([100.0, 150.0, 200.51]),
                            intensities=np.array([0.7, 0.2, 0.1]),
                            metadata={'compound_name': 'substance1'})
        spectrum_document = SpectrumDocument(spectrum, n_decimals=1)

        print(spectrum_document.words)
        print(spectrum_document.peaks.mz)
        print(spectrum_document.get("compound_name"))

    Should output

    .. testoutput::

        ['peak@100.0', 'peak@150.0', 'peak@200.5']
        [100.   150.   200.51]
        substance1
    """

    def __init__(self, spectrum, n_decimals: int = 2):
        """
        Parameters
        ----------
        spectrum: SpectrumType
            Input spectrum.
        n_decimals
            Peak positions are converted to strings with n_decimal decimals.
            The default is 2, which would convert a peak at 100.387 into the
            word "peak@100.39".
        """
        # n_decimals is set before the base-class constructor runs; presumably
        # Document.__init__ triggers _make_words(), which reads it -- confirm
        # against Document before reordering these statements.
        self.n_decimals = n_decimals
        self.weights = None
        super().__init__(obj=spectrum)
        self._add_weights()

    def _make_words(self):
        """Create words from peaks (and losses).

        Stores the peak words followed by the loss words in ``self.words``
        and returns ``self``.
        """
        peak_words = [f"peak@{mz:.{self.n_decimals}f}" for mz in self._obj.peaks.mz]
        if self._obj.losses is not None:
            loss_words = [f"loss@{mz:.{self.n_decimals}f}" for mz in self._obj.losses.mz]
        else:
            loss_words = []
        self.words = peak_words + loss_words
        return self

    def _add_weights(self):
        """Add peak (and loss) intensities as weights.

        Stores the peak intensities followed by the loss intensities in
        ``self.weights`` (same ordering as ``self.words``) and returns ``self``.

        Raises
        ------
        AssertionError
            If the largest peak intensity is not <= 1, i.e. the peak
            intensities are not normalized.
        """
        peak_intensities = self._obj.peaks.intensities
        # An empty peak list has no maximum (reducing a zero-size array
        # raises), so the normalization check only applies when peaks exist.
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; `not (... <= 1)` also rejects NaN.
        if len(peak_intensities) > 0 and not peak_intensities.max() <= 1:
            raise AssertionError("peak intensities not normalized")

        if self._obj.losses is not None:
            loss_intensities = self._obj.losses.intensities.tolist()
        else:
            loss_intensities = []
        self.weights = peak_intensities.tolist() + loss_intensities
        return self

    def get(self, key: str, default=None):
        """Retrieve value from Spectrum metadata.

        Delegates to the ``get`` method of the wrapped spectrum, i.e. it is
        shorthand for

        .. code-block:: python

            val = self._obj.get(key, default)

        Parameters
        ----------
        key
            Name of the metadata entry. Must not be the name of an attribute
            of this SpectrumDocument (e.g. "words" or "weights").
        default
            Value passed on to the spectrum's ``get`` as fallback.

        Raises
        ------
        AssertionError
            If ``key`` is the name of an attribute of this object.
        """
        # Raised explicitly instead of via `assert` so the guard still works
        # when Python runs with optimizations (-O) enabled.
        if hasattr(self, key):
            raise AssertionError("Key cannot be attribute of SpectrumDocument class")
        return self._obj.get(key, default)

    @property
    def metadata(self):
        """Return metadata of original spectrum."""
        return self._obj.metadata

    @property
    def losses(self) -> Optional[Spikes]:
        """Return losses of original spectrum."""
        return self._obj.losses

    @property
    def peaks(self) -> Spikes:
        """Return peaks of original spectrum."""
        return self._obj.peaks