# Source code for biobss.ppgtools.ppg_statistical

import collections

import numpy as np
from numpy.typing import ArrayLike
from scipy import stats

from biobss.common.signal_entropy import *

# Statistical features
# Cycle-based feature extractors, keyed by feature name.
# Every callable takes five positional arguments in the order
# (signal, peak amplitudes, peak locations, trough locations, sampling rate);
# the two statistics below only read the peak amplitudes.
FEATURES_STAT_CYCLE = {
    "mean_peaks": (
        lambda _sig, peaks_amp, _peaks_locs, _troughs_locs, _fs: np.mean(peaks_amp)
    ),
    "std_peaks": (
        lambda _sig, peaks_amp, _peaks_locs, _troughs_locs, _fs: np.std(peaks_amp)
    ),
}

# Segment-based feature extractors, keyed by feature name.
# Every callable takes the signal segment as its only argument.
FEATURES_STAT_SEGMENT = {
    "mean": np.mean,
    "median": np.median,
    "std": np.std,
    "pct_25": lambda sig: np.percentile(sig, 25),
    "pct_75": lambda sig: np.percentile(sig, 75),
    # Mean absolute deviation around the mean. The absolute value is essential:
    # the signed deviations from the mean always sum to (numerically) zero.
    "mad": lambda sig: np.mean(np.abs(np.asarray(sig) - np.mean(sig))),
    "skewness": stats.skew,
    "kurtosis": stats.kurtosis,
    # Kept as a lambda so the helper name is resolved at call time.
    "entropy": lambda sig: calculate_shannon_entropy(sig),
}


def ppg_stat_features(
    sig: ArrayLike,
    sampling_rate: float,
    input_types: list,
    fiducials: dict = None,
    prefix: str = "ppg",
    **kwargs
) -> dict:
    """Calculates statistical features.

    Cycle-based features:
        mean_peaks: Mean of the peak amplitudes
        std_peaks: Standard deviation of the peak amplitudes

    Segment-based features:
        mean: Mean value of the signal
        median: Median value of the signal
        std: Standard deviation of the signal
        pct_25: 25th percentile of the signal
        pct_75: 75th percentile of the signal
        mad: Mean absolute deviation of the signal
        skewness: Skewness of the signal
        kurtosis: Kurtosis of the signal
        entropy: Entropy of the signal

    A feature whose calculation raises an exception is reported as NaN instead
    of aborting the whole extraction.

    Args:
        sig (ArrayLike): Signal to be analyzed.
        sampling_rate (float): Sampling rate of the signal (Hz).
        input_types (list): Type of feature calculation, should be 'segment' or 'cycle'
            (case-insensitive).
        fiducials (dict, optional): Dictionary of fiducial point locations. Not read by
            this function. Defaults to None.
        prefix (str, optional): Prefix for signal type, prepended to every feature name
            as '<prefix>_<feature>'. Defaults to 'ppg'.

    Kwargs:
        peaks_locs (ArrayLike): Array of peak locations
        troughs_locs (ArrayLike): Array of trough locations

    Raises:
        ValueError: If sampling rate is not greater than 0.
        ValueError: If 'peaks_locs' and/or 'troughs_locs' is not provided for the input_type 'cycle'.
        ValueError: If type is not 'cycle' or 'segment'.

    Returns:
        dict: Dictionary of calculated features.
    """
    if sampling_rate <= 0:
        raise ValueError("Sampling rate must be greater than 0.")

    input_types = [x.lower() for x in input_types]

    # Convert once so that list/tuple input supports index-array lookup
    # (sig[peaks_locs]) and element-wise arithmetic in the feature functions.
    sig = np.asarray(sig)

    features_stat = {}

    for input_type in input_types:
        if input_type == "cycle":
            # Both fiducial arrays are required; check before computing anything.
            if not all(k in kwargs for k in ("peaks_locs", "troughs_locs")):
                raise ValueError("Missing keyword arguments for the input_type: 'cycle'!")

            peaks_locs = kwargs["peaks_locs"]
            troughs_locs = kwargs["troughs_locs"]
            # Deliberately outside the try block: invalid peak indices are a
            # caller error and should propagate rather than become NaN.
            peaks_amp = sig[peaks_locs]

            for key, func in FEATURES_STAT_CYCLE.items():
                try:
                    value = func(sig, peaks_amp, peaks_locs, troughs_locs, sampling_rate)
                except Exception:
                    # Best effort: one failing feature must not discard the others.
                    value = np.nan
                features_stat["_".join([prefix, key])] = value

        elif input_type == "segment":
            for key, func in FEATURES_STAT_SEGMENT.items():
                try:
                    value = func(sig)
                except Exception:
                    # Best effort: one failing feature must not discard the others.
                    value = np.nan
                features_stat["_".join([prefix, key])] = value

        else:
            raise ValueError("Type should be 'cycle' or 'segment'.")

    return features_stat