bci_essentials.signal_processing

Signal processing tools for processing trials of EEG data.

The EEG data inputs can be 2D or 3D arrays.

For single trials, inputs are of the shape n_channels x n_samples, where:
- n_channels = number of channels
- n_samples = number of samples
For multiple trials, inputs are of the shape n_trials x n_channels x n_samples, where:
- n_trials = number of trials
- n_channels = number of channels
- n_samples = number of samples

Outputs are the same dimensions as input (trials, channels, samples)

View Source

  1"""
  2Signal processing tools for processing trials of EEG data.
  3
  4The EEG data inputs can be 2D or 3D arrays.
  5- For single trials, inputs are of the shape `n_channels x n_samples`, where:
  6    - n_channels = number of channels
  7    - n_samples = number of samples
  8- For multiple trials, inputs are of the shape `n_trials x n_channels x n_samples`, where:
  9    - n_trials = number of trials
 10    - n_channels = number of channels
 11    - n_samples = number of samples
 12
 13- Outputs are the same dimensions as input (trials, channels, samples)
 14
 15"""
 16
 17import random
 18import functools
 19import numpy as np
 20from scipy import signal
 21from typing import Callable, Any
 22from imblearn.over_sampling import SMOTE
 23from .utils.logger import Logger  # Logger wrapper
 24
 25# Instantiate a logger for the module at the default level of logging.INFO
 26# Logs to (bci_essentials.__module__) where __module__ is the name of the module
 27logger = Logger(name=__name__)
 28
 29
 30def validate_filter_input(func: Callable) -> Callable:
 31    """Decorator to validate input data for filter functions."""
 32
 33    @functools.wraps(func)
 34    def wrapper(data: np.ndarray, *args: Any, **kwargs: Any) -> np.ndarray:
 35        try:
 36            if not isinstance(data, np.ndarray):
 37                raise ValueError(
 38                    f"Input data for {func.__name__} must be a numpy array"
 39                )
 40
 41            if not (data.ndim in [2, 3]):
 42                raise ValueError(
 43                    f"Data shape for {func.__name__} must be 2D or 3D array"
 44                )
 45
 46            return func(data, *args, **kwargs)
 47        except Exception as e:
 48            logger.ERROR(f"Error in {func.__name__}: {str(e)}")
 49            return data
 50
 51    return wrapper
 52
 53
 54@validate_filter_input
 55def bandpass(data, f_low, f_high, order, fsample):
 56    """Bandpass Filter.
 57
 58    Filters out frequencies outside of the range f_low to f_high with a
 59    Butterworth filter of specific order.
 60
 61    Wraps the scipy.signal.butter and scipy.signal.sosfiltfilt methods.
 62
 63    Parameters
 64    ----------
 65    data : numpy.ndarray
 66        Trials of EEG data.
 67        3D (or 2D) array containing data with `float` type.
 68
 69        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
 70    f_low : float
 71        Lower cut-off frequency.
 72    f_high : float
 73        Upper cut-off frequency.
 74    order : int
 75        Order of the filter.
 76    fsample : float
 77        Sampling rate of signal.
 78
 79    Returns
 80    -------
 81    new_data : numpy.ndarray
 82        Trials of filtered EEG data.
 83        3D (or 2D) array containing data with `float` type.
 84
 85        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
 86
 87    """
 88    Wn = [f_low / (fsample / 2), f_high / (fsample / 2)]
 89    sos = signal.butter(order, Wn, btype="bandpass", output="sos")
 90
 91    filtered_data = signal.sosfiltfilt(sos, data)
 92
 93    return filtered_data
 94
 95
 96@validate_filter_input
 97def lowpass(data, f_cutoff, order, fsample):
 98    """Lowpass Filter.
 99
100    Filters out frequencies above f_critical with a Butterworth filter of specific order.
101
102    Wraps the scipy.signal.butter and scipy.signal.sosfiltfilt methods.
103
104    Parameters
105    ----------
106    data : numpy.ndarray
107        Trials of EEG data.
108        3D (or 2D) array containing data with `float` type.
109
110        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
111    f_cutoff : float
112        Cut-off frequency.
113    order : int
114        Order of the filter.
115    fsample : float
116        Sampling rate of signal.
117
118    Returns
119    -------
120    new_data : numpy.ndarray
121        Trials of filtered EEG data.
122        3D (or 2D) array containing data with `float` type.
123
124        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
125
126    """
127    Wn = f_cutoff / (fsample / 2)
128    sos = signal.butter(order, Wn, btype="lowpass", output="sos")
129
130    filtered_data = signal.sosfiltfilt(sos, data)
131
132    return filtered_data
133
134
@validate_filter_input
def highpass(data, f_cutoff, order, fsample):
    """Highpass Filter.

    Attenuates frequencies below `f_cutoff` using a Butterworth filter of
    the given order.

    The filter is designed with scipy.signal.butter (second-order sections)
    and applied with scipy.signal.sosfiltfilt.

    Parameters
    ----------
    data : numpy.ndarray
        Trials of EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    f_cutoff : float
        Cut-off frequency.
    order : int
        Order of the filter.
    fsample : float
        Sampling rate of signal.

    Returns
    -------
    filtered_data : numpy.ndarray
        Trials of filtered EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    """
    # scipy expects the cut-off as a fraction of the Nyquist frequency
    nyquist = fsample / 2
    sections = signal.butter(
        order, f_cutoff / nyquist, btype="highpass", output="sos"
    )

    return signal.sosfiltfilt(sections, data)
171
172
@validate_filter_input
def notch(data, f_notch, Q, fsample):
    """Notch Filter.

    Removes a narrow band of frequencies centred on `f_notch`.

    The filter is designed with scipy.signal.iirnotch and applied with
    scipy.signal.filtfilt.

    Parameters
    ----------
    data : numpy.ndarray
        Trials of EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    f_notch : float
        Frequency of notch.
    Q : float
        Quality factor. Dimensionless parameter that characterizes
        notch filter -3 dB bandwidth bw relative to its
        center frequency, Q = w0/bw.
    fsample : float
        Sampling rate of signal.

    Returns
    -------
    filtered_data : numpy.ndarray
        Trials of filtered EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

    """
    numerator, denominator = signal.iirnotch(f_notch, Q, fsample)

    return signal.filtfilt(numerator, denominator, data)
209
210
def lico(X, y, expansion_factor=3, sum_num=2, shuffle=False):
    """Linear Combination Oversampling (LiCO)

    Generates synthetic EEG trials from the minority class by creating weighted linear
    combinations of existing trials, with added Gaussian noise for variability.
    Automatically detects the minority class based on label distribution.
    Synthetic trials are labelled with the detected minority class.

    Parameters
    ----------
    X : numpy.ndarray
        Trials of EEG data.
        3D array containing data with `float` type.
        shape = (n_trials, n_channels, n_samples)
    y : numpy.ndarray
        Labels corresponding to X.
    expansion_factor : int, *optional*
        Controls the amount of oversampling for the minority class.
        The minority class size will be increased by this factor.
        Values of 1 or less add no synthetic trials.
        - Default is `3`.
    sum_num : int, *optional*
        Number of existing trials to combine for each synthetic trial.
        Higher values create more complex combinations.
        - Default is `2`.
    shuffle : bool, *optional*
        Whether to shuffle the final combined dataset.
        - Default is `False`.

    Returns
    -------
    over_X : numpy.ndarray
        Original trials combined with synthetic trials.
        shape = (n_expanded_trials, n_channels, n_samples)
    over_y : numpy.ndarray
        Labels for original and synthetic trials.
        shape = (n_expanded_trials,)

    """

    # Find unique classes and their counts
    classes, counts = np.unique(y, return_counts=True)

    # Determine the minority class (class with the fewest samples)
    minority_class = classes[np.argmin(counts)]
    logger.debug("Minority class: %s", minority_class)

    # Select the original EEG trials only corresponding to the minority class
    minority_X = X[y == minority_class]
    n_minority, n_channels, n_samples = minority_X.shape
    logger.debug("Shape of minority class: %s", minority_X.shape)

    # Number of new synthetic trials; clamped so that an expansion factor
    # below 1 yields "no new trials" instead of a negative array size
    n_synthetic_trials = max(0, int(n_minority * (expansion_factor - 1)))
    synthetic_X = np.zeros([n_synthetic_trials, n_channels, n_samples])
    logger.debug("Shape of synthetic trials: %s", synthetic_X.shape)

    # Generate synthetic trials by combining minority class samples with LiCO
    for trial_idx in range(n_synthetic_trials):
        # Random convex weights (non-negative, summing to 1)
        weights = np.random.dirichlet(np.ones(sum_num), size=1)[0]

        # Weighted sum of randomly drawn minority trials (with replacement)
        for weight in weights:
            random_trial_idx = random.randint(0, n_minority - 1)
            synthetic_X[trial_idx] += weight * minority_X[random_trial_idx]

        # Add Gaussian noise for variability
        # NOTE(review): the noise has unit variance and the trial is then
        # scaled to unit Frobenius norm, so synthetic trials may be on a
        # different scale than the originals - confirm this is intended.
        synthetic_X[trial_idx] += np.random.normal(size=[n_channels, n_samples])

        # Normalize the new sample
        synthetic_X[trial_idx] /= np.linalg.norm(synthetic_X[trial_idx])

    # Combine original data with synthetic data; the synthetic trials carry
    # the minority label (not a hard-coded 1)
    over_X = np.append(X, synthetic_X, axis=0)
    over_y = np.append(y, np.full(n_synthetic_trials, minority_class))

    logger.info("LiCO expanded data from %d to %d samples", len(y), len(over_y))
    # Counts per sorted unique label; unlike np.bincount this also works for
    # labels that are not non-negative integers
    final_counts = np.unique(over_y, return_counts=True)[1]
    logger.info("Final class distribution: %s", final_counts.tolist())

    # Shuffle the data if requested
    if shuffle:
        indices = np.arange(len(over_y))
        np.random.shuffle(indices)

        over_X = over_X[indices]
        over_y = over_y[indices]

    return over_X, over_y
302
303
def smote(X, y, expansion_factor=3, k_neighbors=5, shuffle=False, random_state=42):
    """Oversampling using SMOTE (Synthetic Minority Over-sampling Technique)

    Generates synthetic EEG trials from minority class (typically target/P300 responses).
    The minority class is detected automatically as the class with the fewest
    trials. If SMOTE raises a `ValueError`, the error is logged and the
    original data is returned unchanged.

    Parameters
    ----------
    X : numpy.ndarray
        Trials of EEG data.
        3D array containing data with `float` type.
        shape = (n_trials, n_channels, n_samples)
    y : numpy.ndarray
        Labels corresponding to X.
    expansion_factor : float, *optional*
        Controls the amount of oversampling for the minority class.
        The target minority count is the current count times this factor.
        - Default is `3`.
    k_neighbors : int, *optional*
        Number of nearest neighbors to use for synthetic sample generation.
        Reduced automatically when the minority class is too small.
        - Default is `5`.
    shuffle : bool, *optional*
        Whether to shuffle the final combined dataset.
        - Default is `False`.
    random_state : int, *optional*
        Random seed passed to SMOTE for reproducibility.
        - Default is `42`.

    Returns
    -------
    over_X : numpy.ndarray
        Oversampled X.
        shape = (n_expanded_trials, n_channels, n_samples)
    over_y : numpy.ndarray
        Oversampled y.
    """

    # Get dimensions
    n_trials, n_channels, n_samples = X.shape

    # Find the minority class, its current size and its target size
    classes, counts = np.unique(y, return_counts=True)
    minority_class = classes[np.argmin(counts)]
    n_minority = int(np.min(counts))
    n_target = int(n_minority * expansion_factor)
    sampling_strategy = {minority_class: n_target}

    # Reshape X to 2D for SMOTE (combine channels and samples)
    X_reshaped = X.reshape(n_trials, n_channels * n_samples)

    # Apply SMOTE
    try:
        # Neighbours are drawn from the existing minority trials, so the
        # check must use the current minority count, not the target count
        if n_minority <= k_neighbors:
            k_neighbors = max(1, n_minority - 1)
            logger.warning(
                "Reduced k_neighbors to %s due to small minority class", k_neighbors
            )

        # Configure and apply SMOTE
        sampler = SMOTE(
            sampling_strategy=sampling_strategy,
            k_neighbors=k_neighbors,
            random_state=random_state,
        )
        X_resampled, y_resampled = sampler.fit_resample(X_reshaped, y)

        # Reshape back to 3D
        X_resampled = X_resampled.reshape(-1, n_channels, n_samples)

        # Shuffle if requested
        if shuffle:
            indices = np.arange(len(y_resampled))
            np.random.shuffle(indices)
            X_resampled = X_resampled[indices]
            y_resampled = y_resampled[indices]

        logger.info(
            "SMOTE expanded data from %s to %s samples", len(y), len(y_resampled)
        )
        # Report the share of the oversampled (minority) class, whatever its label
        logger.info(
            "New class balance: %s/%s",
            int(np.sum(y_resampled == minority_class)),
            len(y_resampled),
        )

        return X_resampled, y_resampled

    except ValueError as e:
        logger.error("SMOTE failed: %s. Returning original data.", e)
        return X, y
387
388
def random_oversampling(X, y, ratio):
    """Random Oversampling

    Randomly samples epochs of X to oversample the MINORITY class.
    Automatically determines which class is the MINORITY class.
    Duplicated trials are appended after the original trials. If the
    minority class already meets the requested ratio, the data is returned
    without additional trials.

    Parameters
    ----------
    X : numpy.ndarray [n_trials, n_channels, n_samples]
        Trials of EEG data.
        3D array containing data with `float` type.
    y : numpy.ndarray [n_trials]
        Labels corresponding to X.
    ratio : float
        Desired ratio of MINORITY class samples to majority class samples
        - ratio=1 means the number of MINORITY class samples will be equal to the number of majority class samples
        - ratio=0.5 means the number of MINORITY class samples will be half the number of majority class samples
        - ratio=2 means the number of MINORITY class samples will be twice the number of majority class samples

    Returns
    -------
    over_X : numpy.ndarray
        Oversampled X.
    over_y : numpy.ndarray
        Oversampled y, with the same dtype as `y`.
    """
    y = np.asarray(y)

    # Find unique classes and their counts
    classes, counts = np.unique(y, return_counts=True)

    # Determine minority and majority classes
    minority_class = classes[np.argmin(counts)]
    n_minority = int(np.min(counts))
    n_majority = int(np.max(counts))

    # Get minority class samples
    minority_X = X[y == minority_class]

    # Number of extra trials needed; never negative, so a ratio that is
    # already satisfied simply adds nothing
    n_new = max(0, int(n_majority * ratio) - n_minority)

    # Draw minority trials with replacement
    picks = np.array(
        [random.randrange(n_minority) for _ in range(n_new)], dtype=int
    )
    new_X = minority_X[picks]

    over_X = np.append(X, new_X, axis=0)
    # np.full keeps the label dtype (no int -> float cast) and also works
    # for non-numeric labels
    over_y = np.append(y, np.full(n_new, minority_class, dtype=y.dtype))

    return over_X, over_y
438
439
def random_undersampling(X, y, ratio):
    """Random Undersampling

    Randomly removes epochs of X to undersample the MAJORITY class.
    Automatically determines which class is the MAJORITY class.
    Trials of every other class are kept unchanged. The kept majority
    trials come first in the output, followed by the remaining trials.

    Parameters
    ----------
    X : numpy.ndarray [n_trials, n_channels, n_samples]
        Trials of EEG data.
        3D array containing data with `float` type.
    y : numpy.ndarray [n_trials]
        Labels corresponding to X.
    ratio : float
        Desired ratio of MAJORITY class samples to minority class samples.
        The number of kept MAJORITY samples is capped at the number available.
        - ratio=1 means the number of MAJORITY class samples will be equal to the number of minority class samples
        - ratio=0.5 means the number of MAJORITY class samples will be half the number of minority class samples
        - ratio=2 means the number of MAJORITY class samples will be twice the number of minority class samples

    Returns
    -------
    under_X : numpy.ndarray
        Undersampled X.
    under_y : numpy.ndarray
        Undersampled y.
    """
    y = np.asarray(y)

    # Find unique classes and their counts
    classes, counts = np.unique(y, return_counts=True)

    # Determine the majority class and the size of the smallest class
    majority_class = classes[np.argmax(counts)]
    n_minority = int(np.min(counts))

    # Split trial indices into the class being thinned and everything else.
    # Selecting "not majority" (rather than "minority") keeps the other class
    # intact even when the class counts are tied.
    majority_indices = np.where(y == majority_class)[0]
    other_indices = np.where(y != majority_class)[0]

    # Number of majority trials to keep; sampling without replacement cannot
    # return more trials than exist, so clamp to the available count
    n_keep = min(max(0, int(n_minority * ratio)), len(majority_indices))

    # Randomly select indices to keep
    keep_indices = np.random.choice(majority_indices, size=n_keep, replace=False)

    # Combine indices
    all_indices = np.concatenate([keep_indices, other_indices])

    # Create undersampled datasets
    under_X = X[all_indices]
    under_y = y[all_indices]

    return under_X, under_y

logger = <bci_essentials.utils.logger.Logger object>

def validate_filter_input(func: Callable) -> Callable: View Source

def validate_filter_input(func: Callable) -> Callable:
    """Decorator to validate input data for filter functions.

    The wrapped filter is only invoked when `data` is a 2D or 3D
    `numpy.ndarray`. Any exception raised by the validation or by the
    filter itself is logged and the input `data` is returned unchanged, so
    a failing filter does not interrupt the caller.

    Parameters
    ----------
    func : Callable
        Filter function whose first positional argument is the data array.

    Returns
    -------
    wrapper : Callable
        Function with the metadata of `func` that validates `data` before
        delegating to `func`.

    """

    @functools.wraps(func)
    def wrapper(data: np.ndarray, *args: Any, **kwargs: Any) -> np.ndarray:
        try:
            if not isinstance(data, np.ndarray):
                raise ValueError(
                    f"Input data for {func.__name__} must be a numpy array"
                )

            if data.ndim not in (2, 3):
                raise ValueError(
                    f"Data shape for {func.__name__} must be 2D or 3D array"
                )

            return func(data, *args, **kwargs)
        except Exception as e:
            # Deliberate best-effort: report the problem and hand the input
            # back untouched. Uses the lowercase `error` method like the rest
            # of this module so the handler itself cannot fail.
            logger.error("Error in %s: %s", func.__name__, e)
            return data

    return wrapper

Decorator to validate input data for filter functions.

@validate_filter_input

def bandpass(data, f_low, f_high, order, fsample): View Source

@validate_filter_input
def bandpass(data, f_low, f_high, order, fsample):
    """Bandpass Filter.

    Attenuates frequencies outside the band `f_low` to `f_high` using a
    Butterworth filter of the given order.

    The filter is designed with scipy.signal.butter (second-order sections)
    and applied with scipy.signal.sosfiltfilt.

    Parameters
    ----------
    data : numpy.ndarray
        Trials of EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    f_low : float
        Lower cut-off frequency.
    f_high : float
        Upper cut-off frequency.
    order : int
        Order of the filter.
    fsample : float
        Sampling rate of signal.

    Returns
    -------
    filtered_data : numpy.ndarray
        Trials of filtered EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

    """
    # scipy expects the band edges as fractions of the Nyquist frequency
    nyquist = fsample / 2
    band_edges = [f_low / nyquist, f_high / nyquist]
    sections = signal.butter(order, band_edges, btype="bandpass", output="sos")

    return signal.sosfiltfilt(sections, data)

Bandpass Filter.

Filters out frequencies outside of the range f_low to f_high with a Butterworth filter of specific order.

Wraps the scipy.signal.butter and scipy.signal.sosfiltfilt methods.

Parameters

data (numpy.ndarray): Trials of EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
f_low (float): Lower cut-off frequency.
f_high (float): Upper cut-off frequency.
order (int): Order of the filter.
fsample (float): Sampling rate of signal.

Returns

new_data (numpy.ndarray): Trials of filtered EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

@validate_filter_input

def lowpass(data, f_cutoff, order, fsample): View Source

 97@validate_filter_input
 98def lowpass(data, f_cutoff, order, fsample):
 99    """Lowpass Filter.
100
101    Filters out frequencies above f_critical with a Butterworth filter of specific order.
102
103    Wraps the scipy.signal.butter and scipy.signal.sosfiltfilt methods.
104
105    Parameters
106    ----------
107    data : numpy.ndarray
108        Trials of EEG data.
109        3D (or 2D) array containing data with `float` type.
110
111        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
112    f_cutoff : float
113        Cut-off frequency.
114    order : int
115        Order of the filter.
116    fsample : float
117        Sampling rate of signal.
118
119    Returns
120    -------
121    new_data : numpy.ndarray
122        Trials of filtered EEG data.
123        3D (or 2D) array containing data with `float` type.
124
125        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
126
127    """
128    Wn = f_cutoff / (fsample / 2)
129    sos = signal.butter(order, Wn, btype="lowpass", output="sos")
130
131    filtered_data = signal.sosfiltfilt(sos, data)
132
133    return filtered_data

Lowpass Filter.

Filters out frequencies above f_cutoff with a Butterworth filter of the specified order.

Wraps the scipy.signal.butter and scipy.signal.sosfiltfilt methods.

Parameters

data (numpy.ndarray): Trials of EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
f_cutoff (float): Cut-off frequency.
order (int): Order of the filter.
fsample (float): Sampling rate of signal.

Returns

new_data (numpy.ndarray): Trials of filtered EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

@validate_filter_input

def highpass(data, f_cutoff, order, fsample): View Source

@validate_filter_input
def highpass(data, f_cutoff, order, fsample):
    """Highpass Filter.

    Attenuates frequencies below `f_cutoff` using a Butterworth filter of
    the given order.

    The filter is designed with scipy.signal.butter (second-order sections)
    and applied with scipy.signal.sosfiltfilt.

    Parameters
    ----------
    data : numpy.ndarray
        Trials of EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    f_cutoff : float
        Cut-off frequency.
    order : int
        Order of the filter.
    fsample : float
        Sampling rate of signal.

    Returns
    -------
    filtered_data : numpy.ndarray
        Trials of filtered EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    """
    # scipy expects the cut-off as a fraction of the Nyquist frequency
    nyquist = fsample / 2
    sections = signal.butter(
        order, f_cutoff / nyquist, btype="highpass", output="sos"
    )

    return signal.sosfiltfilt(sections, data)

Highpass Filter.

Filters out frequencies below f_cutoff with a Butterworth filter of the specified order.

Wraps the scipy.signal.butter and scipy.signal.sosfiltfilt methods.

Parameters

data (numpy.ndarray): Trials of EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
f_cutoff (float): Cut-off frequency.
order (int): Order of the filter.
fsample (float): Sampling rate of signal.

Returns

new_data (numpy.ndarray): Trials of filtered EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

@validate_filter_input

def notch(data, f_notch, Q, fsample): View Source

@validate_filter_input
def notch(data, f_notch, Q, fsample):
    """Notch Filter.

    Removes a narrow band of frequencies centred on `f_notch`.

    The filter is designed with scipy.signal.iirnotch and applied with
    scipy.signal.filtfilt.

    Parameters
    ----------
    data : numpy.ndarray
        Trials of EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
    f_notch : float
        Frequency of notch.
    Q : float
        Quality factor. Dimensionless parameter that characterizes
        notch filter -3 dB bandwidth bw relative to its
        center frequency, Q = w0/bw.
    fsample : float
        Sampling rate of signal.

    Returns
    -------
    filtered_data : numpy.ndarray
        Trials of filtered EEG data.
        3D (or 2D) array containing data with `float` type.

        shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

    """
    numerator, denominator = signal.iirnotch(f_notch, Q, fsample)

    return signal.filtfilt(numerator, denominator, data)

Notch Filter.

Notch filter for removing specific frequency components.

Parameters

data (numpy.ndarray): Trials of EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)
f_notch (float): Frequency of notch.
Q (float): Quality factor. Dimensionless parameter that characterizes notch filter -3 dB bandwidth bw relative to its center frequency, Q = w0/bw.
fsample (float): Sampling rate of signal.

Returns

new_data (numpy.ndarray): Trials of filtered EEG data. 3D (or 2D) array containing data with float type.

shape = (n_trials, n_channels, n_samples) or (n_channels, n_samples)

def lico(X, y, expansion_factor=3, sum_num=2, shuffle=False): View Source

def lico(X, y, expansion_factor=3, sum_num=2, shuffle=False):
    """Linear Combination Oversampling (LiCO)

    Generates synthetic EEG trials from the minority class by creating weighted linear
    combinations of existing trials, with added Gaussian noise for variability.
    Automatically detects the minority class based on label distribution.
    Synthetic trials are labelled with the detected minority class.

    Parameters
    ----------
    X : numpy.ndarray
        Trials of EEG data.
        3D array containing data with `float` type.
        shape = (n_trials, n_channels, n_samples)
    y : numpy.ndarray
        Labels corresponding to X.
    expansion_factor : int, *optional*
        Controls the amount of oversampling for the minority class.
        The minority class size will be increased by this factor.
        Values of 1 or less add no synthetic trials.
        - Default is `3`.
    sum_num : int, *optional*
        Number of existing trials to combine for each synthetic trial.
        Higher values create more complex combinations.
        - Default is `2`.
    shuffle : bool, *optional*
        Whether to shuffle the final combined dataset.
        - Default is `False`.

    Returns
    -------
    over_X : numpy.ndarray
        Original trials combined with synthetic trials.
        shape = (n_expanded_trials, n_channels, n_samples)
    over_y : numpy.ndarray
        Labels for original and synthetic trials.
        shape = (n_expanded_trials,)

    """

    # Find unique classes and their counts
    classes, counts = np.unique(y, return_counts=True)

    # Determine the minority class (class with the fewest samples)
    minority_class = classes[np.argmin(counts)]
    logger.debug("Minority class: %s", minority_class)

    # Select the original EEG trials only corresponding to the minority class
    minority_X = X[y == minority_class]
    n_minority, n_channels, n_samples = minority_X.shape
    logger.debug("Shape of minority class: %s", minority_X.shape)

    # Number of new synthetic trials; clamped so that an expansion factor
    # below 1 yields "no new trials" instead of a negative array size
    n_synthetic_trials = max(0, int(n_minority * (expansion_factor - 1)))
    synthetic_X = np.zeros([n_synthetic_trials, n_channels, n_samples])
    logger.debug("Shape of synthetic trials: %s", synthetic_X.shape)

    # Generate synthetic trials by combining minority class samples with LiCO
    for trial_idx in range(n_synthetic_trials):
        # Random convex weights (non-negative, summing to 1)
        weights = np.random.dirichlet(np.ones(sum_num), size=1)[0]

        # Weighted sum of randomly drawn minority trials (with replacement)
        for weight in weights:
            random_trial_idx = random.randint(0, n_minority - 1)
            synthetic_X[trial_idx] += weight * minority_X[random_trial_idx]

        # Add Gaussian noise for variability
        # NOTE(review): the noise has unit variance and the trial is then
        # scaled to unit Frobenius norm, so synthetic trials may be on a
        # different scale than the originals - confirm this is intended.
        synthetic_X[trial_idx] += np.random.normal(size=[n_channels, n_samples])

        # Normalize the new sample
        synthetic_X[trial_idx] /= np.linalg.norm(synthetic_X[trial_idx])

    # Combine original data with synthetic data; the synthetic trials carry
    # the minority label (not a hard-coded 1)
    over_X = np.append(X, synthetic_X, axis=0)
    over_y = np.append(y, np.full(n_synthetic_trials, minority_class))

    logger.info("LiCO expanded data from %d to %d samples", len(y), len(over_y))
    # Counts per sorted unique label; unlike np.bincount this also works for
    # labels that are not non-negative integers
    final_counts = np.unique(over_y, return_counts=True)[1]
    logger.info("Final class distribution: %s", final_counts.tolist())

    # Shuffle the data if requested
    if shuffle:
        indices = np.arange(len(over_y))
        np.random.shuffle(indices)

        over_X = over_X[indices]
        over_y = over_y[indices]

    return over_X, over_y

Linear Combination Oversampling (LiCO)

Generates synthetic EEG trials from the minority class by creating weighted linear combinations of existing trials, with added Gaussian noise for variability. Automatically detects the minority class based on label distribution.

Parameters

X (numpy.ndarray): Trials of EEG data. 3D array containing data with float type. shape = (n_trials, n_channels, n_samples)
y (numpy.ndarray): Labels corresponding to X.
expansion_factor (int, optional): Controls the amount of oversampling for the minority class. The minority class size will be increased by this factor.
- Default is 3.
sum_num (int, optional): Number of existing trials to combine for each synthetic trial. Higher values create more complex combinations.
- Default is 2.
shuffle (bool, optional): Whether to shuffle the final combined dataset.
- Default is False.

Returns

over_X (numpy.ndarray): Original trials combined with synthetic trials. shape = (n_expanded_trials, n_channels, n_samples)
over_y (numpy.ndarray): Labels for original and synthetic trials. shape = (n_expanded_trials,)

def smote( X, y, expansion_factor=3, k_neighbors=5, shuffle=False, random_state=42): View Source

def smote(X, y, expansion_factor=3, k_neighbors=5, shuffle=False, random_state=42):
    """Oversampling using SMOTE (Synthetic Minority Over-sampling Technique)

    Generates synthetic EEG trials for the minority class (typically
    target/P300 responses). The minority class is detected automatically as
    the label with the fewest trials (the first such label on a tie).

    Parameters
    ----------
    X : numpy.ndarray
        Trials of EEG data.
        3D array containing data with `float` type.
        shape = (n_trials, n_channels, n_samples)
    y : numpy.ndarray
        Labels corresponding to X.
    expansion_factor : float, *optional*
        Controls the amount of oversampling for the minority class. The
        minority class is grown to `int(n_minority * expansion_factor)` trials.
        - Default is `3`.
    k_neighbors : int, *optional*
        Number of nearest neighbors to use for synthetic sample generation.
        Automatically reduced (with a warning) when the minority class has
        `k_neighbors` trials or fewer.
        - Default is `5`.
    shuffle : bool, *optional*
        Whether to shuffle the final combined dataset. The shuffle draws from
        the global NumPy random state, not from `random_state`.
        - Default is `False`.
    random_state : int, *optional*
        Random seed passed to SMOTE for reproducibility.
        - Default is `42`.

    Returns
    -------
    over_X : numpy.ndarray
        Oversampled X.
        shape = (n_expanded_trials, n_channels, n_samples)
    over_y : numpy.ndarray
        Oversampled y.
        If SMOTE raises a `ValueError`, the error is logged and the original
        `X` and `y` are returned unchanged.

    """

    # Get dimensions
    n_trials, n_channels, n_samples = X.shape

    # Find the minority class, its current size and its target size
    classes, counts = np.unique(y, return_counts=True)
    minority_idx = np.argmin(counts)
    minority_class = classes[minority_idx]
    n_minority_original = int(counts[minority_idx])
    n_minority_target = int(n_minority_original * expansion_factor)
    sampling_strategy = {minority_class: n_minority_target}

    # Reshape X to 2D for SMOTE (combine channels and samples)
    X_reshaped = X.reshape(n_trials, n_channels * n_samples)

    # SMOTE searches for neighbors among the EXISTING minority trials, so the
    # limit on k is the current minority count, not the post-expansion target.
    if n_minority_original <= k_neighbors:
        k_neighbors = max(1, n_minority_original - 1)
        logger.warning(
            "Reduced k_neighbors to %s due to small minority class", k_neighbors
        )

    try:
        # Configure and apply SMOTE
        sampler = SMOTE(
            sampling_strategy=sampling_strategy,
            k_neighbors=k_neighbors,
            random_state=random_state,
        )
        X_resampled, y_resampled = sampler.fit_resample(X_reshaped, y)
    except ValueError as e:
        logger.error("SMOTE failed: %s. Returning original data.", e)
        return X, y

    # Reshape back to 3D
    X_resampled = X_resampled.reshape(-1, n_channels, n_samples)

    # Shuffle if requested
    if shuffle:
        indices = np.arange(len(y_resampled))
        np.random.shuffle(indices)
        X_resampled = X_resampled[indices]
        y_resampled = y_resampled[indices]

    logger.info("SMOTE expanded data from %s to %s samples", len(y), len(y_resampled))
    # Report the share of the class that was actually oversampled rather than
    # assuming the minority label is 1.
    logger.info(
        "New class balance: %s/%s",
        int(np.sum(y_resampled == minority_class)),
        len(y_resampled),
    )

    return X_resampled, y_resampled

Oversampling using SMOTE (Synthetic Minority Over-sampling Technique)

Generates synthetic EEG trials from minority class (typically target/P300 responses).

Parameters

X (numpy.ndarray): Trials of EEG data. 3D array containing data with float type. shape = (n_trials, n_channels, n_samples)
y (numpy.ndarray): Labels corresponding to X.
expansion_factor (float, optional): Controls the amount of oversampling for the minority class.
- Default is 3.
k_neighbors (int, optional): Number of nearest neighbors to use for synthetic sample generation.
- Default is 5.
shuffle (bool, optional): Whether to shuffle the final combined dataset.
- Default is False.
random_state (int, optional): Random seed for reproducibility.
- Default is 42.

Returns

over_X (numpy.ndarray): Oversampled X.
over_y (numpy.ndarray): Oversampled y.

def random_oversampling(X, y, ratio): View Source

def random_oversampling(X, y, ratio):
    """Random Oversampling

    Randomly duplicates epochs of X to oversample the MINORITY class.
    Automatically determines which class is the MINORITY class (the label
    with the fewest trials; the first such label on a tie).

    Parameters
    ----------
    X : numpy.ndarray [n_trials, n_channels, n_samples]
        Trials of EEG data.
        3D array containing data with `float` type.
    y : numpy.ndarray [n_trials]
        Labels corresponding to X.
    ratio : float
        Desired ratio of MINORITY class samples to majority class samples
        - ratio=1 means the number of MINORITY class samples will be equal to the number of majority class samples
        - ratio=0.5 means the number of MINORITY class samples will be half the number of majority class samples
        - ratio=2 means the number of MINORITY class samples will be twice the number of majority class samples

    Returns
    -------
    over_X : numpy.ndarray
        Oversampled X. The original trials come first, followed by the
        duplicated minority trials.
    over_y : numpy.ndarray
        Oversampled y, with the same dtype as `y`.
        If the minority class already meets or exceeds the requested ratio,
        no trials are added.

    """
    y = np.asarray(y)

    # Find unique classes and their counts
    classes, counts = np.unique(y, return_counts=True)

    # Determine minority and majority classes
    minority_class = classes[np.argmin(counts)]
    n_minority = int(np.min(counts))
    n_majority = int(np.max(counts))

    # Get minority class samples
    minority_X = X[y == minority_class]

    # Number of duplicates needed; never negative, so an already-satisfied
    # ratio leaves the data unchanged instead of raising.
    n_new = max(0, int(n_majority * ratio) - n_minority)

    # Draw minority trials with replacement (same stdlib RNG as before)
    picks = np.array([random.randrange(n_minority) for _ in range(n_new)], dtype=int)
    new_X = minority_X[picks]

    # Build the new labels with y's own dtype so integer labels stay integers
    new_y = np.full(n_new, minority_class, dtype=y.dtype)

    over_X = np.append(X, new_X, axis=0)
    over_y = np.append(y, new_y)

    return over_X, over_y

Random Oversampling

Randomly samples epochs of X to oversample the MINORITY class. Automatically determines which class is the MINORITY class.

Parameters

X (numpy.ndarray [n_trials, n_channels, n_samples]): Trials of EEG data. 3D array containing data with float type.
y (numpy.ndarray [n_trials]): Labels corresponding to X.
ratio (float): Desired ratio of MINORITY class samples to majority class samples
- ratio=1 means the number of MINORITY class samples will be equal to the number of majority class samples
- ratio=0.5 means the number of MINORITY class samples will be half the number of majority class samples
- ratio=2 means the number of MINORITY class samples will be twice the number of majority class samples

Returns

over_X (numpy.ndarray): Oversampled X.
over_y (numpy.ndarray): Oversampled y.

def random_undersampling(X, y, ratio): View Source

def random_undersampling(X, y, ratio):
    """Random Undersampling

    Randomly removes epochs of X to undersample the MAJORITY class.
    Automatically determines which class is the MAJORITY class.

    Only the majority class (most trials) and the minority class (fewest
    trials) are kept in the output; trials of any other class are dropped.

    Parameters
    ----------
    X : numpy.ndarray [n_trials, n_channels, n_samples]
        Trials of EEG data.
        3D array containing data with `float` type.
    y : numpy.ndarray [n_trials]
        Labels corresponding to X.
    ratio : float
        Desired ratio of MAJORITY class samples to minority class samples.
        - ratio=1 means the number of MAJORITY class samples will be equal to the number of minority class samples
        - ratio=0.5 means the number of MAJORITY class samples will be half the number of minority class samples
        - ratio=2 means the number of MAJORITY class samples will be twice the number of minority class samples

        If the requested number of majority samples exceeds the number
        available, all majority samples are kept.

    Returns
    -------
    under_X : numpy.ndarray
        Undersampled X. The kept majority trials come first, followed by all
        minority trials.
    under_y : numpy.ndarray
        Undersampled y.

    """
    y = np.asarray(y)

    # Find unique classes and their counts
    classes, counts = np.unique(y, return_counts=True)

    # Nothing to balance with fewer than two classes
    if len(classes) < 2:
        return np.array(X), np.array(y)

    # Determine minority and majority classes
    minority_pos = int(np.argmin(counts))
    majority_pos = int(np.argmax(counts))
    if majority_pos == minority_pos:
        # All classes are the same size, so argmax and argmin picked the same
        # label. Use a different label as "majority" so no class is lost.
        majority_pos = len(classes) - 1
    majority_class = classes[majority_pos]
    minority_class = classes[minority_pos]
    n_minority = int(counts[minority_pos])

    # Get indices of majority and minority class samples
    majority_indices = np.where(y == majority_class)[0]
    minority_indices = np.where(y == minority_class)[0]

    # Number of majority samples to keep, clamped to what is available because
    # sampling without replacement cannot return more than that.
    n_keep = min(max(0, int(n_minority * ratio)), majority_indices.size)

    # Randomly select indices to keep
    keep_indices = np.random.choice(majority_indices, size=n_keep, replace=False)

    # Combine indices
    all_indices = np.concatenate([keep_indices, minority_indices])

    # Create undersampled datasets
    under_X = X[all_indices]
    under_y = y[all_indices]

    return under_X, under_y

Random Undersampling

Randomly removes epochs of X to undersample the MAJORITY class. Automatically determines which class is the MAJORITY class.

Parameters

X (numpy.ndarray [n_trials, n_channels, n_samples]): Trials of EEG data. 3D array containing data with float type.
y (numpy.ndarray [n_trials]): Labels corresponding to X.
ratio (float): Desired ratio of MAJORITY class samples to minority class samples.
- ratio=1 means the number of MAJORITY class samples will be equal to the number of minority class samples
- ratio=0.5 means the number of MAJORITY class samples will be half the number of minority class samples
- ratio=2 means the number of MAJORITY class samples will be twice the number of minority class samples

Returns

under_X (numpy.ndarray): Undersampled X.
under_y (numpy.ndarray): Undersampled y.