Source code for nsds_lab_to_nwb.components.stimulus.tokenizers.base_tokenizer

import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


[docs]class BaseTokenizer():
    """ Base Tokenizer class for auditory stimulus data
    """
    def __init__(self, block_name, stim_configs):
        self.block_name = block_name
        self.stim_configs = stim_configs

        self.tokenizer_type = 'BaseTokenizer'
        self.custom_trial_columns = None
        self.audio_start_time = None

[docs]    def tokenize(self, mark_events, rec_end_time):

        if self.stim_configs['name'] == 'baseline':
            # using SingleTokenizer._tokenize
            trial_list = self._tokenize(None, None, rec_end_time=rec_end_time)
            return trial_list

        mark_offset = self.stim_configs['mark_offset']      # from mark to actual stim onset
        stim_onsets = mark_events + mark_offset

        stim_start_time = stim_onsets[0]
        audio_start_time = mark_events[0] - self.stim_configs['first_mark']
        audio_end_time = audio_start_time + self.stim_configs['play_length']
        last_marker_time = mark_events[-1]

        stim_name = self.stim_configs['name']
        logger.debug(f'Tokenizing {stim_name} stimulus.')
        logger.debug(f'audio file start time: {audio_start_time}')
        logger.debug(f'stim onset: {stim_start_time}')
        logger.debug(f'last marker: {last_marker_time}')
        logger.debug(f'audio file end time: {audio_end_time} ')
        logger.debug(f'recording end time: {rec_end_time}')

        self._validate_num_stim_onsets(stim_onsets)
        self.audio_start_time = audio_start_time

        stim_vals = self._load_stim_parameters()
        if stim_vals is not None:
            if len(stim_vals) != self.stim_configs['nsamples']:
                raise ValueError('incorrect number of stimulus parameter sets found.')

        trial_list = self._tokenize(stim_vals, stim_onsets,
                                    audio_start_time=audio_start_time,
                                    audio_end_time=audio_end_time,
                                    rec_end_time=rec_end_time)
        return trial_list

    def _load_stim_parameters(self):
        # override in Tone and TIMIT tokenizers
        return None

    def _tokenize(self, stim_vals, stim_onsets, **kwargs):
        raise NotImplementedError

    def _validate_num_stim_onsets(self, stim_onsets):
        ''' Validate that the number of identified stim onsets
        is equal to the known number of stimulus trials.
        '''
        num_onsets = len(stim_onsets)
        num_expected_trials = self.stim_configs['nsamples']
        mismatch_msg = (
            f"{self.tokenizer_type}: "
            + "Incorrect number of stimulus onsets found "
            + f"in block {self.block_name}. "
            + f"Expected {num_expected_trials}, found {num_onsets}.")

        if num_onsets != num_expected_trials:
            raise ValueError(mismatch_msg)