# Source code for nsds_lab_to_nwb.components.stimulus.tokenizers.timit_tokenizer

from nsds_lab_to_nwb.components.stimulus.tokenizers.base_tokenizer import BaseTokenizer


class TIMITTokenizer(BaseTokenizer):
    """
    Tokenize into TIMIT stimulus trials.

    Every stimulus onset becomes one stimulus trial (``sb='s'``) labelled
    with the matching sample filename. A baseline trial (``sb='b'``) is added
    before the first onset and another after the end of the audio, whenever
    those periods are non-empty.
    """
    def __init__(self, block_name, stim_configs):
        """Set up the tokenizer and declare the TIMIT-specific trial columns.

        Parameters
        ----------
        block_name :
            Passed through unchanged to ``BaseTokenizer.__init__``.
        stim_configs :
            Passed through unchanged to ``BaseTokenizer.__init__``. The
            methods of this class read the keys ``'stim_params_path'``,
            ``'baseline_start'`` and (optionally) ``'baseline_end'`` from
            ``self.stim_configs``.
        """
        super().__init__(block_name, stim_configs)
        self.tokenizer_type = 'TIMITTokenizer'

        # list of ('column_name', 'column_description')
        self.custom_trial_columns = [('sb', 'Stimulus (s) or baseline (b) period'),
                                     ('sample_filename', 'Sample Filename')]

    def _load_stim_parameters(self):
        """Read the list of stimulus filenames for this block.

        Returns
        -------
        list of str
            Output of ``timit_stimulus_values`` for the file located at
            ``self.stim_configs['stim_params_path']``.
        """
        return timit_stimulus_values(self.stim_configs['stim_params_path'])

    def _tokenize(self, stim_vals, stim_onsets,
                  *, audio_start_time, audio_end_time, rec_end_time):
        """Build the list of baseline and stimulus trials.

        Parameters
        ----------
        stim_vals : sequence
            One entry per stimulus; ``str(stim_vals[i])`` is stored as the
            ``sample_filename`` of the i-th stimulus trial.
        stim_onsets : sequence of numbers
            Onset time of each stimulus. Must be non-empty and indexable.
        audio_start_time :
            Currently not used by this tokenizer.
        audio_end_time : number
            Stop time of the last stimulus trial; the post-stimulus baseline
            starts ``baseline_start`` after it.
        rec_end_time : number
            Stop time of the post-stimulus baseline.

        Returns
        -------
        list of dict
            Trials in chronological order, each with the keys
            ``start_time``, ``stop_time``, ``sb`` and ``sample_filename``.

        Raises
        ------
        ValueError
            If ``stim_configs['baseline_end']`` is set to a non-None value.
        IndexError
            If ``stim_onsets`` is empty, or ``stim_vals`` has fewer entries
            than ``stim_onsets``.
        """
        # bl_gap: gap between baseline and stimulus periods
        # (use the same value for both the pre- and post-stim baselines)
        bl_gap = self.stim_configs['baseline_start']
        if self.stim_configs.get('baseline_end', None) is not None:
            raise ValueError('baseline_end is assumed to have null/None value, '
                             'meaning that baselines extend to the ends of recoding')

        trial_list = []

        # period before the first stimulus starts; skipped when the first
        # onset is within bl_gap of time zero
        pre_stim_stop = stim_onsets[0] - bl_gap
        if pre_stim_stop > 0.0:
            trial_list.append(dict(start_time=0.0,
                                   stop_time=pre_stim_stop,
                                   sb='b',
                                   sample_filename='none'))

        # each stimulus trial runs until the next onset; the last one runs
        # until the end of the audio
        last_idx = len(stim_onsets) - 1
        for i, onset in enumerate(stim_onsets):
            filename = str(stim_vals[i])
            stop_time = audio_end_time if i == last_idx else stim_onsets[i + 1]
            trial_list.append(dict(start_time=onset,
                                   stop_time=stop_time,
                                   sb='s',
                                   sample_filename=filename))

        # period after the end of last stim trial until recording stops
        post_stim_start = audio_end_time + bl_gap
        if post_stim_start < rec_end_time:
            trial_list.append(dict(start_time=post_stim_start,
                                   stop_time=rec_end_time,
                                   sb='b',
                                   sample_filename='none'))

        return trial_list


def timit_stimulus_values(file_path):
    """Read TIMIT stimulus filenames from a text file, one per line.

    Adapted from mars.configs.block_directory.

    Trailing spaces and the newline are removed from every line; nothing
    else is stripped, and blank lines are kept as empty strings so that the
    position of each entry still matches its line in the file.

    Parameters
    -----------
    file_path : full path to a .txt file that contains a list of filenames

    Returns
    --------
    stim_vals: list of str
        each item is a .wav file name in TIMIT.
    """
    # expecting a text file, one .wav filename string per row
    with open(file_path) as f:
        # iterate the handle directly; no need for an intermediate list of
        # raw lines
        return [line.rstrip(' \n') for line in f]