Package fed3
fed3 is a Python package for working with FED3 data.
Expand source code
# -*- coding: utf-8 -*-
'''fed3 is a Python package for working with FED3 data.'''
# set warnings style to remove reprinting of warning
import warnings as __warnings
def __warning_on_one_line(message, category, filename, lineno, file=None, line=None):
    '''Render a warning as a single "file:line: Category: message" line.

    The signature matches the `warnings.formatwarning` hook; `file` and
    `line` are accepted for compatibility but unused.
    '''
    return f'{filename}:{lineno}: {category.__name__}: {message}\n'
# Install the compact formatter so warnings are not echoed with source context.
__warnings.formatwarning = __warning_on_one_line
# version
from ._version import v
__version__ = v
del v
#imports for package namespace
from fed3.core import (FEDFrame,
as_aligned,
can_concat,
concat,
load,
split,
timecrop)
from fed3.examples import list_examples, load_examples
from fed3.lightcycle import set_lightcycle
from fed3.metrics import get_metric, list_metrics
# Public API surface: names exported by `from fed3 import *`.
__all__ = [
    'FEDFrame',
    'as_aligned',
    'can_concat',
    'concat',
    'load',
    'split',
    'timecrop',
    'list_examples',
    'load_examples',
    'set_lightcycle',
    'get_metric',
    'list_metrics'
]
Sub-modules
fed3.core
-
This package defines the major FEDFrame class (
FEDFrame
) for representing fed3 data. It is a subclass of pandas DataFrame. Other … fed3.examples
-
This package provides example data for fed3. The package comes bundled with some CSV files of FED3 data. These can be loaded via fed3, returning …
fed3.lightcycle
-
General code for helping with the light cycle for FED3 data.
fed3.metrics
-
This package defines functions for extracting temporal variables from FED3Frames, and tools for collecting those variables into tables. This is …
fed3.plot
-
Plotting with fed3 …
Functions
def as_aligned(feds, alignment, inplace=False)
-
Helper function for setting the alignment of one or more FEDFrames. See
FEDFrame.set_alignment()
for more information.Parameters
feds
:FEDFrame
orcollection
ofFEDFrames
- FEDFrames to set alignment for
alignment
:'str':
- Alignment string.
inplace
:bool
- When True, the FEDFrames are modified in place; otherwise, new copies are created.
Returns
aligned
orNone
- Either one FEDFrame or a list of FEDFrames with new alignment.
Expand source code
def as_aligned(feds, alignment, inplace=False):
    '''
    Helper function for setting the alignment of one or more FEDFrames.
    See `fed3.core.fedframe.FEDFrame.set_alignment()` for more information.

    Parameters
    ----------
    feds : FEDFrame or collection of FEDFrames
        FEDFrames to set alignment for
    alignment : str
        Alignment string.
    inplace : bool
        When True, the FEDFrames are modified in place; otherwise, new
        copies are created.

    Returns
    -------
    aligned or None
        Either one FEDFrame or a list of FEDFrames with new alignment.
    '''
    if isinstance(feds, FEDFrame):
        return feds.set_alignment(alignment, inplace=inplace)
    # BUG FIX: `inplace` was previously dropped in this branch, so
    # inplace=True on a collection silently returned new copies instead of
    # modifying the FEDFrames in place.
    return [f.set_alignment(alignment, inplace=inplace) for f in feds]
def can_concat(feds)
-
Determines whether or not FEDFrames can be concatenated, (based on whether their start and end times overlap).
Parameters
feds
:array
- an array of FEDFrames
Returns
bool
Expand source code
def can_concat(feds):
    """
    Determine whether FEDFrames can be concatenated, based on whether
    their start and end times overlap.

    Parameters
    ----------
    feds : array
        an array of FEDFrames

    Returns
    -------
    bool
        True when, after sorting by start time, every file begins strictly
        after the previous file ends.
    """
    ordered = sorted(feds, key=lambda f: f.start_time)
    # Compare each adjacent pair: a later file must start after the
    # earlier one ends.
    return all(later.start_time > earlier.end_time
               for earlier, later in zip(ordered, ordered[1:]))
def concat(feds, name=None, add_concat_number=True, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Concatenate FED3 data in time.
Parameters
feds
:collection
ofFEDFrame objects
- List or other collection of FEDFrame
name
:str
, optional- Name to give the new FEDFrame with concatenated data. The default is None, in which case the name of the first FEDFrame is used.
add_concat_number
:bool
, optional- Adds a column keeping record of the concatenation. The default is True.
reset_columns
:list-like
, optional- Columns whose counts should be modified in order to preserve counts
across the concatenated data.
The default is
('Pellet_Count', 'Left_Poke_Count','Right_Poke_Count')
.
Raises
ValueError
- Cannot concatenate FED data when the timestamps are overlapping.
Returns
newfed
:FEDFrame
- New FEDFrame object with concatenated data.
Expand source code
def concat(feds, name=None, add_concat_number=True,
           reset_columns=('Pellet_Count', 'Left_Poke_Count','Right_Poke_Count')):
    '''
    Concatenate FED3 data in time.

    Parameters
    ----------
    feds : collection of FEDFrame objects
        List or other collection of FEDFrame
    name : str, optional
        Name to give the new FEDFrame with concatenated data.  The default
        is None, in which case the name of the first FEDFrame is used.
    add_concat_number : bool, optional
        Adds a column keeping record of the concatenation.  The default
        is True.
    reset_columns : list-like, optional
        Columns whose counts should be modified in order to preserve counts
        across the concatenated data.  The default is
        `('Pellet_Count', 'Left_Poke_Count','Right_Poke_Count')`.

    Raises
    ------
    ValueError
        Cannot concatenate FED data when the timestamps are overlapping.

    Returns
    -------
    newfed : fed3.FEDFrame
        New FEDFrame object with concatenated data.
    '''
    if name is None:
        name = feds[0].name
    if not can_concat(feds):
        raise ValueError('FEDFrame dates overlap, cannot concat.')
    ordered = sorted(feds, key=lambda f: f.start_time)
    pieces = []
    running = {}  # per-column cumulative totals carried across files
    for number, fed in enumerate(ordered):
        frame = fed.copy()
        if add_concat_number:
            frame['Concat_#'] = number
        if number == 0:
            # Seed the running totals from whichever reset columns exist
            # in the first (earliest) file.
            running = {col: frame[col].max()
                       for col in reset_columns if col in frame.columns}
        else:
            # Shift later files up by the running total, then advance it.
            for col in running:
                frame[col] += running[col]
                running[col] = frame[col].max()
        pieces.append(frame)
    newfed = pd.concat(pieces)
    newfed._load_init(name=name)
    return newfed
def get_metric(y)
-
Return a metric function from its key.
Parameters
y
:str
- Key for metric.
Raises
KeyError
- Metric key not recognized.
Returns
namedtuple
- Named tuple with a
func
andnicename
attribute. Thefunc
is the actual metric function, which can be called on FEDFrames. Thenicename
is a nicer version of the key, used for axis labels.
Expand source code
def get_metric(y):
    '''
    Return a metric function from its key.

    Parameters
    ----------
    y : str
        Key for metric.  Lookup is case-insensitive.

    Raises
    ------
    ValueError
        Metric key not recognized.

    Returns
    -------
    namedtuple
        Named tuple with a `func` and `nicename` attribute.  The `func`
        is the actual metric function, which can be called on FEDFrames.
        The `nicename` is a nicer version of the key, used for axis labels.
    '''
    # DOC FIX: this function raises ValueError, not KeyError as the
    # docstring previously claimed.
    key = y.lower()
    try:
        return METRICS[key]
    except KeyError:
        metrics = ', '.join(f"'{m}'" for m in METRICS)
        # `from None` hides the internal KeyError so callers see only the
        # informative ValueError.
        raise ValueError(f'Metric key "{y}" is not recognized. Possible metrics are: '
                         f'{metrics}.') from None
def list_examples()
-
List all the available example data sets - specifically the string keys which can be provided to
load_examples()
.Returns
list
- All available keys.
Expand source code
def list_examples():
    '''
    List all the available example data sets - specifically the string keys
    which can be provided to `load_examples()`.

    Returns
    -------
    list
        All available keys.
    '''
    # Each example data set is a subdirectory of the bundled data directory;
    # plain files in DATADIR are ignored.
    return [folder for folder in sorted(os.listdir(DATADIR))
            if os.path.isdir(os.path.join(DATADIR, folder))]
def list_metrics()
-
List all available metric keys.
Returns
list
Expand source code
def list_metrics():
    '''
    List all available metric keys.

    Returns
    -------
    list
        Keys accepted by `get_metric()`, in registration order.
    '''
    return [key for key in METRICS]
def load(path, index_col='MM:DD:YYYY hh:mm:ss', dropna=True, deduplicate_index=None, offset='1S', reset_counts=False, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Load FED3 data from a CSV/Excel file. This is the typical recommended way for importing FED3 data. Relies mostly on
pandas.read_csv()
andpandas.read_excel()
for the parsing.Parameters
path
:str
- System path to FED3 data file.
index_col
:str
, optional- Timestamp column to use as index. The default is 'MM:DD:YYYY hh:mm:ss'.
dropna
:bool
, optional- Remove all empty rows. The default is True.
deduplicate_index
,offset
,reset_counts
,reset_columns
:optional
- Arguments passed to
FEDFrame.deduplicate_index()
, used to remove duplicate timestamps as the data are loaded.
Returns
f
:FEDFrame
- New FEDFrame object.
Expand source code
def load(path, index_col='MM:DD:YYYY hh:mm:ss', dropna=True,
         deduplicate_index=None, offset='1S', reset_counts=False,
         reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count')):
    '''
    Load FED3 data from a CSV/Excel file.  This is the typical recommended
    way for importing FED3 data.  Relies mostly on `pandas.read_csv()` and
    `pandas.read_excel()` for the parsing.

    Parameters
    ----------
    path : str
        System path to FED3 data file.  Must end in `.csv` or `.xlsx`.
    index_col : str, optional
        Timestamp column to use as index.  The default is
        'MM:DD:YYYY hh:mm:ss'.
    dropna : bool, optional
        Remove all empty rows.  The default is True.
    deduplicate_index, offset, reset_counts, reset_columns: optional
        Arguments passed to `fed3.FEDFrame.deduplicate_index()`, used
        to remove duplicate timestamps as the data are loaded.

    Raises
    ------
    ValueError
        The file extension is not a supported format.

    Returns
    -------
    f : fed3.FEDFrame
        New FEDFrame object.
    '''
    # read the path
    root, ext = os.path.splitext(path)
    ext = ext.lower()
    read_opts = {'.csv': pd.read_csv, '.xlsx': pd.read_excel}
    # BUG FIX: previously an unsupported extension surfaced as a bare
    # KeyError from the dict lookup; raise an explicit, documented error.
    if ext not in read_opts:
        raise ValueError(f'Unsupported file extension "{ext}"; expected one '
                         'of: ' + ', '.join(sorted(read_opts)))
    feddata = read_opts[ext](path, parse_dates=True, index_col=index_col)
    if dropna:
        feddata = feddata.dropna(how='all')
    # Name the FEDFrame after the file, without directory or extension.
    name = os.path.basename(root)
    f = FEDFrame(feddata)
    f._load_init(name=name,
                 path=path,
                 deduplicate_index=deduplicate_index,
                 offset=offset,
                 reset_counts=reset_counts,
                 reset_columns=reset_columns)
    return f
def load_examples(key, verbose=False, deduplicate_index=None, offset='1S', reset_counts=False, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Load the example data linked to a given key.
Parameters
key
:str
- Example to load.
verbose
:bool
- Print status while loading
deduplicate_index
,offset
,reset_counts
,reset_columns
:optional
- Arguments passed to
FEDFrame.deduplicate_index()
, used to remove duplicate timestamps as the data are loaded.
Raises
KeyError
- Unrecognized key.
Returns
list
- FED3 example data, as a list of FEDFrame objects.
Expand source code
def load_examples(key, verbose=False, deduplicate_index=None, offset='1S',
                  reset_counts=False,
                  reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count')):
    '''
    Load the example data linked to a given key.

    Parameters
    ----------
    key : str
        Example to load.
    verbose : bool
        Print status while loading
    deduplicate_index, offset, reset_counts, reset_columns: optional
        Arguments passed to `fed3.FEDFrame.deduplicate_index()`, used
        to remove duplicate timestamps as the data are loaded.

    Raises
    ------
    KeyError
        Unrecognized key.

    Returns
    -------
    list
        FED3 example data, as a list of FEDFrame objects.
    '''
    # no-op printer when not verbose
    vprint = print if verbose else lambda *args, **kwargs: None
    example_path = os.path.join(DATADIR, key)
    # BUG FIX: previously an unrecognized key fell through to a confusing
    # FileNotFoundError from os.listdir; raise the documented KeyError.
    if not os.path.isdir(example_path):
        raise KeyError(f'Unrecognized example key: "{key}". '
                       'Use fed3.list_examples() to see available keys.')
    examples = []
    vprint()
    vprint(f'Loading from data directory: {DATADIR}')
    vprint()
    vprint(f'Example folder: {example_path}')
    vprint()
    for file in sorted(os.listdir(example_path)):
        # Only CSV/Excel files in the example folder are loaded.
        ext = os.path.splitext(file)[1]
        if ext.lower() not in ['.csv', '.xlsx']:
            continue
        vprint(f' - {file}...')
        fullfile = os.path.join(example_path, file)
        f = load(path=fullfile,
                 deduplicate_index=deduplicate_index,
                 offset=offset,
                 reset_counts=reset_counts,
                 reset_columns=reset_columns)
        examples.append(f)
    return examples
def set_lightcycle(on_hour, off_hour, on_minute=0, off_minute=0)
-
Set the light cycle. This affects shading on plots and operations which group data based on the light cycle.
Parameters
on_hour
:int
- Integer indicating the hour of day when lights turn on, in [0-24).
off_hour
:int
- Integer indicating the hour of day when lights turn off, in [0-24).
on_minute
:int
, optional- Minute of the
on_hour
where lights turn on, in [0-60). off_minute
:int
, optional- Minute of the
off_hour
where lights turn off, in [0-60).
Returns
None.
Expand source code
def set_lightcycle(on_hour, off_hour, on_minute=0, off_minute=0):
    '''
    Set the light cycle.  This affects shading on plots and operations
    which group data based on the light cycle.

    Parameters
    ----------
    on_hour : int
        Integer indicating the hour of day when lights turn on, in [0-24).
    off_hour : int
        Integer indicating the hour of day when lights turn off, in [0-24).
    on_minute : int, optional
        Minute of the `on_hour` where lights turn on, in [0-60).
    off_minute : int, optional
        Minute of the `off_hour` where lights turn off, in [0-60).

    Returns
    -------
    None.
    '''
    # Store both boundaries as datetime.time objects in the module-level
    # LIGHTCYCLE dict, which other fed3 code reads.
    lights_on = dt.time(hour=on_hour, minute=on_minute)
    lights_off = dt.time(hour=off_hour, minute=off_minute)
    LIGHTCYCLE['on'] = lights_on
    LIGHTCYCLE['off'] = lights_off
def split(fed, dates, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'), return_empty=False, tag_name=True)
-
Split one FEDFrame into a multiple based on one or more dates.
Parameters
fed
:FEDFrame
- FED3 data.
dates
:datetime string
ordatetime object,
orlist-like
ofsuch
- Timestamp(s) to split the data on.
reset_columns
:list-like
, optional- Columns whose cumulative totals should be reset when splitting the data. The default is ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
return_empty
:bool
, optional- Return empty FEDFrames created from splitting. The default is False.
tag_name
:bool
, optional- Add a
'_#'
tag to the name of each new FEDFrame. The default is True.
Returns
output
:list
- List of FED3 objects created by split.
Expand source code
def split(fed, dates, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'),
          return_empty=False, tag_name=True):
    '''
    Split one FEDFrame into a multiple based on one or more dates.

    Parameters
    ----------
    fed : fed3.FEDFrame
        FED3 data.
    dates : datetime string or datetime object, or list-like of such
        Timestamp(s) to split the data on.
    reset_columns : list-like, optional
        Columns whose cumulative totals should be reset when splitting the
        data.  The default is
        ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
    return_empty : bool, optional
        Return empty FEDFrames created from splitting.  The default is False.
    tag_name : bool, optional
        Add a `'_#'` tag to the name of each new FEDFrame.  The default is
        True.

    Returns
    -------
    output : list
        List of FED3 objects created by split.
    '''
    dates = _split_handle_dates(dates)
    output = []
    offsets = {col: 0 for col in reset_columns}
    og_name = fed.name
    for i in range(len(dates) - 1):
        start = dates[i]
        end = dates[i + 1]
        subset = fed[(fed.index >= start) & (fed.index < end)].copy()
        if tag_name:
            subset.name = f"{og_name}_{i}"
        if subset.empty:
            # Empty slices contribute nothing to the running totals; updating
            # offsets from them would previously poison later segments with NaN.
            if return_empty:
                output.append(subset)
            continue
        for col in reset_columns:
            subset[col] -= offsets[col]
            # BUG FIX: track the ORIGINAL cumulative max (adjusted max plus
            # the prior offset).  Storing only the adjusted max reset the
            # third and later segments by the wrong amount.
            offsets[col] += subset[col].max()
        output.append(subset)
    return output
def timecrop(fed, start, end, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'), name=None)
-
Return a new FEDFrame cropped in time to only include data between two dates.
Parameters
fed
:FEDFrame
- FED3 data.
start
:datetime str
orobject
- Time to start including data (inclusive).
end
:datetime str
orobject
- Time to stop including data (exclusive).
reset_columns
:list-like
, optional- Columns whose cumulative totals should be reset when cropping the data. The default is ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
name
:str
, optional- Name for the new FEDFrame produced. The default is None.
Returns
newfed
:FEDFrame
- New FEDFrame object after filtering.
Expand source code
def timecrop(fed, start, end, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'),
             name=None):
    '''
    Return a new FEDFrame cropped in time to only include data between two
    dates.

    Parameters
    ----------
    fed : fed3.FEDFrame
        FED3 data.
    start : datetime str or object
        Time to start including data (inclusive).
    end : datetime str or object
        Time to stop including data (exclusive).
    reset_columns : list-like, optional
        Columns whose cumulative totals should be reset when cropping the
        data.  The default is
        ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
    name : str, optional
        Name for the new FEDFrame produced.  The default is None.

    Returns
    -------
    newfed : fed3.FEDFrame
        New FEDFrame object after filtering.
    '''
    before = fed[fed.index < start]
    within = fed[(fed.index >= start) & (fed.index < end)].copy()
    # Subtract the last cumulative count seen before the crop so each
    # reset column restarts from the cropped window.
    if not before.empty:
        for col in reset_columns:
            within[col] -= before[col].max()
    if name is not None:
        within.name = name
    return within
Classes
class FEDFrame (data=None, index: Axes | None = None, columns: Axes | None = None, dtype: Dtype | None = None, copy: bool | None = None)
-
The main object interface for FED3 data in the fed3 library. Provides a 2D table for storing FED3 data.
FEDFrame is a subclass of the DataFrame, which allows for the highly-developed data manipulation operations provided by pandas. Most things you can do with a pandas DataFrame can also be done with a FEDFrame.
Note there is no equivalent of the pandas Series which is specific to FEDs.
FEDFrame provides additional attributes and methods which are specific to FED3 data. See additional documentation for these below.
Most of the time, FED3 data will be accessed directly from the logged CSV files. In this case, using the FEDFrame constructor is not recommended; you should instead use
load()
. But if for some reason you already have FED3 data loaded into a pandas DataFrame, you can make use of the constructor and theFEDFrame._load_init()
function to get full FEDFrame functionality.Other links:
- pandas: https://pandas.pydata.org/docs/index.html
- pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
- Subclassing pandas: https://pandas.pydata.org/docs/development/extending.html
Expand source code
class FEDFrame(pd.DataFrame):
    '''The main object interface for FED3 data in the fed3 library.
    Provides a 2D table for storing FED3 data.

    FEDFrame is a subclass of the DataFrame, which allows for the
    highly-developed data manipulation operations provided by pandas.
    Most things you can do with a pandas DataFrame can also be done with
    a FEDFrame.  Note there is no equivalent of the pandas Series which
    is specific to FEDs.

    FEDFrame provides additional attributes and methods which are
    specific to FED3 data.  See additional documentation for these below.

    Most of the time, FED3 data will be accessed directly from the logged
    CSV files.  In this case, using the FEDFrame constructor is not
    recommended; you should instead use `fed3.core.load()`.  But if for
    some reason you already have FED3 data loaded into a pandas DataFrame,
    you can make use of the constructor and the
    `fed3.core.fedframe.FEDFrame._load_init()` function to get full
    FEDFrame functionality.

    Other links:

    - pandas: https://pandas.pydata.org/docs/index.html
    - pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
    - Subclassing pandas: https://pandas.pydata.org/docs/development/extending.html'''

    # ---- Class variables

    # Attributes propagated to new objects created by pandas operations
    # (see the pandas subclassing guide).
    _metadata = ['name', 'path', 'foreign_columns', 'missing_columns',
                 '_alignment', '_current_offset']

    # Strategy used to detect individual left/right pokes; see
    # _binary_poke_for_side() and _cumulative_poke_for_side().
    LR_POKE_METHOD_OPTIONS = ('from_columns', 'from_events')
    LR_POKE_METHOD = 'from_columns'

    # "Event" column values counted as left / right pokes when
    # LR_POKE_METHOD == 'from_events'.
    L_POKE_EVENTS = ['Left', 'LeftShort', 'LeftWithPellet', 'LeftinTimeout',
                     'LeftDuringDispense']
    R_POKE_EVENTS = ['Right', 'RightShort', 'RightWithPellet',
                     'RightinTimeout', 'RightDuringDispense']

    # ---- Properties

    @property
    def _constructor(self):
        '''Maintains the FEDFrame type for derivatives created from self.

        See https://pandas.pydata.org/docs/development/extending.html'''
        return FEDFrame

    @property
    def duration(self):
        """Time delta of last timestamp and first timestamp."""
        return self.end_time - self.start_time

    @property
    def end_time(self):
        """Last timestamp in file."""
        # assumes the index is chronologically sorted — TODO confirm
        return pd.Timestamp(self.index.values[-1])

    @property
    def events(self):
        '''Number of logged events (i.e. rows).'''
        # NOTE(review): pandas removed the DataFrame `.data` attribute in
        # v1.0, so this likely raises AttributeError on modern pandas;
        # `len(self)` looks like the intended row count — confirm.
        return len(self.data)

    @property
    def fedmode(self):
        '''FED3 operating mode for this data.'''
        return self.determine_mode()

    @property
    def start_time(self):
        '''First timestamp in file.'''
        # assumes the index is chronologically sorted — TODO confirm
        return pd.Timestamp(self.index.values[0])

    # ---- "Private"

    def _binary_correct_pokes(self):
        # 0/1 indicator of pokes made on the currently active side.
        l = self._binary_pokes('left')
        r = self._binary_pokes('right')
        active_l = self['Active_Poke'] == 'Left'
        active_r = self['Active_Poke'] == 'Right'
        correct = ((l * active_l).astype(int) | (r * active_r).astype(int))
        return correct

    def _binary_error_pokes(self):
        # 0/1 indicator of pokes made on the inactive side.
        l = self._binary_pokes('left')
        r = self._binary_pokes('right')
        active_l = self['Active_Poke'] == 'Left'
        active_r = self['Active_Poke'] == 'Right'
        error = ((l * active_r).astype(int) | (r * active_l).astype(int))
        return error

    def _binary_pellets(self):
        # Per-row pellet indicator derived from the cumulative count; the
        # first row has no diff, so classify the first event instead.
        bp = self['Pellet_Count'].diff().copy()
        if not bp.empty:
            bp.iloc[0] = int(self._first_event_type() == 'pellet')
        return bp

    def _binary_poke_for_side(self, side):
        # Per-row poke indicator for one side ('left' or 'right'), using
        # the class-level LR_POKE_METHOD strategy.
        if self.LR_POKE_METHOD == 'from_columns':
            col = {'left': 'Left_Poke_Count', 'right': 'Right_Poke_Count'}[side]
            bp = self[col].diff().copy()
            if not bp.empty:
                bp.iloc[0] = int(self._first_event_type() == side)
        elif self.LR_POKE_METHOD == 'from_events':
            search = {'left': self.L_POKE_EVENTS, 'right': self.R_POKE_EVENTS}[side]
            bp = self['Event'].isin(search).astype(int)
        else:
            raise ValueError(f'"{self.LR_POKE_METHOD}" is not recognized for '
                             f'FEDFrame.LR_POKE_METHOD. Should be one of '
                             f'{FEDFrame.LR_POKE_METHOD_OPTIONS}.')
        return bp

    def _binary_pokes(self, kind='any'):
        # Per-row 0/1 poke indicator for the requested poke category.
        kind = kind.lower()
        kinds = ['left', 'right', 'any', 'correct', 'error']
        if kind not in kinds:
            raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
        if kind == 'any':
            l = self._binary_poke_for_side('left')
            r = self._binary_poke_for_side('right')
            bp = ((l == 1) | (r == 1)).astype(int)
        elif kind in ['left', 'right']:
            bp = self._binary_poke_for_side(kind).astype(int)
        elif kind in ['correct', 'error']:
            bp = self._binary_correct_pokes() if kind == 'correct' else self._binary_error_pokes()
        return bp

    def _cumulative_poke_for_side(self, side):
        # Running poke total for one side, per LR_POKE_METHOD.
        if self.LR_POKE_METHOD == 'from_columns':
            col = {'left': 'Left_Poke_Count', 'right': 'Right_Poke_Count'}[side]
            cp = self[col]
        elif self.LR_POKE_METHOD == 'from_events':
            search = {'left': self.L_POKE_EVENTS, 'right': self.R_POKE_EVENTS}[side]
            cp = self['Event'].isin(search).cumsum().astype(int)
        else:
            raise ValueError(f'"{self.LR_POKE_METHOD}" is not recognized for '
                             f'FEDFrame.LR_POKE_METHOD. Should be one of '
                             f'{FEDFrame.LR_POKE_METHOD_OPTIONS}.')
        return cp

    def _cumulative_pokes(self, kind='any'):
        # Running poke total for the requested poke category.
        kind = kind.lower()
        kinds = ['left', 'right', 'any', 'correct', 'error']
        if kind not in kinds:
            raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
        if kind == 'any':
            l = self._cumulative_poke_for_side('left')
            r = self._cumulative_poke_for_side('right')
            cp = (l + r).astype(int)
        elif kind in ['left', 'right']:
            cp = self._cumulative_poke_for_side(kind).astype(int)
        elif kind in ['correct', 'error']:
            bp = self._binary_correct_pokes() if kind == 'correct' else self._binary_error_pokes()
            cp = bp.cumsum()
        return cp

    def _first_event_type(self):
        '''
        Get the type of event for the first entry.  Special case
        implementation of `event_type()`.  Returns either "pellet",
        "left", "right", or "unknown".
        '''
        ts = self.index[0]
        left = self.loc[ts, 'Left_Poke_Count'] == 1
        right = self.loc[ts, 'Right_Poke_Count'] == 1
        pellet = self.loc[ts, 'Pellet_Count'] == 1
        # exactly one counter should read 1 on the first row; anything
        # else is ambiguous
        if sum([left, right, pellet]) != 1:
            return 'unknown'
        ans = ['left', 'right', 'pellet'][[left, right, pellet].index(True)]
        return ans

    def _fix_column_names(self):
        '''
        Checks for and fixes old FED3 issue where column names included
        a trailing/starting space.

        Returns
        -------
        None.
        '''
        self.foreign_columns = []
        for col in self.columns:
            for fix in FIXED_COLS:
                # fuzzy match to tolerate stray whitespace in headers
                likeness = SequenceMatcher(a=col, b=fix).ratio()
                if likeness > 0.85:
                    self.rename(columns={col:fix}, inplace=True)
                    break
                # NOTE(review): this appends once per non-matching
                # candidate, so foreign_columns can hold duplicates —
                # confirm intended placement.
                self.foreign_columns.append(col)
        self.missing_columns = [col for col in NEEDED_COLS
                                if col not in self.columns]

    def _handle_retrieval_time(self):
        # Coerce Retrieval_Time to numeric; unparseable entries become NaN.
        if 'Retrieval_Time' not in self.columns:
            return
        self['Retrieval_Time'] = pd.to_numeric(self['Retrieval_Time'],
                                               errors='coerce')

    def _load_init(self, name=None, path=None, deduplicate_index=None,
                   offset='1S', reset_counts=False,
                   reset_columns=('Pellet_Count', 'Left_Poke_Count',
                                  'Right_Poke_Count')):
        '''
        Initialize FEDFrame attributes and apply some data cleaning.

        This method is marked "private" because it is typically invoked
        automatically when loading data from local files.  The only use
        case is when you have existing pandas data which you want to
        convert into FEDFrame data.  The following demonstrates this use
        case, but note that **it is not recommended** (use
        `fed3.core.load()` instead):

        ```python
        import fed3
        import pandas as pd

        data = pd.read_csv("/some/file.csv")
        data = fed3.FEDFrame(data)

        # do the following to get full functionality
        data._load_init()
        ```

        Parameters
        ----------
        name : str, optional
            Name to give the FEDFrame. The default is None.
        path : str, optional
            Set a local data path for the data. The default is None.
        deduplicate_index, offset, reset_counts, reset_columns : optional
            Arguments passed to `fed3.FEDFrame.deduplicate_index()`, used
            to remove duplicate timestamps as the data are loaded.

        Returns
        -------
        None.
        '''
        self.name = name
        self.path = path
        self._fix_column_names()
        self._handle_retrieval_time()
        self._alignment = 'datetime'
        self._current_offset = pd.Timedelta(0)
        if deduplicate_index is not None:
            self.deduplicate_index(method=deduplicate_index, offset=offset,
                                   reset_counts=reset_counts,
                                   reset_columns=reset_columns)
        if self.check_duplicated_index():
            # NOTE(review): "deuplicate_index" is a typo for
            # "deduplicate_index" in this user-facing warning text.
            warnings.warn("Index has duplicate values, which may prevent some "
                          "fed3 operations. Use the deuplicate_index() method "
                          "to remove duplicate timestamps.", RuntimeWarning)

    # ---- Public

    def check_duplicated_index(self):
        '''
        Checks if the data has duplicated timestamps.

        Returns
        -------
        bool
            `True` if duplicates found, else `False`.
        '''
        return self.index.duplicated().any()

    def deduplicate_index(self, method='keep_first', offset='1S',
                          reset_counts=False,
                          reset_columns=('Pellet_Count', 'Left_Poke_Count',
                                         'Right_Poke_Count')):
        '''
        Apply a method to remove duplicate timestamps from the data.

        With FEDFrames, the timestamp column (typically the column
        'MM:DD:YYYY hh:mm:ss') is used as the row labels (AKA the `index`
        in pandas).  Some operations which involve selecting data based on
        this index can fail when there are duplicate entries.

        FED3 data should not generally have duplicated timestamps, however
        they do arise due to two main causes.  One cause is transient
        logging errors, which should be rare (but may be more common with
        earlier FED software).  The other, more common, cause is data
        editing with Microsoft Excel, which has been documented elsewhere.

        Note that this method does not recover any information that is
        lost; it either removes duplicated indices or applies simple rules
        to alter them.  When available, non-duplicated data should be
        loaded into the program.  fed3 should raise a warning when files
        with duplicated timestamps are loaded.  The status of the index
        can also be checked with `check_duplicated_index()`.

        Parameters
        ----------
        method : str, optional
            Method for removing duplicates.  Options are:

            - `'keep_first'`: keep only the first instance of each
              duplicate set (default)
            - `'keep_last'`: keep only the last instance of each
              duplicate set
            - `'remove'`: delete any rows with duplicate timestamps
            - `'offset'`: add a small time offset to each date - does so
              iteratively until the index is not duplicated.
            - `'interpolate'`: offset duplicates such that they are spaced
              evenly between their value and the next timestamp in the
              series

            Note that `'interpolate'` and `'offset'` should preserve the
            length of the FEDFrame, while other options can reduce it.
        offset : str, optional
            Pandas time offset string, only used when `method='offset'`.
            The default is `'1S'`.
        reset_counts : bool, optional
            Reset cumulative columns that may be altered as a result of
            removing rows.  The default is False.  Note that this alters
            the number of pokes/pellets!
        reset_columns : list-like, optional
            Column names to reset when `reset_counts` is True.

        Raises
        ------
        ValueError
            Trying to use `'interpolate'` when the last timestamp is
            duplicated.

        Returns
        -------
        None.  Data is modified in place.
        '''
        if method not in ['keep_first', 'keep_last', 'remove', 'offset',
                          'interpolate']:
            # NOTE(review): this message interpolates `method` twice; the
            # first placeholder presumably should be the list of valid
            # options — confirm and fix.
            raise ValueError(f'`method` must be one of {method}, not "{method}"')
        if method == 'keep_first':
            mask = ~ self.index.duplicated(keep='first')
            self.query('@mask', inplace=True)
        elif method == 'keep_last':
            mask = ~ self.index.duplicated(keep='last')
            self.query('@mask', inplace=True)
        elif method == 'remove':
            mask = ~ self.index.duplicated(keep=False)
            self.query('@mask', inplace=True)
        elif method == 'offset':
            dt = pd.to_timedelta(offset)
            # nudge duplicated stamps forward until the index is unique
            while self.check_duplicated_index():
                self.index = np.where(self.index.duplicated(),
                                      self.index + dt,
                                      self.index)
        elif method == 'interpolate':
            if self.index.duplicated()[-1]:
                raise ValueError("Cannot interpolate when the last "
                                 "timestamp is duplicated; try a different "
                                 "deduplication method.")
            t0 = self.index[0]
            s = pd.Series(self.index)
            # blank out duplicate stamps, then linearly interpolate the
            # elapsed seconds between the surrounding unique stamps
            s[s.duplicated()] = None
            self.index = t0 + pd.to_timedelta((s - t0).dt.total_seconds().interpolate(),
                                              unit='seconds')

        # column resetting
        if reset_counts:
            for column in reset_columns:
                self.reset_cumulative_column(column)

    def determine_mode(self):
        '''
        Return the recording mode of the current FED data.

        This function tries to take this literally from the data headers.
        There are likely to be problems for custom programs or particular
        FED software versions.

        Returns
        -------
        mode : str
            String indicating the mode.
        '''
        mode = 'Unknown'
        column = pd.Series(dtype=object)
        # last matching column name wins: later names take precedence
        for col in ['FR','FR_Ratio',' FR_Ratio','Mode','Session_Type']:
            if col in self.columns:
                column = self[col]
        if not column.empty:
            # NOTE(review): numpy integer dtypes are not instances of
            # Python int, so this check may be False for integer columns
            # loaded by pandas — confirm against real FED CSVs.
            if all(isinstance(i,int) for i in column):
                if len(set(column)) == 1:
                    mode = 'FR' + str(column[0])
                else:
                    mode = 'PR'
            # NOTE(review): assumes column[0] is a string here; raises
            # TypeError for non-iterable scalars — confirm.
            elif 'PR' in column[0]:
                mode = 'PR'
            else:
                mode = str(column[0])
        return mode

    def event_type(self, timestamp):
        '''
        Return the type of a given timestamp within the data (pellet or
        poke).

        TODO Currently, this only reads the Event column, and throws an
        error if not present.  In the future, this may implement more
        logic to determine the type of an event.

        Parameters
        ----------
        timestamp : str, `pandas.Timestamp`
            timestamp to query.

        Raises
        ------
        Exception
            Fails when the Event column isn't present.

        Returns
        -------
        str
            Event type for queried timestamp.
        '''
        if 'Event' in self.columns:
            return self.loc[timestamp, 'Event']
        else:
            raise Exception('Missing "Event" column.')

    def interpellet_intervals(self, check_concat=True, condense=False):
        '''
        Calculate the interpellet intervals for each pellet event.  This
        is the time (in minutes) since the last pellet was retrieved.

        Note that there is a shortcut for this method: `ipi`.

        Parameters
        ----------
        check_concat : bool, optional
            Removes IPIs when they are identified as coming directly after
            data concatenation.  The default is True.  This will only work
            when data were concatenated with fed3.
        condense : bool, optional
            Return only rows where there are interpellet intervals.  The
            default is False.  When False, the returned Series will have
            same length as full FEDFrame.

        Returns
        -------
        interpellet : pandas.Series
            Pandas Series containing the interpellet intervals.
        '''
        # rows where exactly one new pellet was logged
        bp = self._binary_pellets()
        bp = bp[bp == 1]
        # gap between consecutive pellet timestamps, in minutes
        diff = bp.index.to_series().diff().dt.total_seconds() / 60
        # all-NaN series over the full index, filled only at pellet rows
        interpellet = pd.Series(np.nan, index=self.index)
        interpellet.loc[diff.index] = diff
        if check_concat and 'Concat_#' in self.columns:
            #this can't do duplicate indexes
            if not any(self.index.duplicated()):
                #thanks to this answer https://stackoverflow.com/a/47115490/13386979
                # blank the first IPI of each concatenated chunk (after
                # the first), since it spans a recording gap
                dropped = interpellet.dropna()
                pos = dropped.index.to_series().groupby(self['Concat_#']).first()
                interpellet.loc[pos[1:]] = np.nan
        if condense:
            interpellet = interpellet.loc[bp.index]
            interpellet = _filterout(interpellet, dropna=True)
        return interpellet

    def meals(self, pellet_minimum=1, intermeal_interval=1, condense=False):
        '''
        Assign a meal number to each pellet retrieval.  Returns a series
        with those assignments.  Parameters to this function determine
        what constitutes a meal.  Assignments are based on interpellet
        intervals (see `FEDFrame.interpellet_intervals()`).

        Parameters
        ----------
        pellet_minimum : int, optional
            Number of pellets required in one meal.  The default is 1.
            For high numbers, some pellets can be unassigned to any meal.
        intermeal_interval : int, optional
            Maximum length of time (in minutes) that can pass between any
            two consecutive pellets assigned to the same meal.  The
            default is 1.
        condense : bool, optional
            Return only rows where there are meals (i.e. only pellet
            index).  The default is False.  When False, the returned
            Series will have same length as full FEDFrame.

        Returns
        -------
        meals : pandas.Series
            pandas Series with labeled meals
        '''
        ipi = self.interpellet_intervals(condense=True)
        # pellets closer together than the intermeal interval share a meal
        within_interval = ipi < intermeal_interval
        meals = ((~within_interval).cumsum() + 1)
        # drop meals with fewer than `pellet_minimum` pellets, renumber
        # the surviving meals consecutively
        above_min = meals.value_counts().sort_index() >= pellet_minimum
        replacements = above_min[above_min].cumsum().reindex(above_min.index)
        meals = meals.map(replacements)
        if not condense:
            meals = meals.reindex(self.index)
        return meals

    def pellets(self, cumulative=True, condense=False):
        '''
        Provide a series containing pellet retrieval information.

        Parameters
        ----------
        cumulative : bool, optional
            When True (default), the values returned are a cumulative
            pellet count.  When False, the values are binary.
        condense : bool, optional
            Return only rows corresponding to pellets.  The default is
            False.  When False, the returned Series will have same length
            as full FEDFrame.

        Returns
        -------
        y : pandas.Series
            pandas Series containing pellet retrieval counts/indicators.
        '''
        if cumulative:
            y = self['Pellet_Count']
            if condense:
                y = _filterout(y, deduplicate=True, dropzero=True)
        else:
            y = self._binary_pellets()
            if condense:
                y = _filterout(y, dropzero=True)
        return y

    def pokes(self, kind='any', cumulative=True, condense=False):
        '''
        Get an array of poke events.

        Parameters
        ----------
        kind : str, optional
            Key for determining the poke type returned.  The default is
            'any' (any poke event).  Other options are 'left', 'right',
            'correct', and 'error'.
        cumulative : bool, optional
            When True (default), the values returned are a cumulative poke
            count.  When False, the values are binary.
        condense : bool, optional
            Return only rows corresponding to poke events.  The default is
            False.  When False, the returned Series will have same length
            as full FEDFrame.

        Raises
        ------
        ValueError
            Unaccepted key passed to `kind`.

        Returns
        -------
        y : pandas.Series
            Pandas Series containing poke counts/indicators.
        '''
        kind = kind.lower()
        kinds = ['left', 'right', 'any', 'correct', 'error']
        if kind not in kinds:
            raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
        if cumulative:
            y = self._cumulative_pokes(kind)
            if condense:
                y = _filterout(y, deduplicate=True, dropzero=True)
        else:
            y = self._binary_pokes(kind)
            if condense:
                y = _filterout(y, dropzero=True)
        return y

    def reassign_events(self, include_side=True):
        '''
        Run an initial assignment or reassignment of the "Event" column.

        Parameters
        ----------
        include_side : bool, optional
            Label poke events with "Left" and "Right" instead of "Poke".
            The default is True.

        Returns
        -------
        None.
        '''
        if include_side:
            events = pd.Series(np.nan, index=self.index)
            events.loc[self._binary_pellets().astype(bool)] = 'Pellet'
            events.loc[self._binary_pokes('left').astype(bool)] = 'Left'
            events.loc[self._binary_pokes('right').astype(bool)] = 'Right'
        else:
            events = np.where(self._binary_pellets(), 'Pellet', 'Poke')
        self['Event'] = events

    def reset_cumulative_column(self, column):
        '''
        Reset a cumulative column (usually Left_Poke_Count,
        Right_Poke_Count, or Pellet_Count) to be ascending integers.
        This may be useful when other operations cause rows to be removed.

        Parameters
        ----------
        column : str
            String column name

        Returns
        -------
        None.
        '''
        # factorize maps each distinct value to 0, 1, 2, ...; adding the
        # first observed value restores the original starting count
        reset, _ = pd.factorize(self[column])
        reset += self[column].iloc[0]
        self[column] = reset

    def set_alignment(self, alignment, inplace=True):
        '''
        Shift the timestamps of a FEDFrame to allow for comparisons with
        other data recorded at different times.  This is particularly
        intended for plotting with `fed3.plot`.

        By default, fed3 will plot fed3 data over the timestamps they were
        recorded.  For temporal plots (with time on the x-axis), this
        disallows combination (e.g. averaging) of data recorded on
        different dates.  To combine these sorts of data, this function
        will shift the timestamps of FEDFrames to a common time.

        There are three options for temporal alignment: 'datetime',
        'time', and 'elapsed'.  Note that these are the equivalents of
        'shared date & time', 'shared time', and 'elapsed time' from
        FED3_Viz.

        - 'datetime': Use the original recorded timestamps for plotting.
          This is the default behavior for plotting.  This is generally
          useful when all your data were collected at the same time, when
          you want to show exactly when data were recorded, or when
          working with plots where the time of recording does not matter.
        - 'time': Shift the timestamps so that they have the same start
          date, but preserved time of day information.  This is useful
          when you want to compare or average data recorded on different
          dates, but want to preserve circadian patterns.
        - 'elapsed': Shift the timestamps such that the first recorded
          timestamp is equal to a single, shared date.  This is useful for
          comparing data relative to the initiation of the recording, when
          you do not need to preserve circadian information.

        Note that for 'elapsed' and 'time' alignment, the common date is
        set by the `ZERO_DATE` variable in this module.

        Parameters
        ----------
        alignment : str, 'datetime', 'time', or 'elapsed'
            Option for temporal alignment.  See above for more
            information.
        inplace : bool, optional
            When True, the current FEDFrame is modified.  Else, a copy is
            returned with the new alignment.

        Raises
        ------
        ValueError
            Option for alignment not recognized.

        Returns
        -------
        newfed : fed3.FEDFrame
            FED3 data with new alignment.
        '''
        options = ['datetime', 'time', 'elapsed']
        if alignment not in options:
            raise ValueError(f'`alignment` must be one of {options}, '
                             f'not "{alignment}"')
        if alignment == 'datetime':
            # undo whatever offset is currently applied
            new_diff = self._current_offset
        elif alignment == 'time':
            # whole-day shift: preserves time-of-day information
            new_diff = self.index[0].date() - ZERO_DATE.date()
        elif alignment == 'elapsed':
            # shift so the first timestamp lands exactly on ZERO_DATE
            new_diff = self.index[0] - ZERO_DATE
        newfed = self if inplace else self.copy()
        newfed.index -= new_diff
        newfed._current_offset -= new_diff
        newfed._alignment = alignment
        return newfed

    # ---- Aliases
    ipi = interpellet_intervals
Ancestors
- pandas.core.frame.DataFrame
- pandas.core.generic.NDFrame
- pandas.core.base.PandasObject
- pandas.core.accessor.DirNamesMixin
- pandas.core.indexing.IndexingMixin
- pandas.core.arraylike.OpsMixin
Class variables
var LR_POKE_METHOD
var LR_POKE_METHOD_OPTIONS
var L_POKE_EVENTS
var R_POKE_EVENTS
Instance variables
var columns
-
The column labels of the DataFrame.
var duration
-
Time delta of last timestamp and first timestamp.
Expand source code
@property
def duration(self):
    """Length of the recording: the final timestamp minus the first."""
    first, last = self.start_time, self.end_time
    return last - first
var end_time
-
Last timestamp in file.
Expand source code
@property
def end_time(self):
    """Timestamp of the final logged row."""
    final_value = self.index.values[-1]
    return pd.Timestamp(final_value)
var events
-
Number of logged events (i.e. rows).
Expand source code
@property
def events(self):
    '''Number of logged events (i.e. rows).'''
    # Fixed: the previous `len(self.data)` relied on the DataFrame `.data`
    # attribute, which pandas removed in v1.0 (and which never counted
    # rows); the row count is simply len(self).
    return len(self)
var fedmode
-
FED3 operating mode for this data.
Expand source code
@property
def fedmode(self):
    '''FED3 operating mode for this data, as inferred by `determine_mode()`.'''
    return self.determine_mode()
var index
-
The index (row labels) of the DataFrame.
var start_time
-
First timestamp in file.
Expand source code
@property
def start_time(self):
    '''Timestamp of the first logged row.'''
    initial_value = self.index.values[0]
    return pd.Timestamp(initial_value)
Methods
def check_duplicated_index(self)
-
Checks if the data has duplicated timestamps.
Returns
bool
True
if duplicates found, elseFalse
.
Expand source code
def check_duplicated_index(self):
    '''
    Report whether any timestamp appears more than once in the index.

    Returns
    -------
    bool
        `True` if duplicates found, else `False`.
    '''
    repeated = self.index.duplicated()
    return repeated.any()
def deduplicate_index(self, method='keep_first', offset='1S', reset_counts=False, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Apply a method to remove duplicate timestamps from the data.
With FEDFrames, the timestamp column (typically the column 'MM:DD:YYYY hh:mm:ss') is used as the row labels (AKA the
index
in pandas, see here). Some operations which involve selecting data based on this index can fail when there are duplicate entries. FED3 data should not generally have duplicated timestamps, however they do arise due to two main causes. One cause is transient logging errors, which should be rare (but may be more common with earlier FED software). The other, more common, cause is data editing with Microsoft Excel, which has been documented elsewhere (see links below).
Note that this method does not recover any information that is lost; it either removes duplicated indices or applies simple rules to alter them. When available, non-duplicated data should be loaded into the program.
fed3 should raise a warning when files with duplicated timestamps are loaded. The status of the index can also be checked with
check_duplicated_index()
.Parameters
method
:str
, optional-
Method for removing duplicates. Options are:
'keep_first'
: keep only the first instance of each duplicate set (default)'keep_last'
: keep only the last instance of each duplicate set'remove'
: delete any rows with duplicate timestamps'offset'
: add a small time offset to each date - does so iteratively until the index is not duplicated.'interpolate'
: offset duplicates such that they are spaced evenly between their value and the next timestamp in the series
Note that
'interpolate'
and'offset'
should preserve the length of the FEDFrame, while other options can reduce it. offset
:str
, optional- Pandas time offset string, only used when
method='offset'
. The default is'1S'
. reset_counts
:bool
, optional- Reset columns cumulative columns that may be altered as a result of removing rows. The default is False. Note that this alters the number of pokes/pellets!
reset_columns
:bool
, optional- Column names to reset when
reset_counts
is True.
Raises
ValueError
- Trying to use
'interpolate'
when the last timestamp is duplicated.
Returns
None. Data is modified in place.
Expand source code
def deduplicate_index(self, method='keep_first', offset='1S',
                      reset_counts=False,
                      reset_columns=('Pellet_Count', 'Left_Poke_Count',
                                     'Right_Poke_Count')):
    '''
    Apply a method to remove duplicate timestamps from the data.

    With FEDFrames, the timestamp column (typically the column
    'MM:DD:YYYY hh:mm:ss') is used as the row labels (AKA the `index`
    in pandas, [see here](https://pandas.pydata.org/docs/reference/api/pandas.Index.html)).
    Some operations which involve selecting data based on this index
    can fail when there are duplicate entries.

    FED3 data should not generally have duplicated timestamps, however
    they do arise due to two main causes.  One cause is transient
    logging errors, which should be rare (but may be more common with
    earlier FED software).  The other, more common, cause is data
    editing with Microsoft Excel, which has been documented elsewhere
    (see links below).

    Note that this method does not recover any information that is
    lost; it either removes duplicated indices or applies simple rules
    to alter them.  When available, non-duplicated data should be
    loaded into the program.  fed3 should raise a warning when files
    with duplicated timestamps are loaded.  The status of the index can
    also be checked with `check_duplicated_index()`.

    Parameters
    ----------
    method : str, optional
        Method for removing duplicates.  Options are:

        - `'keep_first'`: keep only the first instance of each
          duplicate set (default)
        - `'keep_last'`: keep only the last instance of each duplicate
          set
        - `'remove'`: delete any rows with duplicate timestamps
        - `'offset'`: add a small time offset to each date - does so
          iteratively until the index is not duplicated.
        - `'interpolate'`: offset duplicates such that they are spaced
          evenly between their value and the next timestamp in the
          series

        Note that `'interpolate'` and `'offset'` should preserve the
        length of the FEDFrame, while other options can reduce it.
    offset : str, optional
        Pandas time offset string, only used when `method='offset'`.
        The default is `'1S'`.
    reset_counts : bool, optional
        Reset cumulative columns that may be altered as a result of
        removing rows.  The default is False.  Note that this alters
        the number of pokes/pellets!
    reset_columns : list-like, optional
        Column names to reset when `reset_counts` is True.

    Raises
    ------
    ValueError
        Unrecognized `method`, or trying to use `'interpolate'` when
        the last timestamp is duplicated.

    Returns
    -------
    None.  Data is modified in place.
    '''
    options = ['keep_first', 'keep_last', 'remove', 'offset', 'interpolate']
    if method not in options:
        # Fixed: the previous message interpolated `method` in place of
        # the list of valid options, producing e.g.
        # "`method` must be one of bogus, not \"bogus\"".
        raise ValueError(f'`method` must be one of {options}, not "{method}"')
    if method == 'keep_first':
        mask = ~ self.index.duplicated(keep='first')
        self.query('@mask', inplace=True)
    elif method == 'keep_last':
        mask = ~ self.index.duplicated(keep='last')
        self.query('@mask', inplace=True)
    elif method == 'remove':
        mask = ~ self.index.duplicated(keep=False)
        self.query('@mask', inplace=True)
    elif method == 'offset':
        dt = pd.to_timedelta(offset)
        # nudge duplicated stamps forward until the index is unique
        while self.check_duplicated_index():
            self.index = np.where(self.index.duplicated(),
                                  self.index + dt,
                                  self.index)
    elif method == 'interpolate':
        if self.index.duplicated()[-1]:
            raise ValueError("Cannot interpolate when the last "
                             "timestamp is duplicated; try a different "
                             "deduplication method.")
        t0 = self.index[0]
        s = pd.Series(self.index)
        # blank out duplicate stamps, then linearly interpolate the
        # elapsed seconds between the surrounding unique stamps
        s[s.duplicated()] = None
        self.index = t0 + pd.to_timedelta((s - t0).dt.total_seconds().interpolate(),
                                          unit='seconds')

    # column resetting
    if reset_counts:
        for column in reset_columns:
            self.reset_cumulative_column(column)
def determine_mode(self)
-
Return the recording mode of the current FED data. This function tries to take this literally from the data headers. There are likely to be problems for custom programs or particular FED software versions.
Returns
mode
:str
- String indicating the mode.
Expand source code
def determine_mode(self):
    '''
    Return the recording mode of the current FED data.

    This function tries to take this literally from the data headers.
    There are likely to be problems for custom programs or particular
    FED software versions.

    Returns
    -------
    mode : str
        String indicating the mode.
    '''
    mode = 'Unknown'
    column = pd.Series(dtype=object)
    # last matching column name wins: later names take precedence
    for col in ['FR','FR_Ratio',' FR_Ratio','Mode','Session_Type']:
        if col in self.columns:
            column = self[col]
    if not column.empty:
        # NOTE(review): numpy integer dtypes (e.g. int64) are not
        # instances of Python int, so this check may be False for integer
        # columns loaded by pandas — confirm against real FED CSVs.
        if all(isinstance(i,int) for i in column):
            # a single constant ratio means fixed ratio; varying ratios
            # are treated as progressive ratio
            if len(set(column)) == 1:
                mode = 'FR' + str(column[0])
            else:
                mode = 'PR'
        # NOTE(review): assumes column[0] is a string here; raises
        # TypeError for non-iterable scalars — confirm.
        elif 'PR' in column[0]:
            mode = 'PR'
        else:
            mode = str(column[0])
    return mode
def event_type(self, timestamp)
-
Return the type of a given timestamp within the data (pellet or poke).
TODO Currently, this only reads the Event column, and throws an error if not present. In the future, this may implement more logic to determine the type of an event.
Parameters
timestamp
:str,
pandas.Timestamp``- timestamp to query.
Raises
Exception
- Fails when the Event column isn't present.
Returns
str
- Event type for queried timestamp.
Expand source code
def event_type(self, timestamp):
    '''
    Return the type of a given timestamp within the data (pellet or poke).

    TODO Currently, this only reads the Event column, and throws an error
    if not present.  In the future, this may implement more logic to
    determine the type of an event.

    Parameters
    ----------
    timestamp : str, `pandas.Timestamp`
        timestamp to query.

    Raises
    ------
    Exception
        Fails when the Event column isn't present.

    Returns
    -------
    str
        Event type for queried timestamp.
    '''
    # guard clause: without the Event column there is nothing to read
    if 'Event' not in self.columns:
        raise Exception('Missing "Event" column.')
    return self.loc[timestamp, 'Event']
def interpellet_intervals(self, check_concat=True, condense=False)
-
Calculate the interpellet intervals for each pellet event. This is the time (in minutes) since the last pellet was retrieved.
Note that there is a shortcut for this method:
ipi
.Parameters
check_concat
:bool
, optional- Removes IPIs when they are identified as coming directly after data concatenation. The default is True. This will only work when data were concatenated with fed3.
condense
:bool
, optional- Return only rows where there are interpellet intervals. The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
interpellet
:pandas.Series
- Pandas Series containing the interpellet intervals.
Expand source code
def interpellet_intervals(self, check_concat=True, condense=False):
    '''
    Calculate the interpellet intervals for each pellet event.  This is
    the time (in minutes) since the last pellet was retrieved.

    Note that there is a shortcut for this method: `ipi`.

    Parameters
    ----------
    check_concat : bool, optional
        Removes IPIs when they are identified as coming directly after
        data concatenation.  The default is True.  This will only work
        when data were concatenated with fed3.
    condense : bool, optional
        Return only rows where there are interpellet intervals.  The
        default is False.  When False, the returned Series will have same
        length as full FEDFrame.

    Returns
    -------
    interpellet : pandas.Series
        Pandas Series containing the interpellet intervals.
    '''
    # rows where exactly one new pellet was logged
    bp = self._binary_pellets()
    bp = bp[bp == 1]
    # gap between consecutive pellet timestamps, converted to minutes
    diff = bp.index.to_series().diff().dt.total_seconds() / 60
    # start from an all-NaN series over the full index, then fill in
    # values only at pellet rows
    interpellet = pd.Series(np.nan, index=self.index)
    interpellet.loc[diff.index] = diff
    if check_concat and 'Concat_#' in self.columns:
        #this can't do duplicate indexes
        if not any(self.index.duplicated()):
            #thanks to this answer https://stackoverflow.com/a/47115490/13386979
            # blank the first IPI of each concatenated chunk (after the
            # first chunk), since it spans a recording gap
            dropped = interpellet.dropna()
            pos = dropped.index.to_series().groupby(self['Concat_#']).first()
            interpellet.loc[pos[1:]] = np.nan
    if condense:
        interpellet = interpellet.loc[bp.index]
        interpellet = _filterout(interpellet, dropna=True)
    return interpellet
def ipi(self, check_concat=True, condense=False)
-
Calculate the interpellet intervals for each pellet event. This is the time (in minutes) since the last pellet was retrieved.
Note that there is a shortcut for this method:
ipi
.Parameters
check_concat
:bool
, optional- Removes IPIs when they are identified as coming directly after data concatenation. The default is True. This will only work when data were concatenated with fed3.
condense
:bool
, optional- Return only rows where there are interpellet intervals. The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
interpellet
:pandas.Series
- Pandas Series containing the interpellet intervals.
Expand source code
def interpellet_intervals(self, check_concat=True, condense=False):
    '''
    Calculate the interpellet intervals for each pellet event.  This is
    the time (in minutes) since the last pellet was retrieved.

    Note that there is a shortcut for this method: `ipi`.

    Parameters
    ----------
    check_concat : bool, optional
        Removes IPIs when they are identified as coming directly
        after data concatenation. The default is True.  This will
        only work when data were concatenated with fed3.
    condense : bool, optional
        Return only rows where there are interpellet intervals.
        The default is False. When False, the returned Series will
        have same length as full FEDFrame.

    Returns
    -------
    interpellet : pandas.Series
        Pandas Series containing the interpellet intervals.

    '''
    # binary pellet indicator; keep only the rows where a pellet occurred
    bp = self._binary_pellets()
    bp = bp[bp == 1]
    # minutes elapsed between consecutive pellet timestamps
    diff = bp.index.to_series().diff().dt.total_seconds() / 60
    # place the intervals back onto the full index (non-pellet rows stay NaN)
    interpellet = pd.Series(np.nan, index = self.index)
    interpellet.loc[diff.index] = diff
    if check_concat and 'Concat_#' in self.columns:
        #this can't do duplicate indexes
        if not any(self.index.duplicated()):
            #thanks to this answer https://stackoverflow.com/a/47115490/13386979
            # the first IPI of each concatenated chunk (beyond the first
            # chunk) spans the join between recordings, so blank it out
            dropped = interpellet.dropna()
            pos = dropped.index.to_series().groupby(self['Concat_#']).first()
            interpellet.loc[pos[1:]] = np.nan
    if condense:
        # restrict to pellet rows and drop the remaining NaNs
        interpellet = interpellet.loc[bp.index]
        interpellet = _filterout(interpellet, dropna=True)
    return interpellet
def meals(self, pellet_minimum=1, intermeal_interval=1, condense=False)
-
Assign a meal number to each pellet retrieval. Returns a series with those assignments.
Parameters to this function determine what constitutes a meal. Assignments are based on interpellet intervals (see
FEDFrame.interpellet_intervals()
).Parameters
pellet_minimum
:int
, optional- Number of pellets required in one meal. The default is 1. For high numbers, some pellets can be unassigned to any meal.
intermeal_interval
:int
, optional- Maximum length of time (in minutes) that can pass between any two consecutive pellets assigned to the same meal. The default is 1.
condense
:bool
, optional- Return only rows where there are meals (i.e. only pellet index). The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
meals
:pandas.Series
- pandas Series with labeled meals
Expand source code
def meals(self, pellet_minimum=1, intermeal_interval=1, condense=False):
    '''
    Assign a meal number to each pellet retrieval.  Returns a series
    with those assignments.  Parameters to this function determine what
    constitutes a meal.  Assignments are based on interpellet intervals
    (see `FEDFrame.interpellet_intervals()`).

    Parameters
    ----------
    pellet_minimum : int, optional
        Number of pellets required in one meal. The default is 1.
        For high numbers, some pellets can be unassigned to any meal.
    intermeal_interval : int, optional
        Maximum length of time (in minutes) that can pass between any
        two consecutive pellets assigned to the same meal.
        The default is 1.
    condense : bool, optional
        Return only rows where there are meals (i.e. only pellet index).
        The default is False. When False, the returned Series will have
        same length as full FEDFrame.

    Returns
    -------
    meals : pandas.Series
        pandas Series with labeled meals

    '''
    ipi = self.interpellet_intervals(condense=True)
    # a pellet continues the current meal when it arrives within the
    # intermeal interval of the previous pellet
    within_interval = ipi < intermeal_interval
    # start a new (provisional) meal number at every gap
    meals = ((~within_interval).cumsum() + 1)
    # keep only meals with at least `pellet_minimum` pellets ...
    above_min = meals.value_counts().sort_index() >= pellet_minimum
    # ... and renumber the surviving meals consecutively; pellets in
    # undersized meals map to NaN (unassigned)
    replacements = above_min[above_min].cumsum().reindex(above_min.index)
    meals = meals.map(replacements)
    if not condense:
        # expand back to the full FEDFrame index (non-pellet rows = NaN)
        meals = meals.reindex(self.index)
    return meals
def pellets(self, cumulative=True, condense=False)
-
Provide a series containing pellet retrieval information.
Parameters
cumulative
:bool
, optional- When True (default), the values returned are a cumulative pellet count. When False, the values are binary.
condense
:bool
, optional- Return only rows corresponding to pellets. The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
y
:pandas Series
- pandas Series containing pellet retrieval counts/indicators.
Expand source code
def pellets(self, cumulative=True, condense=False):
    '''
    Provide a series containing pellet retrieval information.

    Parameters
    ----------
    cumulative : bool, optional
        When True (default), the values returned are a cumulative
        pellet count.  When False, the values are binary.
    condense : bool, optional
        Return only rows corresponding to pellets.  The default is
        False.  When False, the returned Series will have same length
        as full FEDFrame.

    Returns
    -------
    y : pandas Series
        pandas Series containing pellet retrieval counts/indicators.

    '''
    if cumulative:
        # running pellet count straight from the device column
        result = self['Pellet_Count']
        if condense:
            # one row per count value, leading zeros removed
            result = _filterout(result, deduplicate=True, dropzero=True)
        return result
    # binary (0/1) pellet indicator
    result = self._binary_pellets()
    if condense:
        result = _filterout(result, dropzero=True)
    return result
def pokes(self, kind='any', cumulative=True, condense=False)
-
Get an array of poke events.
Parameters
kind
:str
, optional- Key for determining the poke type returned. The default is 'any' (any poke event). Other options are 'left', 'right', 'correct', and 'error'.
cumulative
:bool
, optional- When True (default), the values returned are a cumulative poke count. When False, the values are binary.
condense
:bool
, optional- Return only rows corresponding to poke events. The default is False. When False, the returned Series will have same length as full FEDFrame.
Raises
ValueError
- Unaccepted key passed to
kind
.
Returns
y
:pandas Series
- Pandas Series containing poke counts/indicators.
Expand source code
def pokes(self, kind='any', cumulative=True, condense=False):
    '''
    Get an array of poke events.

    Parameters
    ----------
    kind : str, optional
        Key for determining the poke type returned.  The default is
        'any' (any poke event).  Other options are 'left', 'right',
        'correct', and 'error'.
    cumulative : bool, optional
        When True (default), the values returned are a cumulative poke
        count.  When False, the values are binary.
    condense : bool, optional
        Return only rows corresponding to poke events.  The default is
        False.  When False, the returned Series will have same length
        as full FEDFrame.

    Raises
    ------
    ValueError
        Unaccepted key passed to `kind`.

    Returns
    -------
    y : pandas Series
        Pandas Series containing poke counts/indicators.

    '''
    # normalize case so e.g. 'Left' and 'left' are equivalent
    kind = kind.lower()
    kinds = ['left', 'right', 'any', 'correct', 'error']
    if kind not in kinds:
        raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
    if cumulative:
        y = self._cumulative_pokes(kind)
        if condense:
            # one row per count value, leading zeros removed
            y = _filterout(y, deduplicate=True, dropzero=True)
    else:
        y = self._binary_pokes(kind)
        if condense:
            y = _filterout(y, dropzero=True)
    return y
def reassign_events(self, include_side=True)
-
Run an initial assignment or reassignment of the "Event" column.
Parameters
include_side
:bool
, optional- Label poke events with "Left" and "Right" instead of "Poke". The default is True.
Returns
None.
Expand source code
def reassign_events(self, include_side=True):
    '''
    Run an initial assignment or reassignment of the "Event" column.

    Parameters
    ----------
    include_side : bool, optional
        Label poke events with "Left" and "Right" instead of "Poke".
        The default is True.

    Returns
    -------
    None.

    '''
    if include_side:
        # start from all-NaN and mark each event type via boolean mask;
        # rows that are neither pellets nor pokes stay NaN
        events = pd.Series(np.nan, index=self.index)
        events.loc[self._binary_pellets().astype(bool)] = 'Pellet'
        events.loc[self._binary_pokes('left').astype(bool)] = 'Left'
        events.loc[self._binary_pokes('right').astype(bool)] = 'Right'
    else:
        # two-way labeling: pellet rows are 'Pellet', everything else 'Poke'
        events = np.where(self._binary_pellets(), 'Pellet', 'Poke')
    self['Event'] = events
def reset_cumulative_column(self, column)
-
Reset a cumulative column (usually Left_Poke_Count, Right_Poke_Count, or Pellet_Count) to be ascending integers. This may be useful when other operations cause rows to be removed.
Parameters
column
:str
- String column name
Returns
None.
Expand source code
def reset_cumulative_column(self, column):
    '''
    Reset a cumulative column (usually Left_Poke_Count,
    Right_Poke_Count, or Pellet_Count) to be ascending integers.
    This may be useful when other operations cause rows to be removed.

    Parameters
    ----------
    column : str
        String column name

    Returns
    -------
    None.

    '''
    # nothing to renumber in an empty frame; also avoids an IndexError
    # from the .iloc[0] access below
    if len(self) == 0:
        return
    # factorize maps each distinct value (in order of appearance) to
    # 0, 1, 2, ...; duplicates share a code, so runs of equal counts
    # are preserved while gaps from removed rows are closed
    reset, _ = pd.factorize(self[column])
    # shift so the column still starts at its original first value
    reset += self[column].iloc[0]
    self[column] = reset
def set_alignment(self, alignment, inplace=True)
-
Shift the timestamps of a FEDFrame to allow for comparisons with other data recorded at different times.
This is particularly intended for plotting with
fed3.plot
. By default, fed3 will plot fed3 data over the timestamps they were recorded. For temporal plots (with time on the x-axis), this disallows combination (e.g. averaging) of data recorded on different dates. To combine these sorts of data, this function will shift the timestamps FEDFrames to a common time.There are three options for temporal alignment, 'datetime', 'time', and 'elapsed'. Note that these are the equivalents of 'shared date & time', 'shared time', and 'elapsed time' from FED3_Viz.
- 'datetime': Use the original recorded timestamps for plotting. This is the default behavior for plotting. This is generally useful when all your data were collected at the same time, when you want to show exactly when data were recorded, or when working with plots where the time of recording does not matter.
- 'time': Shift the timestamps so that they have the same start date, but preserved time of day information. This is useful for when you want to compare or average data recorded on different dates, but want to preserve circadian patterns.
- 'elapsed': Shift the timestamps such that the first recorded timestamp is equal to a single, shared date. This is useful for comparing data relative to the initiation of the recording, and you do not need to preserve circadian information.
Note that for 'elapsed' and 'time' alignment, the common date is set by the
ZERO_DATE
variable in this module.Parameters
alignment
:str, 'datetime', 'time',
or'elapsed'
- Option for temporal alignment. See above for more information.
inplace
:bool
, optional- When True, the current FEDFrame is modified. Else, a copy is returned with the new alignment.
Raises
ValueError
- Option for alignment not recognized.
Returns
newfed
:FEDFrame
- FED3 data with new alignment.
Expand source code
def set_alignment(self, alignment, inplace=True):
    '''
    Shift the timestamps of a FEDFrame to allow for comparisons with
    other data recorded at different times.

    This is particularly intended for plotting with `fed3.plot`.  By
    default, fed3 will plot fed3 data over the timestamps they were
    recorded.  For temporal plots (with time on the x-axis), this
    disallows combination (e.g. averaging) of data recorded on
    different dates.  To combine these sorts of data, this function
    will shift the timestamps FEDFrames to a common time.

    There are three options for temporal alignment, 'datetime',
    'time', and 'elapsed'.  Note that these are the equivalents of
    'shared date & time', 'shared time', and 'elapsed time' from
    FED3_Viz.

    - 'datetime': Use the original recorded timestamps for plotting.
      This is the default behavior for plotting.  This is generally
      useful when all your data were collected at the same time, when
      you want to show exactly when data were recorded, or when
      working with plots where the time of recording does not matter.
    - 'time': Shift the timestamps so that they have the same start
      date, but preserved time of day information.  This is useful
      for when you want to compare or average data recorded on
      different dates, but want to preserve circadian patterns.
    - 'elapsed': Shift the timestamps such that the first recorded
      timestamp is equal to a single, shared date.  This is useful
      for comparing data relative to the initiation of the recording,
      and you do not need to preserve circadian information.

    Note that for 'elapsed' and 'time' alignment, the common date is
    set by the `ZERO_DATE` variable in this module.

    Parameters
    ----------
    alignment : str, 'datetime', 'time', or 'elapsed'
        Option for temporal alignment.  See above for more information.
    inplace : bool, optional
        When True, the current FEDFrame is modified.  Else, a copy is
        returned with the new alignment.

    Raises
    ------
    ValueError
        Option for alignment not recognized.

    Returns
    -------
    newfed : fed3.FEDFrame
        FED3 data with new alignment.

    '''
    options = ['datetime', 'time', 'elapsed']
    if alignment not in options:
        raise ValueError(f'`alignment` must be one of {options}, '
                         f'not "{alignment}"')
    if alignment == 'datetime':
        # undo any previous shift: subtracting the accumulated offset
        # restores the originally recorded timestamps
        new_diff = self._current_offset
    elif alignment == 'time':
        # shift by whole days only, so the start date becomes ZERO_DATE
        # while time-of-day information is preserved
        new_diff = self.index[0].date() - ZERO_DATE.date()
    elif alignment == 'elapsed':
        # shift so the first timestamp lands exactly on ZERO_DATE
        new_diff = self.index[0] - ZERO_DATE
    newfed = self if inplace else self.copy()
    newfed.index -= new_diff
    # track the total offset so a later realignment can undo this shift
    newfed._current_offset -= new_diff
    newfed._alignment = alignment
    return newfed