Package fed3
fed3 is a Python package for working with FED3 data.
Expand source code
# -*- coding: utf-8 -*-
'''fed3 is a Python package for working with FED3 data.'''
# set warnings style to remove reprinting of warning
import warnings as __warnings
def __warning_on_one_line(message, category, filename, lineno, file=None, line=None):
    '''Render a warning as a single "file:line: Category: message" line.

    The signature matches the `warnings.formatwarning` hook; `file` and
    `line` are accepted for compatibility but unused.
    '''
    return f'{filename}:{lineno}: {category.__name__}: {message}\n'
# Install the compact formatter so warnings are not echoed with source context.
__warnings.formatwarning = __warning_on_one_line
# version
from ._version import v
__version__ = v
del v
#imports for package namespace
from fed3.core import (FEDFrame,
as_aligned,
can_concat,
concat,
load,
split,
timecrop)
from fed3.examples import list_examples, load_examples
from fed3.lightcycle import set_lightcycle
from fed3.metrics import get_metric, list_metrics
# Public API surface: names exported by `from fed3 import *`.
__all__ = [
    'FEDFrame',
    'as_aligned',
    'can_concat',
    'concat',
    'load',
    'split',
    'timecrop',
    'list_examples',
    'load_examples',
    'set_lightcycle',
    'get_metric',
    'list_metrics'
]
Sub-modules
fed3.core
-
This package defines the major FEDFrame class (
FEDFrame
) for representing fed3 data. It is a subclass of pandas DataFrame. Other … fed3.examples
-
This package provides example data for fed3. The package comes bundled with some CSV files of FED3 data. These can be loaded via fed3, returning …
fed3.lightcycle
-
General code for helping with the light cycle for FED3 data.
fed3.metrics
-
This package defines functions for extracting temporal variables from FED3Frames, and tools for collecting those variables into tables. This is …
fed3.plot
-
Plotting with fed3 …
Functions
def as_aligned(feds, alignment, inplace=False)
-
Helper function for setting the alignment of one or more FEDFrames. See
FEDFrame.set_alignment()
for more information.Parameters
feds
:FEDFrame
orcollection
ofFEDFrames
- FEDFrames to set alignment for
alignment
:'str':
- Alignment string.
inplace
:bool
- When True, the FEDFrames are modified in place; otherwise, new copies are created.
Returns
aligned
orNone
- Either one FEDFrame or a list of FEDFrames with new alignment.
Expand source code
def as_aligned(feds, alignment, inplace=False):
    '''
    Helper function for setting the alignment of one or more FEDFrames.
    See `fed3.core.fedframe.FEDFrame.set_alignment()` for more information.

    Parameters
    ----------
    feds : FEDFrame or collection of FEDFrames
        FEDFrames to set alignment for
    alignment : str
        Alignment string.
    inplace : bool
        When True, the FEDFrames are modified in place; otherwise, new
        copies are created.

    Returns
    -------
    aligned or None
        Either one FEDFrame or a list of FEDFrames with new alignment.
    '''
    if isinstance(feds, FEDFrame):
        return feds.set_alignment(alignment, inplace=inplace)
    # BUG FIX: `inplace` was previously dropped in this branch, so
    # inplace=True on a collection silently returned new copies instead of
    # modifying the FEDFrames in place.
    return [f.set_alignment(alignment, inplace=inplace) for f in feds]
def can_concat(feds)
-
Determines whether or not FEDFrames can be concatenated, (based on whether their start and end times overlap).
Parameters
feds
:array
- an array of FEDFrames
Returns
bool
Expand source code
def can_concat(feds):
    """
    Determine whether FEDFrames can be concatenated, based on whether
    their start and end times overlap.

    Parameters
    ----------
    feds : array
        an array of FEDFrames

    Returns
    -------
    bool
        True when, after sorting by start time, every file begins strictly
        after the previous file ends.
    """
    ordered = sorted(feds, key=lambda f: f.start_time)
    # Compare each adjacent pair: a later file must start after the
    # earlier one ends.
    return all(later.start_time > earlier.end_time
               for earlier, later in zip(ordered, ordered[1:]))
def concat(feds, name=None, add_concat_number=True, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Concatenate FED3 data in time.
Parameters
feds
:collection
ofFEDFrame objects
- List or other collection of FEDFrame
name
:str
, optional- Name to give the new FEDFrame with concatenated data. The default is None, in which case the name of the first FEDFrame is used.
add_concat_number
:bool
, optional- Adds a column keeping record of the concatenation. The default is True.
reset_columns
:list-like
, optional- Columns whose counts should be modified in order to preserve counts
across the concatenated data.
The default is
('Pellet_Count', 'Left_Poke_Count','Right_Poke_Count')
.
Raises
ValueError
- Cannot concatenate FED data when the timestamps are overlapping.
Returns
newfed
:FEDFrame
- New FEDFrame object with concatenated data.
Expand source code
def concat(feds, name=None, add_concat_number=True,
           reset_columns=('Pellet_Count', 'Left_Poke_Count','Right_Poke_Count')):
    '''
    Concatenate FED3 data in time.

    Parameters
    ----------
    feds : collection of FEDFrame objects
        List or other collection of FEDFrame
    name : str, optional
        Name to give the new FEDFrame with concatenated data.  The default
        is None, in which case the name of the first FEDFrame is used.
    add_concat_number : bool, optional
        Adds a column keeping record of the concatenation.  The default
        is True.
    reset_columns : list-like, optional
        Columns whose counts should be modified in order to preserve counts
        across the concatenated data.  The default is
        `('Pellet_Count', 'Left_Poke_Count','Right_Poke_Count')`.

    Raises
    ------
    ValueError
        Cannot concatenate FED data when the timestamps are overlapping.

    Returns
    -------
    newfed : fed3.FEDFrame
        New FEDFrame object with concatenated data.
    '''
    if name is None:
        name = feds[0].name
    if not can_concat(feds):
        raise ValueError('FEDFrame dates overlap, cannot concat.')
    ordered = sorted(feds, key=lambda f: f.start_time)
    pieces = []
    running = {}  # per-column cumulative totals carried across files
    for number, fed in enumerate(ordered):
        frame = fed.copy()
        if add_concat_number:
            frame['Concat_#'] = number
        if number == 0:
            # Seed the running totals from whichever reset columns exist
            # in the first (earliest) file.
            running = {col: frame[col].max()
                       for col in reset_columns if col in frame.columns}
        else:
            # Shift later files up by the running total, then advance it.
            for col in running:
                frame[col] += running[col]
                running[col] = frame[col].max()
        pieces.append(frame)
    newfed = pd.concat(pieces)
    newfed._load_init(name=name)
    return newfed
def get_metric(y)
-
Return a metric function from its key.
Parameters
y
:str
- Key for metric.
Raises
KeyError
- Metric key not recognized.
Returns
namedtuple
- Named tuple with a
func
andnicename
attribute. Thefunc
is the actual metric function, which can be called on FEDFrames. Thenicename
is a nicer version of the key, used for axis labels.
Expand source code
def get_metric(y):
    '''
    Return a metric function from its key.

    Parameters
    ----------
    y : str
        Key for metric.  Lookup is case-insensitive.

    Raises
    ------
    ValueError
        Metric key not recognized.

    Returns
    -------
    namedtuple
        Named tuple with a `func` and `nicename` attribute.  The `func`
        is the actual metric function, which can be called on FEDFrames.
        The `nicename` is a nicer version of the key, used for axis labels.
    '''
    # DOC FIX: this function raises ValueError, not KeyError as the
    # docstring previously claimed.
    key = y.lower()
    try:
        return METRICS[key]
    except KeyError:
        metrics = ', '.join(f"'{m}'" for m in METRICS)
        # `from None` hides the internal KeyError so callers see only the
        # informative ValueError.
        raise ValueError(f'Metric key "{y}" is not recognized. Possible metrics are: '
                         f'{metrics}.') from None
def list_examples()
-
List all the available example data sets - specifically the string keys which can be provided to
load_examples()
.Returns
list
- All available keys.
Expand source code
def list_examples():
    '''
    List all the available example data sets - specifically the string keys
    which can be provided to `load_examples()`.

    Returns
    -------
    list
        All available keys.
    '''
    # Each example data set is a subdirectory of the bundled data directory;
    # plain files in DATADIR are ignored.
    return [folder for folder in sorted(os.listdir(DATADIR))
            if os.path.isdir(os.path.join(DATADIR, folder))]
def list_metrics()
-
List all available metric keys.
Returns
list
Expand source code
def list_metrics():
    '''
    List all available metric keys.

    Returns
    -------
    list
        Keys accepted by `get_metric()`, in registration order.
    '''
    return [key for key in METRICS]
def load(path, index_col='MM:DD:YYYY hh:mm:ss', dropna=True, deduplicate_index=None, offset='1S', reset_counts=False, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Load FED3 data from a CSV/Excel file. This is the typical recommended way for importing FED3 data. Relies mostly on
pandas.read_csv()
andpandas.read_excel()
for the parsing.Parameters
path
:str
- System path to FED3 data file.
index_col
:str
, optional- Timestamp column to use as index. The default is 'MM:DD:YYYY hh:mm:ss'.
dropna
:bool
, optional- Remove all empty rows. The default is True.
deduplicate_index
,offset
,reset_counts
,reset_columns
:optional
- Arguments passed to
FEDFrame.deduplicate_index()
, used to remove duplicate timestamps as the data are loaded.
Returns
f
:FEDFrame
- New FEDFrame object.
Expand source code
def load(path, index_col='MM:DD:YYYY hh:mm:ss', dropna=True,
         deduplicate_index=None, offset='1S', reset_counts=False,
         reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count')):
    '''
    Load FED3 data from a CSV/Excel file.  This is the typical recommended
    way for importing FED3 data.  Relies mostly on `pandas.read_csv()` and
    `pandas.read_excel()` for the parsing.

    Parameters
    ----------
    path : str
        System path to FED3 data file.  Must end in `.csv` or `.xlsx`.
    index_col : str, optional
        Timestamp column to use as index.  The default is
        'MM:DD:YYYY hh:mm:ss'.
    dropna : bool, optional
        Remove all empty rows.  The default is True.
    deduplicate_index, offset, reset_counts, reset_columns: optional
        Arguments passed to `fed3.FEDFrame.deduplicate_index()`, used
        to remove duplicate timestamps as the data are loaded.

    Raises
    ------
    ValueError
        The file extension is not a supported format.

    Returns
    -------
    f : fed3.FEDFrame
        New FEDFrame object.
    '''
    # read the path
    root, ext = os.path.splitext(path)
    ext = ext.lower()
    read_opts = {'.csv': pd.read_csv, '.xlsx': pd.read_excel}
    # BUG FIX: previously an unsupported extension surfaced as a bare
    # KeyError from the dict lookup; raise an explicit, documented error.
    if ext not in read_opts:
        raise ValueError(f'Unsupported file extension "{ext}"; expected one '
                         'of: ' + ', '.join(sorted(read_opts)))
    feddata = read_opts[ext](path, parse_dates=True, index_col=index_col)
    if dropna:
        feddata = feddata.dropna(how='all')
    # Name the FEDFrame after the file, without directory or extension.
    name = os.path.basename(root)
    f = FEDFrame(feddata)
    f._load_init(name=name,
                 path=path,
                 deduplicate_index=deduplicate_index,
                 offset=offset,
                 reset_counts=reset_counts,
                 reset_columns=reset_columns)
    return f
def load_examples(key, verbose=False, deduplicate_index=None, offset='1S', reset_counts=False, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Load the example data linked to a given key.
Parameters
key
:str
- Example to load.
verbose
:bool
- Print status while loading
deduplicate_index
,offset
,reset_counts
,reset_columns
:optional
- Arguments passed to
FEDFrame.deduplicate_index()
, used to remove duplicate timestamps as the data are loaded.
Raises
KeyError
- Unrecognized key.
Returns
list
- FED3 example data, as a list of FEDFrame objects.
Expand source code
def load_examples(key, verbose=False, deduplicate_index=None, offset='1S',
                  reset_counts=False,
                  reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count')):
    '''
    Load the example data linked to a given key.

    Parameters
    ----------
    key : str
        Example to load.
    verbose : bool
        Print status while loading
    deduplicate_index, offset, reset_counts, reset_columns: optional
        Arguments passed to `fed3.FEDFrame.deduplicate_index()`, used
        to remove duplicate timestamps as the data are loaded.

    Raises
    ------
    KeyError
        Unrecognized key.

    Returns
    -------
    list
        FED3 example data, as a list of FEDFrame objects.
    '''
    # no-op printer when not verbose
    vprint = print if verbose else lambda *args, **kwargs: None
    example_path = os.path.join(DATADIR, key)
    # BUG FIX: previously an unrecognized key fell through to a confusing
    # FileNotFoundError from os.listdir; raise the documented KeyError.
    if not os.path.isdir(example_path):
        raise KeyError(f'Unrecognized example key: "{key}". '
                       'Use fed3.list_examples() to see available keys.')
    examples = []
    vprint()
    vprint(f'Loading from data directory: {DATADIR}')
    vprint()
    vprint(f'Example folder: {example_path}')
    vprint()
    for file in sorted(os.listdir(example_path)):
        # Only CSV/Excel files in the example folder are loaded.
        ext = os.path.splitext(file)[1]
        if ext.lower() not in ['.csv', '.xlsx']:
            continue
        vprint(f' - {file}...')
        fullfile = os.path.join(example_path, file)
        f = load(path=fullfile,
                 deduplicate_index=deduplicate_index,
                 offset=offset,
                 reset_counts=reset_counts,
                 reset_columns=reset_columns)
        examples.append(f)
    return examples
def set_lightcycle(on_hour, off_hour, on_minute=0, off_minute=0)
-
Set the light cycle. This affects shading on plots and operations which group data based on the light cycle.
Parameters
on_hour
:int
- Integer indicating the hour of day when lights turn on, in [0-24).
off_hour
:int
- Integer indicating the hour of day when lights turn off, in [0-24).
on_minute
:int
, optional- Minute of the
on_hour
where lights turn on, in [0-60). off_minute
:int
, optional- Minute of the
off_hour
where lights turn off, in [0-60).
Returns
None.
Expand source code
def set_lightcycle(on_hour, off_hour, on_minute=0, off_minute=0):
    '''
    Set the light cycle.  This affects shading on plots and operations
    which group data based on the light cycle.

    Parameters
    ----------
    on_hour : int
        Integer indicating the hour of day when lights turn on, in [0-24).
    off_hour : int
        Integer indicating the hour of day when lights turn off, in [0-24).
    on_minute : int, optional
        Minute of the `on_hour` where lights turn on, in [0-60).
    off_minute : int, optional
        Minute of the `off_hour` where lights turn off, in [0-60).

    Returns
    -------
    None.
    '''
    # Store both boundaries as datetime.time objects in the module-level
    # LIGHTCYCLE dict, which other fed3 code reads.
    lights_on = dt.time(hour=on_hour, minute=on_minute)
    lights_off = dt.time(hour=off_hour, minute=off_minute)
    LIGHTCYCLE['on'] = lights_on
    LIGHTCYCLE['off'] = lights_off
def split(fed, dates, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'), return_empty=False, tag_name=True)
-
Split one FEDFrame into a multiple based on one or more dates.
Parameters
fed
:FEDFrame
- FED3 data.
dates
:datetime string
ordatetime object,
orlist-like
ofsuch
- Timestamp(s) to split the data on.
reset_columns
:list-like
, optional- Columns whose cumulative totals should be reset when splitting the data. The default is ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
return_empty
:bool
, optional- Return empty FEDFrames created from splitting. The default is False.
tag_name
:bool
, optional- Add a
'_#'
tag to the name of each new FEDFrame. The default is True.
Returns
output
:list
- List of FED3 objects created by split.
Expand source code
def split(fed, dates, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'),
          return_empty=False, tag_name=True):
    '''
    Split one FEDFrame into a multiple based on one or more dates.

    Parameters
    ----------
    fed : fed3.FEDFrame
        FED3 data.
    dates : datetime string or datetime object, or list-like of such
        Timestamp(s) to split the data on.
    reset_columns : list-like, optional
        Columns whose cumulative totals should be reset when splitting the
        data.  The default is
        ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
    return_empty : bool, optional
        Return empty FEDFrames created from splitting.  The default is False.
    tag_name : bool, optional
        Add a `'_#'` tag to the name of each new FEDFrame.  The default is
        True.

    Returns
    -------
    output : list
        List of FED3 objects created by split.
    '''
    dates = _split_handle_dates(dates)
    output = []
    offsets = {col: 0 for col in reset_columns}
    og_name = fed.name
    for i in range(len(dates) - 1):
        start = dates[i]
        end = dates[i + 1]
        subset = fed[(fed.index >= start) & (fed.index < end)].copy()
        if tag_name:
            subset.name = f"{og_name}_{i}"
        if subset.empty:
            # Empty slices contribute nothing to the running totals; updating
            # offsets from them would previously poison later segments with NaN.
            if return_empty:
                output.append(subset)
            continue
        for col in reset_columns:
            subset[col] -= offsets[col]
            # BUG FIX: track the ORIGINAL cumulative max (adjusted max plus
            # the prior offset).  Storing only the adjusted max reset the
            # third and later segments by the wrong amount.
            offsets[col] += subset[col].max()
        output.append(subset)
    return output
def timecrop(fed, start, end, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'), name=None)
-
Return a new FEDFrame cropped in time to only include data between two dates.
Parameters
fed
:FEDFrame
- FED3 data.
start
:datetime str
orobject
- Time to start including data (inclusive).
end
:datetime str
orobject
- Time to stop including data (exclusive).
reset_columns
:list-like
, optional- Columns whose cumulative totals should be reset when cropping the data. The default is ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
name
:str
, optional- Name for the new FEDFrame produced. The default is None.
Returns
newfed
:FEDFrame
- New FEDFrame object after filtering.
Expand source code
def timecrop(fed, start, end, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'),
             name=None):
    '''
    Return a new FEDFrame cropped in time to only include data between two
    dates.

    Parameters
    ----------
    fed : fed3.FEDFrame
        FED3 data.
    start : datetime str or object
        Time to start including data (inclusive).
    end : datetime str or object
        Time to stop including data (exclusive).
    reset_columns : list-like, optional
        Columns whose cumulative totals should be reset when cropping the
        data.  The default is
        ('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count').
    name : str, optional
        Name for the new FEDFrame produced.  The default is None.

    Returns
    -------
    newfed : fed3.FEDFrame
        New FEDFrame object after filtering.
    '''
    before = fed[fed.index < start]
    within = fed[(fed.index >= start) & (fed.index < end)].copy()
    # Subtract the last cumulative count seen before the crop so each
    # reset column restarts from the cropped window.
    if not before.empty:
        for col in reset_columns:
            within[col] -= before[col].max()
    if name is not None:
        within.name = name
    return within
Classes
class FEDFrame (data=None, index: Axes | None = None, columns: Axes | None = None, dtype: Dtype | None = None, copy: bool | None = None)
-
The main object interface for FED3 data in the fed3 library. Provides a 2D table for storing FED3 data.
FEDFrame is a subclass of the DataFrame, which allows for the highly-developed data manipulation operations provided by pandas. Most things you can do with a pandas DataFrame can also be done with a FEDFrame.
Note there is no equivalent of the pandas Series which is specific to FEDs.
FEDFrame provides additional attributes and methods which are specific to FED3 data. See additional documentation for these below.
Most of the time, FED3 data will be accessed directly from the logged CSV files. In this case, using the FEDFrame constructor is not recommended; you should instead use
load()
. But if for some reason you already have FED3 data loaded into a pandas DataFrame, you can make use of the constructor and theFEDFrame._load_init()
function to get full FEDFrame functionality.Other links:
- pandas: https://pandas.pydata.org/docs/index.html
- pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
- Subclassing pandas: https://pandas.pydata.org/docs/development/extending.html
Expand source code
class FEDFrame(pd.DataFrame):
    '''The main object interface for FED3 data in the fed3 library.
    Provides a 2D table for storing FED3 data.

    FEDFrame is a subclass of the DataFrame, which allows for the
    highly-developed data manipulation operations provided by pandas.
    Most things you can do with a pandas DataFrame can also be done with
    a FEDFrame.  Note there is no equivalent of the pandas Series which
    is specific to FEDs.

    FEDFrame provides additional attributes and methods which are
    specific to FED3 data.  See additional documentation for these below.

    Most of the time, FED3 data will be accessed directly from the logged
    CSV files.  In this case, using the FEDFrame constructor is not
    recommended; you should instead use `fed3.core.load()`.  But if for
    some reason you already have FED3 data loaded into a pandas DataFrame,
    you can make use of the constructor and the
    `fed3.core.fedframe.FEDFrame._load_init()` function to get full
    FEDFrame functionality.

    Other links:

    - pandas: https://pandas.pydata.org/docs/index.html
    - pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
    - Subclassing pandas: https://pandas.pydata.org/docs/development/extending.html'''

    # ---- Class variables

    # Attributes propagated to new objects created by pandas operations
    # (see the pandas subclassing guide).
    _metadata = ['name', 'path', 'foreign_columns', 'missing_columns',
                 '_alignment', '_current_offset']

    # Strategy used to detect individual left/right pokes; see
    # _binary_poke_for_side() and _cumulative_poke_for_side().
    LR_POKE_METHOD_OPTIONS = ('from_columns', 'from_events')
    LR_POKE_METHOD = 'from_columns'

    # "Event" column values counted as left / right pokes when
    # LR_POKE_METHOD == 'from_events'.
    L_POKE_EVENTS = ['Left', 'LeftShort', 'LeftWithPellet', 'LeftinTimeout',
                     'LeftDuringDispense']
    R_POKE_EVENTS = ['Right', 'RightShort', 'RightWithPellet',
                     'RightinTimeout', 'RightDuringDispense']

    # ---- Properties

    @property
    def _constructor(self):
        '''Maintains the FEDFrame type for derivatives created from self.

        See https://pandas.pydata.org/docs/development/extending.html'''
        return FEDFrame

    @property
    def duration(self):
        """Time delta of last timestamp and first timestamp."""
        return self.end_time - self.start_time

    @property
    def end_time(self):
        """Last timestamp in file."""
        # assumes the index is chronologically sorted — TODO confirm
        return pd.Timestamp(self.index.values[-1])

    @property
    def events(self):
        '''Number of logged events (i.e. rows).'''
        # NOTE(review): pandas removed the DataFrame `.data` attribute in
        # v1.0, so this likely raises AttributeError on modern pandas;
        # `len(self)` looks like the intended row count — confirm.
        return len(self.data)

    @property
    def fedmode(self):
        '''FED3 operating mode for this data.'''
        return self.determine_mode()

    @property
    def start_time(self):
        '''First timestamp in file.'''
        # assumes the index is chronologically sorted — TODO confirm
        return pd.Timestamp(self.index.values[0])

    # ---- "Private"

    def _binary_correct_pokes(self):
        # 0/1 indicator of pokes made on the currently active side.
        l = self._binary_pokes('left')
        r = self._binary_pokes('right')
        active_l = self['Active_Poke'] == 'Left'
        active_r = self['Active_Poke'] == 'Right'
        correct = ((l * active_l).astype(int) | (r * active_r).astype(int))
        return correct

    def _binary_error_pokes(self):
        # 0/1 indicator of pokes made on the inactive side.
        l = self._binary_pokes('left')
        r = self._binary_pokes('right')
        active_l = self['Active_Poke'] == 'Left'
        active_r = self['Active_Poke'] == 'Right'
        error = ((l * active_r).astype(int) | (r * active_l).astype(int))
        return error

    def _binary_pellets(self):
        # Per-row pellet indicator derived from the cumulative count; the
        # first row has no diff, so classify the first event instead.
        bp = self['Pellet_Count'].diff().copy()
        if not bp.empty:
            bp.iloc[0] = int(self._first_event_type() == 'pellet')
        return bp

    def _binary_poke_for_side(self, side):
        # Per-row poke indicator for one side ('left' or 'right'), using
        # the class-level LR_POKE_METHOD strategy.
        if self.LR_POKE_METHOD == 'from_columns':
            col = {'left': 'Left_Poke_Count', 'right': 'Right_Poke_Count'}[side]
            bp = self[col].diff().copy()
            if not bp.empty:
                bp.iloc[0] = int(self._first_event_type() == side)
        elif self.LR_POKE_METHOD == 'from_events':
            search = {'left': self.L_POKE_EVENTS, 'right': self.R_POKE_EVENTS}[side]
            bp = self['Event'].isin(search).astype(int)
        else:
            raise ValueError(f'"{self.LR_POKE_METHOD}" is not recognized for '
                             f'FEDFrame.LR_POKE_METHOD. Should be one of '
                             f'{FEDFrame.LR_POKE_METHOD_OPTIONS}.')
        return bp

    def _binary_pokes(self, kind='any'):
        # Per-row 0/1 poke indicator for the requested poke category.
        kind = kind.lower()
        kinds = ['left', 'right', 'any', 'correct', 'error']
        if kind not in kinds:
            raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
        if kind == 'any':
            l = self._binary_poke_for_side('left')
            r = self._binary_poke_for_side('right')
            bp = ((l == 1) | (r == 1)).astype(int)
        elif kind in ['left', 'right']:
            bp = self._binary_poke_for_side(kind).astype(int)
        elif kind in ['correct', 'error']:
            bp = self._binary_correct_pokes() if kind == 'correct' else self._binary_error_pokes()
        return bp

    def _cumulative_poke_for_side(self, side):
        # Running poke total for one side, per LR_POKE_METHOD.
        if self.LR_POKE_METHOD == 'from_columns':
            col = {'left': 'Left_Poke_Count', 'right': 'Right_Poke_Count'}[side]
            cp = self[col]
        elif self.LR_POKE_METHOD == 'from_events':
            search = {'left': self.L_POKE_EVENTS, 'right': self.R_POKE_EVENTS}[side]
            cp = self['Event'].isin(search).cumsum().astype(int)
        else:
            raise ValueError(f'"{self.LR_POKE_METHOD}" is not recognized for '
                             f'FEDFrame.LR_POKE_METHOD. Should be one of '
                             f'{FEDFrame.LR_POKE_METHOD_OPTIONS}.')
        return cp

    def _cumulative_pokes(self, kind='any'):
        # Running poke total for the requested poke category.
        kind = kind.lower()
        kinds = ['left', 'right', 'any', 'correct', 'error']
        if kind not in kinds:
            raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
        if kind == 'any':
            l = self._cumulative_poke_for_side('left')
            r = self._cumulative_poke_for_side('right')
            cp = (l + r).astype(int)
        elif kind in ['left', 'right']:
            cp = self._cumulative_poke_for_side(kind).astype(int)
        elif kind in ['correct', 'error']:
            bp = self._binary_correct_pokes() if kind == 'correct' else self._binary_error_pokes()
            cp = bp.cumsum()
        return cp

    def _first_event_type(self):
        '''
        Get the type of event for the first entry.  Special case
        implementation of `event_type()`.  Returns either "pellet",
        "left", "right", or "unknown".
        '''
        ts = self.index[0]
        left = self.loc[ts, 'Left_Poke_Count'] == 1
        right = self.loc[ts, 'Right_Poke_Count'] == 1
        pellet = self.loc[ts, 'Pellet_Count'] == 1
        # exactly one counter should read 1 on the first row; anything
        # else is ambiguous
        if sum([left, right, pellet]) != 1:
            return 'unknown'
        ans = ['left', 'right', 'pellet'][[left, right, pellet].index(True)]
        return ans

    def _fix_column_names(self):
        '''
        Checks for and fixes old FED3 issue where column names included
        a trailing/starting space.

        Returns
        -------
        None.
        '''
        self.foreign_columns = []
        for col in self.columns:
            for fix in FIXED_COLS:
                # fuzzy match to tolerate stray whitespace in headers
                likeness = SequenceMatcher(a=col, b=fix).ratio()
                if likeness > 0.85:
                    self.rename(columns={col:fix}, inplace=True)
                    break
                # NOTE(review): this appends once per non-matching
                # candidate, so foreign_columns can hold duplicates —
                # confirm intended placement.
                self.foreign_columns.append(col)
        self.missing_columns = [col for col in NEEDED_COLS
                                if col not in self.columns]

    def _handle_retrieval_time(self):
        # Coerce Retrieval_Time to numeric; unparseable entries become NaN.
        if 'Retrieval_Time' not in self.columns:
            return
        self['Retrieval_Time'] = pd.to_numeric(self['Retrieval_Time'],
                                               errors='coerce')

    def _load_init(self, name=None, path=None, deduplicate_index=None,
                   offset='1S', reset_counts=False,
                   reset_columns=('Pellet_Count', 'Left_Poke_Count',
                                  'Right_Poke_Count')):
        '''
        Initialize FEDFrame attributes and apply some data cleaning.

        This method is marked "private" because it is typically invoked
        automatically when loading data from local files.  The only use
        case is when you have existing pandas data which you want to
        convert into FEDFrame data.  The following demonstrates this use
        case, but note that **it is not recommended** (use
        `fed3.core.load()` instead):

        ```python
        import fed3
        import pandas as pd

        data = pd.read_csv("/some/file.csv")
        data = fed3.FEDFrame(data)

        # do the following to get full functionality
        data._load_init()
        ```

        Parameters
        ----------
        name : str, optional
            Name to give the FEDFrame. The default is None.
        path : str, optional
            Set a local data path for the data. The default is None.
        deduplicate_index, offset, reset_counts, reset_columns : optional
            Arguments passed to `fed3.FEDFrame.deduplicate_index()`, used
            to remove duplicate timestamps as the data are loaded.

        Returns
        -------
        None.
        '''
        self.name = name
        self.path = path
        self._fix_column_names()
        self._handle_retrieval_time()
        self._alignment = 'datetime'
        self._current_offset = pd.Timedelta(0)
        if deduplicate_index is not None:
            self.deduplicate_index(method=deduplicate_index, offset=offset,
                                   reset_counts=reset_counts,
                                   reset_columns=reset_columns)
        if self.check_duplicated_index():
            # NOTE(review): "deuplicate_index" is a typo for
            # "deduplicate_index" in this user-facing warning text.
            warnings.warn("Index has duplicate values, which may prevent some "
                          "fed3 operations. Use the deuplicate_index() method "
                          "to remove duplicate timestamps.", RuntimeWarning)

    # ---- Public

    def check_duplicated_index(self):
        '''
        Checks if the data has duplicated timestamps.

        Returns
        -------
        bool
            `True` if duplicates found, else `False`.
        '''
        return self.index.duplicated().any()

    def deduplicate_index(self, method='keep_first', offset='1S',
                          reset_counts=False,
                          reset_columns=('Pellet_Count', 'Left_Poke_Count',
                                         'Right_Poke_Count')):
        '''
        Apply a method to remove duplicate timestamps from the data.

        With FEDFrames, the timestamp column (typically the column
        'MM:DD:YYYY hh:mm:ss') is used as the row labels (AKA the `index`
        in pandas).  Some operations which involve selecting data based on
        this index can fail when there are duplicate entries.

        FED3 data should not generally have duplicated timestamps, however
        they do arise due to two main causes.  One cause is transient
        logging errors, which should be rare (but may be more common with
        earlier FED software).  The other, more common, cause is data
        editing with Microsoft Excel, which has been documented elsewhere.

        Note that this method does not recover any information that is
        lost; it either removes duplicated indices or applies simple rules
        to alter them.  When available, non-duplicated data should be
        loaded into the program.  fed3 should raise a warning when files
        with duplicated timestamps are loaded.  The status of the index
        can also be checked with `check_duplicated_index()`.

        Parameters
        ----------
        method : str, optional
            Method for removing duplicates.  Options are:

            - `'keep_first'`: keep only the first instance of each
              duplicate set (default)
            - `'keep_last'`: keep only the last instance of each
              duplicate set
            - `'remove'`: delete any rows with duplicate timestamps
            - `'offset'`: add a small time offset to each date - does so
              iteratively until the index is not duplicated.
            - `'interpolate'`: offset duplicates such that they are spaced
              evenly between their value and the next timestamp in the
              series

            Note that `'interpolate'` and `'offset'` should preserve the
            length of the FEDFrame, while other options can reduce it.
        offset : str, optional
            Pandas time offset string, only used when `method='offset'`.
            The default is `'1S'`.
        reset_counts : bool, optional
            Reset cumulative columns that may be altered as a result of
            removing rows.  The default is False.  Note that this alters
            the number of pokes/pellets!
        reset_columns : list-like, optional
            Column names to reset when `reset_counts` is True.

        Raises
        ------
        ValueError
            Trying to use `'interpolate'` when the last timestamp is
            duplicated.

        Returns
        -------
        None.  Data is modified in place.
        '''
        if method not in ['keep_first', 'keep_last', 'remove', 'offset',
                          'interpolate']:
            # NOTE(review): this message interpolates `method` twice; the
            # first placeholder presumably should be the list of valid
            # options — confirm and fix.
            raise ValueError(f'`method` must be one of {method}, not "{method}"')
        if method == 'keep_first':
            mask = ~ self.index.duplicated(keep='first')
            self.query('@mask', inplace=True)
        elif method == 'keep_last':
            mask = ~ self.index.duplicated(keep='last')
            self.query('@mask', inplace=True)
        elif method == 'remove':
            mask = ~ self.index.duplicated(keep=False)
            self.query('@mask', inplace=True)
        elif method == 'offset':
            dt = pd.to_timedelta(offset)
            # nudge duplicated stamps forward until the index is unique
            while self.check_duplicated_index():
                self.index = np.where(self.index.duplicated(),
                                      self.index + dt,
                                      self.index)
        elif method == 'interpolate':
            if self.index.duplicated()[-1]:
                raise ValueError("Cannot interpolate when the last "
                                 "timestamp is duplicated; try a different "
                                 "deduplication method.")
            t0 = self.index[0]
            s = pd.Series(self.index)
            # blank out duplicate stamps, then linearly interpolate the
            # elapsed seconds between the surrounding unique stamps
            s[s.duplicated()] = None
            self.index = t0 + pd.to_timedelta((s - t0).dt.total_seconds().interpolate(),
                                              unit='seconds')

        # column resetting
        if reset_counts:
            for column in reset_columns:
                self.reset_cumulative_column(column)

    def determine_mode(self):
        '''
        Return the recording mode of the current FED data.

        This function tries to take this literally from the data headers.
        There are likely to be problems for custom programs or particular
        FED software versions.

        Returns
        -------
        mode : str
            String indicating the mode.
        '''
        mode = 'Unknown'
        column = pd.Series(dtype=object)
        # last matching column name wins: later names take precedence
        for col in ['FR','FR_Ratio',' FR_Ratio','Mode','Session_Type']:
            if col in self.columns:
                column = self[col]
        if not column.empty:
            # NOTE(review): numpy integer dtypes are not instances of
            # Python int, so this check may be False for integer columns
            # loaded by pandas — confirm against real FED CSVs.
            if all(isinstance(i,int) for i in column):
                if len(set(column)) == 1:
                    mode = 'FR' + str(column[0])
                else:
                    mode = 'PR'
            # NOTE(review): assumes column[0] is a string here; raises
            # TypeError for non-iterable scalars — confirm.
            elif 'PR' in column[0]:
                mode = 'PR'
            else:
                mode = str(column[0])
        return mode

    def event_type(self, timestamp):
        '''
        Return the type of a given timestamp within the data (pellet or
        poke).

        TODO Currently, this only reads the Event column, and throws an
        error if not present.  In the future, this may implement more
        logic to determine the type of an event.

        Parameters
        ----------
        timestamp : str, `pandas.Timestamp`
            timestamp to query.

        Raises
        ------
        Exception
            Fails when the Event column isn't present.

        Returns
        -------
        str
            Event type for queried timestamp.
        '''
        if 'Event' in self.columns:
            return self.loc[timestamp, 'Event']
        else:
            raise Exception('Missing "Event" column.')

    def interpellet_intervals(self, check_concat=True, condense=False):
        '''
        Calculate the interpellet intervals for each pellet event.  This
        is the time (in minutes) since the last pellet was retrieved.

        Note that there is a shortcut for this method: `ipi`.

        Parameters
        ----------
        check_concat : bool, optional
            Removes IPIs when they are identified as coming directly after
            data concatenation.  The default is True.  This will only work
            when data were concatenated with fed3.
        condense : bool, optional
            Return only rows where there are interpellet intervals.  The
            default is False.  When False, the returned Series will have
            same length as full FEDFrame.

        Returns
        -------
        interpellet : pandas.Series
            Pandas Series containing the interpellet intervals.
        '''
        # rows where exactly one new pellet was logged
        bp = self._binary_pellets()
        bp = bp[bp == 1]
        # gap between consecutive pellet timestamps, in minutes
        diff = bp.index.to_series().diff().dt.total_seconds() / 60
        # all-NaN series over the full index, filled only at pellet rows
        interpellet = pd.Series(np.nan, index=self.index)
        interpellet.loc[diff.index] = diff
        if check_concat and 'Concat_#' in self.columns:
            #this can't do duplicate indexes
            if not any(self.index.duplicated()):
                #thanks to this answer https://stackoverflow.com/a/47115490/13386979
                # blank the first IPI of each concatenated chunk (after
                # the first), since it spans a recording gap
                dropped = interpellet.dropna()
                pos = dropped.index.to_series().groupby(self['Concat_#']).first()
                interpellet.loc[pos[1:]] = np.nan
        if condense:
            interpellet = interpellet.loc[bp.index]
            interpellet = _filterout(interpellet, dropna=True)
        return interpellet

    def meals(self, pellet_minimum=1, intermeal_interval=1, condense=False):
        '''
        Assign a meal number to each pellet retrieval.  Returns a series
        with those assignments.  Parameters to this function determine
        what constitutes a meal.  Assignments are based on interpellet
        intervals (see `FEDFrame.interpellet_intervals()`).

        Parameters
        ----------
        pellet_minimum : int, optional
            Number of pellets required in one meal.  The default is 1.
            For high numbers, some pellets can be unassigned to any meal.
        intermeal_interval : int, optional
            Maximum length of time (in minutes) that can pass between any
            two consecutive pellets assigned to the same meal.  The
            default is 1.
        condense : bool, optional
            Return only rows where there are meals (i.e. only pellet
            index).  The default is False.  When False, the returned
            Series will have same length as full FEDFrame.

        Returns
        -------
        meals : pandas.Series
            pandas Series with labeled meals
        '''
        ipi = self.interpellet_intervals(condense=True)
        # pellets closer together than the intermeal interval share a meal
        within_interval = ipi < intermeal_interval
        meals = ((~within_interval).cumsum() + 1)
        # drop meals with fewer than `pellet_minimum` pellets, renumber
        # the surviving meals consecutively
        above_min = meals.value_counts().sort_index() >= pellet_minimum
        replacements = above_min[above_min].cumsum().reindex(above_min.index)
        meals = meals.map(replacements)
        if not condense:
            meals = meals.reindex(self.index)
        return meals

    def pellets(self, cumulative=True, condense=False):
        '''
        Provide a series containing pellet retrieval information.

        Parameters
        ----------
        cumulative : bool, optional
            When True (default), the values returned are a cumulative
            pellet count.  When False, the values are binary.
        condense : bool, optional
            Return only rows corresponding to pellets.  The default is
            False.  When False, the returned Series will have same length
            as full FEDFrame.

        Returns
        -------
        y : pandas.Series
            pandas Series containing pellet retrieval counts/indicators.
        '''
        if cumulative:
            y = self['Pellet_Count']
            if condense:
                y = _filterout(y, deduplicate=True, dropzero=True)
        else:
            y = self._binary_pellets()
            if condense:
                y = _filterout(y, dropzero=True)
        return y

    def pokes(self, kind='any', cumulative=True, condense=False):
        '''
        Get an array of poke events.

        Parameters
        ----------
        kind : str, optional
            Key for determining the poke type returned.  The default is
            'any' (any poke event).  Other options are 'left', 'right',
            'correct', and 'error'.
        cumulative : bool, optional
            When True (default), the values returned are a cumulative poke
            count.  When False, the values are binary.
        condense : bool, optional
            Return only rows corresponding to poke events.  The default is
            False.  When False, the returned Series will have same length
            as full FEDFrame.

        Raises
        ------
        ValueError
            Unaccepted key passed to `kind`.

        Returns
        -------
        y : pandas.Series
            Pandas Series containing poke counts/indicators.
        '''
        kind = kind.lower()
        kinds = ['left', 'right', 'any', 'correct', 'error']
        if kind not in kinds:
            raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
        if cumulative:
            y = self._cumulative_pokes(kind)
            if condense:
                y = _filterout(y, deduplicate=True, dropzero=True)
        else:
            y = self._binary_pokes(kind)
            if condense:
                y = _filterout(y, dropzero=True)
        return y

    def reassign_events(self, include_side=True):
        '''
        Run an initial assignment or reassignment of the "Event" column.

        Parameters
        ----------
        include_side : bool, optional
            Label poke events with "Left" and "Right" instead of "Poke".
            The default is True.

        Returns
        -------
        None.
        '''
        if include_side:
            events = pd.Series(np.nan, index=self.index)
            events.loc[self._binary_pellets().astype(bool)] = 'Pellet'
            events.loc[self._binary_pokes('left').astype(bool)] = 'Left'
            events.loc[self._binary_pokes('right').astype(bool)] = 'Right'
        else:
            events = np.where(self._binary_pellets(), 'Pellet', 'Poke')
        self['Event'] = events

    def reset_cumulative_column(self, column):
        '''
        Reset a cumulative column (usually Left_Poke_Count,
        Right_Poke_Count, or Pellet_Count) to be ascending integers.
        This may be useful when other operations cause rows to be removed.

        Parameters
        ----------
        column : str
            String column name

        Returns
        -------
        None.
        '''
        # factorize maps each distinct value to 0, 1, 2, ...; adding the
        # first observed value restores the original starting count
        reset, _ = pd.factorize(self[column])
        reset += self[column].iloc[0]
        self[column] = reset

    def set_alignment(self, alignment, inplace=True):
        '''
        Shift the timestamps of a FEDFrame to allow for comparisons with
        other data recorded at different times.  This is particularly
        intended for plotting with `fed3.plot`.

        By default, fed3 will plot fed3 data over the timestamps they were
        recorded.  For temporal plots (with time on the x-axis), this
        disallows combination (e.g. averaging) of data recorded on
        different dates.  To combine these sorts of data, this function
        will shift the timestamps of FEDFrames to a common time.

        There are three options for temporal alignment: 'datetime',
        'time', and 'elapsed'.  Note that these are the equivalents of
        'shared date & time', 'shared time', and 'elapsed time' from
        FED3_Viz.

        - 'datetime': Use the original recorded timestamps for plotting.
          This is the default behavior for plotting.  This is generally
          useful when all your data were collected at the same time, when
          you want to show exactly when data were recorded, or when
          working with plots where the time of recording does not matter.
        - 'time': Shift the timestamps so that they have the same start
          date, but preserved time of day information.  This is useful
          when you want to compare or average data recorded on different
          dates, but want to preserve circadian patterns.
        - 'elapsed': Shift the timestamps such that the first recorded
          timestamp is equal to a single, shared date.  This is useful for
          comparing data relative to the initiation of the recording, when
          you do not need to preserve circadian information.

        Note that for 'elapsed' and 'time' alignment, the common date is
        set by the `ZERO_DATE` variable in this module.

        Parameters
        ----------
        alignment : str, 'datetime', 'time', or 'elapsed'
            Option for temporal alignment.  See above for more
            information.
        inplace : bool, optional
            When True, the current FEDFrame is modified.  Else, a copy is
            returned with the new alignment.

        Raises
        ------
        ValueError
            Option for alignment not recognized.

        Returns
        -------
        newfed : fed3.FEDFrame
            FED3 data with new alignment.
        '''
        options = ['datetime', 'time', 'elapsed']
        if alignment not in options:
            raise ValueError(f'`alignment` must be one of {options}, '
                             f'not "{alignment}"')
        if alignment == 'datetime':
            # undo whatever offset is currently applied
            new_diff = self._current_offset
        elif alignment == 'time':
            # whole-day shift: preserves time-of-day information
            new_diff = self.index[0].date() - ZERO_DATE.date()
        elif alignment == 'elapsed':
            # shift so the first timestamp lands exactly on ZERO_DATE
            new_diff = self.index[0] - ZERO_DATE
        newfed = self if inplace else self.copy()
        newfed.index -= new_diff
        newfed._current_offset -= new_diff
        newfed._alignment = alignment
        return newfed

    # ---- Aliases
    ipi = interpellet_intervals
Ancestors
- pandas.core.frame.DataFrame
- pandas.core.generic.NDFrame
- pandas.core.base.PandasObject
- pandas.core.accessor.DirNamesMixin
- pandas.core.indexing.IndexingMixin
- pandas.core.arraylike.OpsMixin
Class variables
var LR_POKE_METHOD
var LR_POKE_METHOD_OPTIONS
var L_POKE_EVENTS
var R_POKE_EVENTS
Instance variables
var columns
-
The column labels of the DataFrame.
var duration
-
Time delta of last timestamp and first timestamp.
Expand source code
@property
def duration(self):
    """Length of the recording: the final timestamp minus the first."""
    first, last = self.start_time, self.end_time
    return last - first
var end_time
-
Last timestamp in file.
Expand source code
@property
def end_time(self):
    """Timestamp of the final logged row."""
    final_value = self.index.values[-1]
    return pd.Timestamp(final_value)
var events
-
Number of logged events (i.e. rows).
Expand source code
@property
def events(self):
    '''Number of logged events (i.e. rows).'''
    # Fixed: the previous `len(self.data)` relied on the DataFrame `.data`
    # attribute, which pandas removed in v1.0 (and which never counted
    # rows); the row count is simply len(self).
    return len(self)
var fedmode
-
FED3 operating mode for this data.
Expand source code
@property
def fedmode(self):
    '''FED3 operating mode for this data, as inferred by `determine_mode()`.'''
    return self.determine_mode()
var index
-
The index (row labels) of the DataFrame.
var start_time
-
First timestamp in file.
Expand source code
@property
def start_time(self):
    '''Timestamp of the first logged row.'''
    initial_value = self.index.values[0]
    return pd.Timestamp(initial_value)
Methods
def check_duplicated_index(self)
-
Checks if the data has duplicated timestamps.
Returns
bool
True
if duplicates found, elseFalse
.
Expand source code
def check_duplicated_index(self):
    '''
    Report whether any timestamp appears more than once in the index.

    Returns
    -------
    bool
        `True` if duplicates found, else `False`.
    '''
    repeated = self.index.duplicated()
    return repeated.any()
def deduplicate_index(self, method='keep_first', offset='1S', reset_counts=False, reset_columns=('Pellet_Count', 'Left_Poke_Count', 'Right_Poke_Count'))
-
Apply a method to remove duplicate timestamps from the data.
With FEDFrames, the timestamp column (typically the column 'MM:DD:YYYY hh:mm:ss') is used as the row labels (AKA the
index
in pandas, see here). Some operations which involve selecting data based on this index can fail when there are duplicate entries. FED3 data should not generally have duplicated timestamps, however they do arise due to two main causes. One cause is transient logging errors, which should be rare (but may be more common with earlier FED software). The other, more common, cause is data editing with Microsoft Excel, which has been documented elsewhere (see links below).
Note that this method does not recover any information that is lost; it either removes duplicated indices or applies simple rules to alter them. When available, non-duplicated data should be loaded into the program.
fed3 should raise a warning when files with duplicated timestamps are loaded. The status of the index can also be checked with
check_duplicated_index()
.Parameters
method
:str
, optional-
Method for removing duplicates. Options are:
'keep_first'
: keep only the first instance of each duplicate set (default)'keep_last'
: keep only the last instance of each duplicate set'remove'
: delete any rows with duplicate timestamps'offset'
: add a small time offset to each date - does so iteratively until the index is not duplicated.'interpolate'
: offset duplicates such that they are spaced evenly between their value and the next timestamp in the series
Note that
'interpolate'
and'offset'
should preserve the length of the FEDFrame, while other options can reduce it. offset
:str
, optional- Pandas time offset string, only used when
method='offset'
. The default is'1S'
. reset_counts
:bool
, optional- Reset columns cumulative columns that may be altered as a result of removing rows. The default is False. Note that this alters the number of pokes/pellets!
reset_columns
:bool
, optional- Column names to reset when
reset_counts
is True.
Raises
ValueError
- Trying to use
'interpolate'
when the last timestamp is duplicated.
Returns
None. Data is modified in place.
Expand source code
def deduplicate_index(self, method='keep_first', offset='1S',
                      reset_counts=False,
                      reset_columns=('Pellet_Count', 'Left_Poke_Count',
                                     'Right_Poke_Count')):
    '''
    Apply a method to remove duplicate timestamps from the data.

    With FEDFrames, the timestamp column (typically the column
    'MM:DD:YYYY hh:mm:ss') is used as the row labels (AKA the `index`
    in pandas, [see here](https://pandas.pydata.org/docs/reference/api/pandas.Index.html)).
    Some operations which involve selecting data based on this index
    can fail when there are duplicate entries.

    FED3 data should not generally have duplicated timestamps, however
    they do arise due to two main causes.  One cause is transient
    logging errors, which should be rare (but may be more common with
    earlier FED software).  The other, more common, cause is data
    editing with Microsoft Excel, which has been documented elsewhere
    (see links below).

    Note that this method does not recover any information that is
    lost; it either removes duplicated indices or applies simple rules
    to alter them.  When available, non-duplicated data should be
    loaded into the program.  fed3 should raise a warning when files
    with duplicated timestamps are loaded.  The status of the index can
    also be checked with `check_duplicated_index()`.

    Parameters
    ----------
    method : str, optional
        Method for removing duplicates.  Options are:

        - `'keep_first'`: keep only the first instance of each
          duplicate set (default)
        - `'keep_last'`: keep only the last instance of each duplicate
          set
        - `'remove'`: delete any rows with duplicate timestamps
        - `'offset'`: add a small time offset to each date - does so
          iteratively until the index is not duplicated.
        - `'interpolate'`: offset duplicates such that they are spaced
          evenly between their value and the next timestamp in the
          series

        Note that `'interpolate'` and `'offset'` should preserve the
        length of the FEDFrame, while other options can reduce it.
    offset : str, optional
        Pandas time offset string, only used when `method='offset'`.
        The default is `'1S'`.
    reset_counts : bool, optional
        Reset cumulative columns that may be altered as a result of
        removing rows.  The default is False.  Note that this alters
        the number of pokes/pellets!
    reset_columns : list-like, optional
        Column names to reset when `reset_counts` is True.

    Raises
    ------
    ValueError
        Unrecognized `method`, or trying to use `'interpolate'` when
        the last timestamp is duplicated.

    Returns
    -------
    None.  Data is modified in place.
    '''
    options = ['keep_first', 'keep_last', 'remove', 'offset', 'interpolate']
    if method not in options:
        # Fixed: the previous message interpolated `method` in place of
        # the list of valid options, producing e.g.
        # "`method` must be one of bogus, not \"bogus\"".
        raise ValueError(f'`method` must be one of {options}, not "{method}"')
    if method == 'keep_first':
        mask = ~ self.index.duplicated(keep='first')
        self.query('@mask', inplace=True)
    elif method == 'keep_last':
        mask = ~ self.index.duplicated(keep='last')
        self.query('@mask', inplace=True)
    elif method == 'remove':
        mask = ~ self.index.duplicated(keep=False)
        self.query('@mask', inplace=True)
    elif method == 'offset':
        dt = pd.to_timedelta(offset)
        # nudge duplicated stamps forward until the index is unique
        while self.check_duplicated_index():
            self.index = np.where(self.index.duplicated(),
                                  self.index + dt,
                                  self.index)
    elif method == 'interpolate':
        if self.index.duplicated()[-1]:
            raise ValueError("Cannot interpolate when the last "
                             "timestamp is duplicated; try a different "
                             "deduplication method.")
        t0 = self.index[0]
        s = pd.Series(self.index)
        # blank out duplicate stamps, then linearly interpolate the
        # elapsed seconds between the surrounding unique stamps
        s[s.duplicated()] = None
        self.index = t0 + pd.to_timedelta((s - t0).dt.total_seconds().interpolate(),
                                          unit='seconds')

    # column resetting
    if reset_counts:
        for column in reset_columns:
            self.reset_cumulative_column(column)
def determine_mode(self)
-
Return the recording mode of the current FED data. This function tries to take this literally from the data headers. There are likely to be problems for custom programs or particular FED software versions.
Returns
mode
:str
- String indicating the mode.
Expand source code
def determine_mode(self):
    '''
    Return the recording mode of the current FED data.

    This function tries to take this literally from the data headers.
    There are likely to be problems for custom programs or particular
    FED software versions.

    Returns
    -------
    mode : str
        String indicating the mode.
    '''
    mode = 'Unknown'
    column = pd.Series(dtype=object)
    # last matching column name wins: later names take precedence
    for col in ['FR','FR_Ratio',' FR_Ratio','Mode','Session_Type']:
        if col in self.columns:
            column = self[col]
    if not column.empty:
        # NOTE(review): numpy integer dtypes (e.g. int64) are not
        # instances of Python int, so this check may be False for integer
        # columns loaded by pandas — confirm against real FED CSVs.
        if all(isinstance(i,int) for i in column):
            # a single constant ratio means fixed ratio; varying ratios
            # are treated as progressive ratio
            if len(set(column)) == 1:
                mode = 'FR' + str(column[0])
            else:
                mode = 'PR'
        # NOTE(review): assumes column[0] is a string here; raises
        # TypeError for non-iterable scalars — confirm.
        elif 'PR' in column[0]:
            mode = 'PR'
        else:
            mode = str(column[0])
    return mode
def event_type(self, timestamp)
-
Return the type of a given timestamp within the data (pellet or poke).
TODO Currently, this only reads the Event column, and throws an error if not present. In the future, this may implement more logic to determine the type of an event.
Parameters
timestamp
:str,
pandas.Timestamp``- timestamp to query.
Raises
Exception
- Fails when the Event column isn't present.
Returns
str
- Event type for queried timestamp.
Expand source code
def event_type(self, timestamp):
    '''
    Return the type of a given timestamp within the data (pellet or poke).

    TODO Currently, this only reads the Event column, and throws an error
    if not present.  In the future, this may implement more logic to
    determine the type of an event.

    Parameters
    ----------
    timestamp : str, `pandas.Timestamp`
        timestamp to query.

    Raises
    ------
    Exception
        Fails when the Event column isn't present.

    Returns
    -------
    str
        Event type for queried timestamp.
    '''
    # guard clause: without the Event column there is nothing to read
    if 'Event' not in self.columns:
        raise Exception('Missing "Event" column.')
    return self.loc[timestamp, 'Event']
def interpellet_intervals(self, check_concat=True, condense=False)
-
Calculate the interpellet intervals for each pellet event. This is the time (in minutes) since the last pellet was retrieved.
Note that there is a shortcut for this method:
ipi
.Parameters
check_concat
:bool
, optional- Removes IPIs when they are identified as coming directly after data concatenation. The default is True. This will only work when data were concatenated with fed3.
condense
:bool
, optional- Return only rows where there are interpellet intervals. The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
interpellet
:pandas.Series
- Pandas Series containing the interpellet intervals.
Expand source code
def interpellet_intervals(self, check_concat=True, condense=False):
    '''
    Calculate the interpellet intervals for each pellet event.  This is
    the time (in minutes) since the last pellet was retrieved.

    Note that there is a shortcut for this method: `ipi`.

    Parameters
    ----------
    check_concat : bool, optional
        Removes IPIs when they are identified as coming directly after
        data concatenation.  The default is True.  This will only work
        when data were concatenated with fed3.
    condense : bool, optional
        Return only rows where there are interpellet intervals.  The
        default is False.  When False, the returned Series will have same
        length as full FEDFrame.

    Returns
    -------
    interpellet : pandas.Series
        Pandas Series containing the interpellet intervals.
    '''
    # rows where exactly one new pellet was logged
    bp = self._binary_pellets()
    bp = bp[bp == 1]
    # gap between consecutive pellet timestamps, converted to minutes
    diff = bp.index.to_series().diff().dt.total_seconds() / 60
    # start from an all-NaN series over the full index, then fill in
    # values only at pellet rows
    interpellet = pd.Series(np.nan, index=self.index)
    interpellet.loc[diff.index] = diff
    if check_concat and 'Concat_#' in self.columns:
        #this can't do duplicate indexes
        if not any(self.index.duplicated()):
            #thanks to this answer https://stackoverflow.com/a/47115490/13386979
            # blank the first IPI of each concatenated chunk (after the
            # first chunk), since it spans a recording gap
            dropped = interpellet.dropna()
            pos = dropped.index.to_series().groupby(self['Concat_#']).first()
            interpellet.loc[pos[1:]] = np.nan
    if condense:
        interpellet = interpellet.loc[bp.index]
        interpellet = _filterout(interpellet, dropna=True)
    return interpellet
def ipi(self, check_concat=True, condense=False)
-
Calculate the interpellet intervals for each pellet event. This is the time (in minutes) since the last pellet was retrieved.
Note that there is a shortcut for this method:
ipi
.Parameters
check_concat
:bool
, optional- Removes IPIs when they are identified as coming directly after data concatenation. The default is True. This will only work when data were concatenated with fed3.
condense
:bool
, optional- Return only rows where there are interpellet intervals. The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
interpellet
:pandas.Series
- Pandas Series containing the interpellet intervals.
Expand source code
def interpellet_intervals(self, check_concat=True, condense=False):
    '''
    Calculate the interpellet intervals for each pellet event.  This is
    the time (in minutes) since the last pellet was retrieved.

    Note that there is a shortcut for this method: `ipi`.

    Parameters
    ----------
    check_concat : bool, optional
        Removes IPIs when they are identified as coming directly
        after data concatenation. The default is True.  This will
        only work when data were concatenated with fed3.
    condense : bool, optional
        Return only rows where there are interpellet intervals.
        The default is False. When False, the returned Series will
        have same length as full FEDFrame.

    Returns
    -------
    interpellet : pandas.Series
        Pandas Series containing the interpellet intervals.

    '''
    # binary pellet indicator; keep only the rows where a pellet occurred
    bp = self._binary_pellets()
    bp = bp[bp == 1]
    # minutes elapsed between consecutive pellet timestamps
    diff = bp.index.to_series().diff().dt.total_seconds() / 60
    # place the intervals back onto the full index (non-pellet rows stay NaN)
    interpellet = pd.Series(np.nan, index = self.index)
    interpellet.loc[diff.index] = diff
    if check_concat and 'Concat_#' in self.columns:
        #this can't do duplicate indexes
        if not any(self.index.duplicated()):
            #thanks to this answer https://stackoverflow.com/a/47115490/13386979
            # the first IPI of each concatenated chunk (beyond the first
            # chunk) spans the join between recordings, so blank it out
            dropped = interpellet.dropna()
            pos = dropped.index.to_series().groupby(self['Concat_#']).first()
            interpellet.loc[pos[1:]] = np.nan
    if condense:
        # restrict to pellet rows and drop the remaining NaNs
        interpellet = interpellet.loc[bp.index]
        interpellet = _filterout(interpellet, dropna=True)
    return interpellet
def meals(self, pellet_minimum=1, intermeal_interval=1, condense=False)
-
Assign a meal number to each pellet retrieval. Returns a series with those assignments.
Parameters to this function determine what constitutes a meal. Assignments are based on interpellet intervals (see
FEDFrame.interpellet_intervals()
).Parameters
pellet_minimum
:int
, optional- Number of pellets required in one meal. The default is 1. For high numbers, some pellets can be unassigned to any meal.
intermeal_interval
:int
, optional- Maximum length of time (in minutes) that can pass between any two consecutive pellets assigned to the same meal. The default is 1.
condense
:bool
, optional- Return only rows where there are meals (i.e. only pellet index). The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
meals
:pandas.Series
- pandas Series with labeled meals
Expand source code
def meals(self, pellet_minimum=1, intermeal_interval=1, condense=False):
    '''
    Assign a meal number to each pellet retrieval.  Returns a series
    with those assignments.  Parameters to this function determine what
    constitutes a meal.  Assignments are based on interpellet intervals
    (see `FEDFrame.interpellet_intervals()`).

    Parameters
    ----------
    pellet_minimum : int, optional
        Number of pellets required in one meal. The default is 1.
        For high numbers, some pellets can be unassigned to any meal.
    intermeal_interval : int, optional
        Maximum length of time (in minutes) that can pass between any
        two consecutive pellets assigned to the same meal.
        The default is 1.
    condense : bool, optional
        Return only rows where there are meals (i.e. only pellet index).
        The default is False. When False, the returned Series will have
        same length as full FEDFrame.

    Returns
    -------
    meals : pandas.Series
        pandas Series with labeled meals

    '''
    ipi = self.interpellet_intervals(condense=True)
    # a pellet continues the current meal when it arrives within the
    # intermeal interval of the previous pellet
    within_interval = ipi < intermeal_interval
    # start a new (provisional) meal number at every gap
    meals = ((~within_interval).cumsum() + 1)
    # keep only meals with at least `pellet_minimum` pellets ...
    above_min = meals.value_counts().sort_index() >= pellet_minimum
    # ... and renumber the surviving meals consecutively; pellets in
    # undersized meals map to NaN (unassigned)
    replacements = above_min[above_min].cumsum().reindex(above_min.index)
    meals = meals.map(replacements)
    if not condense:
        # expand back to the full FEDFrame index (non-pellet rows = NaN)
        meals = meals.reindex(self.index)
    return meals
def pellets(self, cumulative=True, condense=False)
-
Provide a series containing pellet retrieval information.
Parameters
cumulative
:bool
, optional- When True (default), the values returned are a cumulative pellet count. When False, the values are binary.
condense
:bool
, optional- Return only rows corresponding to pellets. The default is False. When False, the returned Series will have same length as full FEDFrame.
Returns
y
:pandas Series
- pandas Series containing pellet retrieval counts/indicators.
Expand source code
def pellets(self, cumulative=True, condense=False):
    '''
    Provide a series containing pellet retrieval information.

    Parameters
    ----------
    cumulative : bool, optional
        When True (default), the values returned are a cumulative
        pellet count.  When False, the values are binary.
    condense : bool, optional
        Return only rows corresponding to pellets.  The default is
        False.  When False, the returned Series will have same length
        as full FEDFrame.

    Returns
    -------
    y : pandas Series
        pandas Series containing pellet retrieval counts/indicators.

    '''
    if cumulative:
        # running pellet count straight from the device column
        result = self['Pellet_Count']
        if condense:
            # one row per count value, leading zeros removed
            result = _filterout(result, deduplicate=True, dropzero=True)
        return result
    # binary (0/1) pellet indicator
    result = self._binary_pellets()
    if condense:
        result = _filterout(result, dropzero=True)
    return result
def pokes(self, kind='any', cumulative=True, condense=False)
-
Get an array of poke events.
Parameters
kind
:str
, optional- Key for determining the poke type returned. The default is 'any' (any poke event). Other options are 'left', 'right', 'correct', and 'error'.
cumulative
:bool
, optional- When True (default), the values returned are a cumulative poke count. When False, the values are binary.
condense
:bool
, optional- Return only rows corresponding to poke events. The default is False. When False, the returned Series will have same length as full FEDFrame.
Raises
ValueError
- Unaccepted key passed to
kind
.
Returns
y
:pandas Series
- Pandas Series containing poke counts/indicators.
Expand source code
def pokes(self, kind='any', cumulative=True, condense=False):
    '''
    Get an array of poke events.

    Parameters
    ----------
    kind : str, optional
        Key for determining the poke type returned.  The default is
        'any' (any poke event).  Other options are 'left', 'right',
        'correct', and 'error'.
    cumulative : bool, optional
        When True (default), the values returned are a cumulative poke
        count.  When False, the values are binary.
    condense : bool, optional
        Return only rows corresponding to poke events.  The default is
        False.  When False, the returned Series will have same length
        as full FEDFrame.

    Raises
    ------
    ValueError
        Unaccepted key passed to `kind`.

    Returns
    -------
    y : pandas Series
        Pandas Series containing poke counts/indicators.

    '''
    # normalize case so e.g. 'Left' and 'left' are equivalent
    kind = kind.lower()
    kinds = ['left', 'right', 'any', 'correct', 'error']
    if kind not in kinds:
        raise ValueError(f'`kind` must be one of {kinds}, not {kind}')
    if cumulative:
        y = self._cumulative_pokes(kind)
        if condense:
            # one row per count value, leading zeros removed
            y = _filterout(y, deduplicate=True, dropzero=True)
    else:
        y = self._binary_pokes(kind)
        if condense:
            y = _filterout(y, dropzero=True)
    return y
def reassign_events(self, include_side=True)
-
Run an initial assignment or reassignment of the "Event" column.
Parameters
include_side
:bool
, optional- Label poke events with "Left" and "Right" instead of "Poke". The default is True.
Returns
None.
Expand source code
def reassign_events(self, include_side=True):
    '''
    Run an initial assignment or reassignment of the "Event" column.

    Parameters
    ----------
    include_side : bool, optional
        Label poke events with "Left" and "Right" instead of "Poke".
        The default is True.

    Returns
    -------
    None.

    '''
    if include_side:
        # start from all-NaN and mark each event type via boolean mask;
        # rows that are neither pellets nor pokes stay NaN
        events = pd.Series(np.nan, index=self.index)
        events.loc[self._binary_pellets().astype(bool)] = 'Pellet'
        events.loc[self._binary_pokes('left').astype(bool)] = 'Left'
        events.loc[self._binary_pokes('right').astype(bool)] = 'Right'
    else:
        # two-way labeling: pellet rows are 'Pellet', everything else 'Poke'
        events = np.where(self._binary_pellets(), 'Pellet', 'Poke')
    self['Event'] = events
def reset_cumulative_column(self, column)
-
Reset a cumulative column (usually Left_Poke_Count, Right_Poke_Count, or Pellet_Count) to be ascending integers. This may be useful when other operations cause rows to be removed.
Parameters
column
:str
- String column name
Returns
None.
Expand source code
def reset_cumulative_column(self, column):
    '''
    Reset a cumulative column (usually Left_Poke_Count,
    Right_Poke_Count, or Pellet_Count) to be ascending integers.
    This may be useful when other operations cause rows to be removed.

    Parameters
    ----------
    column : str
        String column name

    Returns
    -------
    None.

    '''
    # nothing to renumber in an empty frame; also avoids an IndexError
    # from the .iloc[0] access below
    if len(self) == 0:
        return
    # factorize maps each distinct value (in order of appearance) to
    # 0, 1, 2, ...; duplicates share a code, so runs of equal counts
    # are preserved while gaps from removed rows are closed
    reset, _ = pd.factorize(self[column])
    # shift so the column still starts at its original first value
    reset += self[column].iloc[0]
    self[column] = reset
def set_alignment(self, alignment, inplace=True)
-
Shift the timestamps of a FEDFrame to allow for comparisons with other data recorded at different times.
This is particularly intended for plotting with
fed3.plot
. By default, fed3 will plot fed3 data over the timestamps they were recorded. For temporal plots (with time on the x-axis), this disallows combination (e.g. averaging) of data recorded on different dates. To combine these sorts of data, this function will shift the timestamps FEDFrames to a common time.There are three options for temporal alignment, 'datetime', 'time', and 'elapsed'. Note that these are the equivalents of 'shared date & time', 'shared time', and 'elapsed time' from FED3_Viz.
- 'datetime': Use the original recorded timestamps for plotting. This is the default behavior for plotting. This is generally useful when all your data were collected at the same time, when you want to show exactly when data were recorded, or when working with plots where the time of recording does not matter.
- 'time': Shift the timestamps so that they have the same start date, but preserved time of day information. This is useful for when you want to compare or average data recorded on different dates, but want to preserve circadian patterns.
- 'elapsed': Shift the timestamps such that the first recorded timestamp is equal to a single, shared date. This is useful for comparing data relative to the initiation of the recording, and you do not need to preserve circadian information.
Note that for 'elapsed' and 'time' alignment, the common date is set by the
ZERO_DATE
variable in this module.Parameters
alignment
:str, 'datetime', 'time',
or'elapsed'
- Option for temporal alignment. See above for more information.
inplace
:bool
, optional- When True, the current FEDFrame is modified. Else, a copy is returned with the new alignment.
Raises
ValueError
- Option for alignment not recognized.
Returns
newfed
:FEDFrame
- FED3 data with new alignment.
Expand source code
def set_alignment(self, alignment, inplace=True):
    '''
    Shift the timestamps of a FEDFrame to allow for comparisons with
    other data recorded at different times.

    This is particularly intended for plotting with `fed3.plot`.  By
    default, fed3 will plot fed3 data over the timestamps they were
    recorded.  For temporal plots (with time on the x-axis), this
    disallows combination (e.g. averaging) of data recorded on
    different dates.  To combine these sorts of data, this function
    will shift the timestamps FEDFrames to a common time.

    There are three options for temporal alignment, 'datetime',
    'time', and 'elapsed'.  Note that these are the equivalents of
    'shared date & time', 'shared time', and 'elapsed time' from
    FED3_Viz.

    - 'datetime': Use the original recorded timestamps for plotting.
      This is the default behavior for plotting.  This is generally
      useful when all your data were collected at the same time, when
      you want to show exactly when data were recorded, or when
      working with plots where the time of recording does not matter.
    - 'time': Shift the timestamps so that they have the same start
      date, but preserved time of day information.  This is useful
      for when you want to compare or average data recorded on
      different dates, but want to preserve circadian patterns.
    - 'elapsed': Shift the timestamps such that the first recorded
      timestamp is equal to a single, shared date.  This is useful
      for comparing data relative to the initiation of the recording,
      and you do not need to preserve circadian information.

    Note that for 'elapsed' and 'time' alignment, the common date is
    set by the `ZERO_DATE` variable in this module.

    Parameters
    ----------
    alignment : str, 'datetime', 'time', or 'elapsed'
        Option for temporal alignment.  See above for more information.
    inplace : bool, optional
        When True, the current FEDFrame is modified.  Else, a copy is
        returned with the new alignment.

    Raises
    ------
    ValueError
        Option for alignment not recognized.

    Returns
    -------
    newfed : fed3.FEDFrame
        FED3 data with new alignment.

    '''
    options = ['datetime', 'time', 'elapsed']
    if alignment not in options:
        raise ValueError(f'`alignment` must be one of {options}, '
                         f'not "{alignment}"')
    if alignment == 'datetime':
        # undo any previous shift: subtracting the accumulated offset
        # restores the originally recorded timestamps
        new_diff = self._current_offset
    elif alignment == 'time':
        # shift by whole days only, so the start date becomes ZERO_DATE
        # while time-of-day information is preserved
        new_diff = self.index[0].date() - ZERO_DATE.date()
    elif alignment == 'elapsed':
        # shift so the first timestamp lands exactly on ZERO_DATE
        new_diff = self.index[0] - ZERO_DATE
    newfed = self if inplace else self.copy()
    newfed.index -= new_diff
    # track the total offset so a later realignment can undo this shift
    newfed._current_offset -= new_diff
    newfed._alignment = alignment
    return newfed