Source code for neurokin.utils.features_extraction.feature_extraction

from importlib import import_module
from typing import List
import pandas as pd

from neurokin.constants.features_extraction import FEATURES_EXTRACTION_MODULE


def get_extractor_obj(feature_name):
    """Instantiate the feature extractor class designated by ``feature_name``.

    ``feature_name`` is a dotted path whose last component is the class name
    and whose leading part is a module path appended to
    ``FEATURES_EXTRACTION_MODULE``.

    :param feature_name: dotted ``"<module path>.<ClassName>"`` string
    :return: an instance of the resolved class, built with no arguments
    """
    # Split only on the last dot: everything before it is the module path.
    submodule_path, class_name = feature_name.rsplit(".", maxsplit=1)
    extractor_module = import_module(FEATURES_EXTRACTION_MODULE + submodule_path)
    return getattr(extractor_module, class_name)()
def extract_features(features, bodyparts, skeleton, markers_df, get_binned, bin_params, custom_feats):
    """Run every configured feature extractor and gather the results.

    For each entry of ``features`` the matching extractor object is created
    with ``get_extractor_obj``; its ``extraction_target`` attribute decides
    which targets are handed to its ``run_feat_extraction`` method:

    * ``"markers"``: ``params["marker_ids"]`` if given and non-empty,
      otherwise ``bodyparts``;
    * ``"joints"``: the entries of ``skeleton["joints"]`` selected by
      ``params["joint_ids"]`` (all of them if missing or empty);
    * ``"misc"``: ``params["misc_ids"]`` if present, otherwise ``bodyparts``.

    :param features: mapping of feature name (dotted path understood by
        ``get_extractor_obj``) to a dict of parameters, or a falsy value for
        no parameters
    :param bodyparts: default targets for marker and misc features
    :param skeleton: mapping indexed by extraction target; only
        ``skeleton["joints"]`` is read here
    :param markers_df: data passed to every extractor as ``df``
    :param get_binned: if truthy, features whose parameters contain a
        ``"binning_strategy"`` entry are additionally binned with
        ``bin_feature``
    :param bin_params: mapping providing ``"window_size"`` and ``"overlap"``;
        only read when a feature is actually binned
    :param custom_feats: value forwarded to every extractor as the
        ``custom_features`` keyword argument
    :return: ``(feats_df, binned_df)``; ``feats_df`` is the column-wise
        concatenation of all extracted features (an empty DataFrame if no
        feature was configured), ``binned_df`` is the column-wise
        concatenation of the binned features or ``None`` if nothing was binned
    :raises ValueError: if an extractor reports an unknown extraction target
    """
    extracted_features = []
    binned_features = []

    for feature_name, params in features.items():
        # Shallow copy: adding "custom_features" must not leak into the
        # caller's configuration dict.
        params = dict(params) if params else {}
        params["custom_features"] = custom_feats

        extractor_obj = get_extractor_obj(feature_name)
        extraction_target = extractor_obj.extraction_target

        if extraction_target == "markers":
            marker_ids = params.get("marker_ids")
            if marker_ids is None or len(marker_ids) == 0:
                target_bodyparts = bodyparts
            else:
                target_bodyparts = marker_ids
        elif extraction_target == "joints":
            available_joints = skeleton[extraction_target]
            joint_ids = params.get("joint_ids")
            if joint_ids is None or len(joint_ids) == 0:
                joint_ids = available_joints.keys()
            target_bodyparts = {joint: available_joints[joint] for joint in joint_ids}
        elif extraction_target == "misc":
            target_bodyparts = params.get("misc_ids", bodyparts)
        else:
            raise ValueError(f"{extraction_target} is not a valid extraction target. "
                             f"Please use: markers, joints or misc")

        feature = extractor_obj.run_feat_extraction(df=markers_df,
                                                    target_bodyparts=target_bodyparts,
                                                    **params)
        extracted_features.append(pd.DataFrame(feature).reset_index(drop=True))

        # Features without a binning strategy are simply not binned.
        if get_binned and "binning_strategy" in params:
            binned = bin_feature(feature,
                                 binning_strategies=params["binning_strategy"],
                                 window=bin_params["window_size"],
                                 overlap=bin_params["overlap"])
            binned_features.append(binned.reset_index(drop=True))

    # Concatenate once at the end; pd.concat refuses an empty list, so the
    # "nothing extracted" / "nothing binned" cases are handled explicitly.
    feats_df = pd.concat(extracted_features, axis=1) if extracted_features else pd.DataFrame()
    binned_df = pd.concat(binned_features, axis=1) if binned_features else None
    return feats_df, binned_df
def bin_feature(feature, binning_strategies: List[str], window, overlap):
    """Aggregate ``feature`` over rolling windows with one or more strategies.

    The windows are ``window`` rows long and are evaluated every
    ``window - overlap`` rows (``feature.rolling(window=window, step=step)``).
    Each strategy yields a copy of the aggregated columns whose names are
    suffixed with ``_<strategy>``; all results are concatenated column-wise.

    :param feature: object exposing a pandas-style ``rolling`` method whose
        aggregation results support ``add_suffix`` (e.g. a ``DataFrame``)
    :param binning_strategies: strategy names among ``mean``, ``median``,
        ``min``, ``max``, ``sum`` and ``std`` (case-insensitive, surrounding
        whitespace ignored); a single name may also be given as a plain string
    :param window: rolling window size
    :param overlap: overlap between consecutive windows, must be lower than
        ``window``
    :return: DataFrame with one group of suffixed columns per strategy
    :raises ValueError: if ``overlap`` is not lower than ``window``, if no
        strategy is given, or if a strategy is not supported
    """
    supported_strategies = ("mean", "median", "min", "max", "sum", "std")

    step = window - overlap
    if step <= 0:
        raise ValueError(f"The overlap should be lower than the window. Got overlap: {overlap} and window: {window}.")

    # A bare string would otherwise be iterated character by character.
    if isinstance(binning_strategies, str):
        binning_strategies = [binning_strategies]

    rolling_windows = feature.rolling(window=window, step=step)
    binned_features = []
    for strategy in binning_strategies:
        strategy_name = strategy.lower().strip()
        if strategy_name not in supported_strategies:
            raise ValueError(f"The chosen binning strategy: {strategy} is not available. Please choose between: "
                             f"mean, median, min, max, sum or std")
        # Every supported name is also the name of the rolling aggregation method.
        aggregated = getattr(rolling_windows, strategy_name)()
        binned_features.append(aggregated.add_suffix(f"_{strategy_name}"))

    if not binned_features:
        raise ValueError("At least one binning strategy is required. Please choose between: "
                         "mean, median, min, max, sum or std")

    binned_features_df = pd.concat(binned_features, axis=1)
    return binned_features_df