"""FeatureUnion for running multiple featurizers and concatenating results."""
import logging
from typing import TYPE_CHECKING, Any, List, Optional, Union
import pandas as pd
from timesmith.core.base import BaseFeaturizer
if TYPE_CHECKING:
from timesmith.typing import SeriesLike, TableLike
logger = logging.getLogger(__name__)
[docs]
class FeatureUnion(BaseFeaturizer):
"""Runs multiple featurizers then concatenates their table outputs.
Attributes:
featurizers: List of (name, featurizer) tuples.
"""
[docs]
def __init__(self, featurizers: List[tuple]):
    """Store the named featurizers and check them immediately.

    Args:
        featurizers: List of ``(name, featurizer)`` tuples. The list is
            stored as given (no copy is made).

    Raises:
        TypeError: Propagated from ``_validate_featurizers`` when an entry's
            featurizer is not a ``BaseFeaturizer`` instance.
    """
    self.featurizers = featurizers
    # Validate at construction time so a bad entry is reported here rather
    # than on a later call that iterates the featurizers.
    self._validate_featurizers()
def _validate_featurizers(self) -> None:
    """Check that every registered featurizer is a ``BaseFeaturizer``.

    Walks ``self.featurizers`` in order and stops at the first offending
    entry.

    Raises:
        TypeError: If the second element of any ``(name, featurizer)`` pair
            is not a ``BaseFeaturizer`` instance. The message names the
            entry and the actual type that was found.
    """
    for label, candidate in self.featurizers:
        if isinstance(candidate, BaseFeaturizer):
            continue
        raise TypeError(
            f"Featurizer '{label}' must be a BaseFeaturizer, "
            f"got {type(candidate).__name__}"
        )
[docs]
def fit(
    self,
    y: Union["SeriesLike", Any],
    X: Optional[Union["TableLike", Any]] = None,
    **fit_params: Any,
) -> "FeatureUnion":
    """Fit every featurizer on the same inputs, in registration order.

    Args:
        y: Target data, passed unchanged to each featurizer's ``fit``.
        X: Optional exogenous/feature data, passed unchanged to each
            featurizer's ``fit``.
        **fit_params: Additional keyword arguments forwarded to every
            featurizer's ``fit`` call.

    Returns:
        Self for method chaining.
    """
    for name, featurizer in self.featurizers:
        # Lazy %-style arguments: the message is only formatted when DEBUG
        # logging is actually enabled.
        logger.debug("Fitting featurizer: %s", name)
        featurizer.fit(y, X, **fit_params)
    # Only reached when every featurizer fitted without raising.
    self._is_fitted = True
    return self
[docs]
def get_params(self, deep: bool = True) -> dict:
    """Collect the parameters of every featurizer under prefixed keys.

    Each featurizer's own ``get_params(deep=deep)`` result is flattened into
    one dictionary whose keys have the form ``'<name>__<param>'``.

    Args:
        deep: Forwarded unchanged to each featurizer's ``get_params``.

    Returns:
        Dictionary mapping ``'<featurizer name>__<parameter name>'`` to the
        parameter value. If two entries produce the same key, the value from
        the later featurizer wins.
    """
    return {
        f"{label}__{param}": setting
        for label, member in self.featurizers
        for param, setting in member.get_params(deep=deep).items()
    }
[docs]
def set_params(self, **params: Any) -> "FeatureUnion":
    """Set parameters on this union and on its featurizers.

    Keys containing ``'__'`` are split at the first ``'__'`` into a
    featurizer name and a parameter name, and the parameter is forwarded to
    every featurizer registered under that name. Keys without ``'__'`` are
    set as attributes on the union itself.

    Parameters addressed to a name that matches no registered featurizer
    are not applied; a warning is logged for each such name so that typos
    do not go unnoticed.

    Args:
        **params: Parameters in format 'featurizer_name__param_name': value,
            or plain attribute names for the union itself.

    Returns:
        Self for method chaining.
    """
    nested = {}
    for key, value in params.items():
        owner, separator, param_name = key.partition("__")
        if separator:
            nested.setdefault(owner, {})[param_name] = value
        else:
            setattr(self, key, value)

    # Computed after the attribute pass, because that pass may itself have
    # replaced ``self.featurizers``.
    known_names = {name for name, _ in self.featurizers}
    for owner in nested:
        if owner not in known_names:
            # Warn instead of raising so existing callers keep working.
            logger.warning(
                "Ignoring parameters for unknown featurizer '%s': %s",
                owner,
                sorted(nested[owner]),
            )

    for name, featurizer in self.featurizers:
        if name in nested:
            featurizer.set_params(**nested[name])
    return self