# Source code for timesmith.utils.stationarity

"""Stationarity tests for time series."""

import logging
from typing import Any, Dict, Tuple

import numpy as np
import pandas as pd

from timesmith.typing import SeriesLike

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# statsmodels is an optional dependency that supplies the ADF and KPSS tests.
# When the import fails, STATSMODELS_AVAILABLE is set to False and a warning
# is logged once at import time; the functions in this module check the flag
# and fall back to the basic scipy-based heuristic instead of raising.
try:
    from statsmodels.tsa.stattools import adfuller, kpss

    STATSMODELS_AVAILABLE = True
except ImportError:
    STATSMODELS_AVAILABLE = False
    logger.warning(
        "statsmodels not available. Advanced stationarity tests will use basic methods. "
        "Install with: pip install statsmodels"
    )


def test_stationarity(
    y: SeriesLike, significance_level: float = 0.05
) -> Dict[str, Any]:
    """Test a time series for stationarity using the ADF and KPSS tests.

    The two tests have opposite null hypotheses: ADF assumes a unit root
    (non-stationarity) and KPSS assumes stationarity. The overall conclusion
    is therefore only definite when both tests agree.

    Args:
        y: Time series data. A ``pd.Series`` is used as is, a single-column
            ``pd.DataFrame`` is reduced to its only column, and anything else
            is passed to ``pd.Series``. NaN values are dropped before testing.
        significance_level: Significance level applied to both p-values.

    Returns:
        Dictionary with stationarity test results.

        When statsmodels is available the keys are ``"adf"``, ``"kpss"`` and
        ``"conclusion"``. Each test entry holds ``"statistic"``,
        ``"p_value"``, ``"critical_values"`` and ``"is_stationary"``, or a
        single ``"error"`` message if that test raised. ``"conclusion"`` is
        ``"stationary"`` or ``"non_stationary"`` when both tests agree, and
        ``"inconclusive"`` when they disagree or when either test failed.

        When statsmodels is not available, the result of the basic fallback
        test is returned instead (keys ``"basic_test"`` and ``"conclusion"``).

    Note:
        statsmodels documents KPSS p-values as interpolated from a lookup
        table and bounded to the range [0.01, 0.1]; significance levels
        outside that range cannot be resolved by the KPSS comparison.
    """
    if isinstance(y, pd.Series):
        series = y
    elif isinstance(y, pd.DataFrame) and y.shape[1] == 1:
        series = y.iloc[:, 0]
    else:
        series = pd.Series(y)

    series_clean = series.dropna()

    if not STATSMODELS_AVAILABLE:
        logger.warning("Statsmodels not available. Using basic stationarity test.")
        return _basic_stationarity_test(series_clean, significance_level)

    results: Dict[str, Any] = {}

    # Augmented Dickey-Fuller test (H0: unit root, i.e. non-stationary).
    # Failures are recorded rather than raised so the other test still runs.
    try:
        adf_result = adfuller(series_clean, autolag="AIC")
        results["adf"] = {
            "statistic": float(adf_result[0]),
            "p_value": float(adf_result[1]),
            "critical_values": {k: float(v) for k, v in adf_result[4].items()},
            # bool() keeps the result a plain Python bool, not numpy.bool_.
            "is_stationary": bool(adf_result[1] < significance_level),
        }
    except Exception as e:
        logger.warning(f"ADF test failed: {e}")
        results["adf"] = {"error": str(e)}

    # KPSS test (H0: stationary around a constant).
    try:
        kpss_result = kpss(series_clean, regression="c")
        results["kpss"] = {
            "statistic": float(kpss_result[0]),
            "p_value": float(kpss_result[1]),
            "critical_values": {k: float(v) for k, v in kpss_result[3].items()},
            "is_stationary": bool(kpss_result[1] > significance_level),
        }
    except Exception as e:
        logger.warning(f"KPSS test failed: {e}")
        results["kpss"] = {"error": str(e)}

    # Overall conclusion. A test that raised has no verdict (None); that is
    # not evidence of non-stationarity, so it must not be counted as such.
    adf_verdict = results["adf"].get("is_stationary")
    kpss_verdict = results["kpss"].get("is_stationary")

    if adf_verdict is None or kpss_verdict is None:
        results["conclusion"] = "inconclusive"
    elif adf_verdict and kpss_verdict:
        results["conclusion"] = "stationary"
    elif not adf_verdict and not kpss_verdict:
        results["conclusion"] = "non_stationary"
    else:
        results["conclusion"] = "inconclusive"

    return results


# The public name matches pytest's ``test_*`` collection pattern. Without this
# marker, importing the function into a test module makes pytest try to run it
# as a test and fail on the unknown fixtures ``y`` / ``significance_level``.
test_stationarity.__test__ = False  # type: ignore[attr-defined]
def _basic_stationarity_test( series: pd.Series, significance_level: float ) -> Dict[str, Any]: """Basic stationarity assessment without statsmodels.""" from scipy import stats series_clean = series.dropna() if len(series_clean) < 10: return { "basic_test": { "error": "Insufficient data for stationarity test", }, "conclusion": "insufficient_data", } # Split series into two halves and compare means/variances mid_point = len(series_clean) // 2 first_half = series_clean.iloc[:mid_point] second_half = series_clean.iloc[mid_point:] # T-test for mean difference t_stat, p_value = stats.ttest_ind(first_half, second_half) # F-test for variance difference var_first = first_half.var() var_second = second_half.var() f_stat = var_first / var_second if var_second > 0 else np.inf likely_stationary = p_value > significance_level and 0.5 < f_stat < 2.0 return { "basic_test": { "mean_difference_p_value": float(p_value), "variance_ratio": float(f_stat), "likely_stationary": likely_stationary, }, "conclusion": ( "likely_stationary" if likely_stationary else "likely_non_stationary" ), }
def is_stationary(
    y: SeriesLike,
    test: str = "adf",
    significance_level: float = 0.05,
) -> Tuple[bool, float]:
    """Simple stationarity test that returns boolean and p-value.

    Args:
        y: Time series data. A ``pd.Series`` is used as is, a single-column
            ``pd.DataFrame`` is reduced to its only column, and anything else
            is passed to ``pd.Series``. NaN values are dropped before testing.
        test: Test method ('adf' for Augmented Dickey-Fuller, 'kpss' for
            KPSS). Case-insensitive.
        significance_level: Significance level for the test.

    Returns:
        Tuple of (is_stationary, p_value).

        For 'adf' the series is reported stationary when the p-value is below
        ``significance_level``; for 'kpss' when it is above, because the KPSS
        null hypothesis is stationarity. When statsmodels is not installed,
        the basic fallback test is used regardless of ``test`` and the
        returned p-value is that of its mean-difference t-test (``1.0`` when
        the fallback reports no p-value).

    Raises:
        ValueError: If ``test`` is not 'adf' or 'kpss', or if the selected
            statsmodels test fails (the original exception is chained).

    Note:
        statsmodels documents KPSS p-values as interpolated from a lookup
        table and bounded to the range [0.01, 0.1]; significance levels
        outside that range cannot be resolved by the KPSS comparison.

    Example:
        >>> stationary, p_value = ts.is_stationary(series, test='adf')  # doctest: +SKIP
    """
    # Validate the test name up front so an unknown name is rejected the same
    # way whether or not statsmodels is installed.
    test_name = test.lower()
    if test_name not in ("adf", "kpss"):
        raise ValueError(f"Unknown test '{test}'. Use 'adf' or 'kpss'.")

    if isinstance(y, pd.Series):
        series = y
    elif isinstance(y, pd.DataFrame) and y.shape[1] == 1:
        series = y.iloc[:, 0]
    else:
        series = pd.Series(y)

    series_clean = series.dropna()

    if not STATSMODELS_AVAILABLE:
        logger.warning(
            "statsmodels not available. Using basic stationarity test. "
            "Results may be less reliable."
        )
        result = _basic_stationarity_test(series_clean, significance_level)
        basic_result = result.get("basic_test", {})
        # Coerce to builtin types so the declared Tuple[bool, float] holds
        # even if the fallback hands back numpy scalars.
        is_stat = bool(basic_result.get("likely_stationary", False))
        p_val = float(basic_result.get("mean_difference_p_value", 1.0))
        return is_stat, p_val

    if test_name == "adf":
        try:
            from statsmodels.tsa.stattools import adfuller

            adf_result = adfuller(series_clean, autolag="AIC")
            p_value = float(adf_result[1])
            # ADF: H0 is a unit root, so p < alpha means stationary
            is_stationary_result = p_value < significance_level
            return is_stationary_result, p_value
        except Exception as e:
            logger.warning(f"ADF test failed: {e}")
            raise ValueError(f"ADF test failed: {e}") from e

    # Only 'kpss' remains after the validation above.
    try:
        from statsmodels.tsa.stattools import kpss

        kpss_result = kpss(series_clean, regression="c")
        p_value = float(kpss_result[1])
        # KPSS: H0 is stationarity, so p > alpha means stationary
        is_stationary_result = p_value > significance_level
        return is_stationary_result, p_value
    except Exception as e:
        logger.warning(f"KPSS test failed: {e}")
        raise ValueError(f"KPSS test failed: {e}") from e