Source code for mcframework.stats_engine

r"""
Statistical metrics and orchestration engine for Monte Carlo simulations.

This module provides:

Configuration
    :class:`StatsContext` — Shared configuration for all metric computations
    (confidence level, NaN policy, bootstrap settings, etc.)

Descriptive Metrics
    :func:`mean`, :func:`std`, :func:`percentiles`, :func:`skew`, :func:`kurtosis`

Confidence Intervals
    - :func:`ci_mean` — Parametric CI using z/t critical values
    - :func:`ci_mean_bootstrap` — Bootstrap CI (percentile or BCa)
    - :func:`ci_mean_chebyshev` — Distribution-free Chebyshev bound

Target-Based Metrics
    :func:`bias_to_target`, :func:`mse_to_target`, :func:`markov_error_prob`,
    :func:`chebyshev_required_n`

Engine
    :class:`StatsEngine` — Orchestrator that evaluates multiple metrics
    :class:`FnMetric` — Adapter to bind a name to a metric function
    :obj:`DEFAULT_ENGINE` — Pre-configured engine with all standard metrics

Example
-------
>>> from mcframework.stats_engine import DEFAULT_ENGINE, StatsContext
>>> import numpy as np
>>> data = np.random.normal(0, 1, 10000)
>>> ctx = StatsContext(n=len(data), confidence=0.95)
>>> result = DEFAULT_ENGINE.compute(data, ctx)
>>> result.metrics['mean']  # doctest: +SKIP
0.0012

See Also
--------
mcframework.utils.autocrit
    Selects z or t critical value based on sample size.
mcframework.core.MonteCarloSimulation
    Simulation base class that uses this engine.
"""


# DEV NOTE:
# ===========================================================================
# The type checker throws a fit since x is a numpy ndarray and the checker can't verify
# that numpy/scipy functions accept that. So, we're suppressing the error
# with type: ignore[arg-type] where needed.
# ===========================================================================

# pylint: disable=invalid-name
# Enum values (propagate, omit, auto, z, t, bootstrap, percentile, bca) are lowercase by design

from __future__ import annotations

import logging
from dataclasses import dataclass, field, replace
from enum import Enum
from typing import (
    Any,
    Callable,
    Generic,
    Iterable,
    Mapping,
    Protocol,
    Sequence,
    SupportsFloat,
    TypeAlias,
    TypeVar,
)

import numpy as np
from scipy.special import erfinv  # type: ignore[import-untyped]
from scipy.stats import kurtosis as sp_kurtosis  # type: ignore[import-untyped]
from scipy.stats import norm  # type: ignore[import-untyped]
from scipy.stats import skew as sp_skew  # type: ignore[import-untyped]

from .utils import autocrit

# Create local logger to avoid circular import with core
logger = logging.getLogger(__name__)
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)


_PCTS = (5, 25, 50, 75, 95)  # default percentiles


# TODO: make these configurable
# Numerical stability constants
_SMALL_PROB_BOUND = 1e-12  # Minimum probability for BCa bootstrap to avoid log(0)
_SMALL_VARIANCE_BOUND = 1e-30  # Minimum variance denominator to prevent division by zero
_BCA_JACKKNIFE_DENOMINATOR = 6.0  # Constant in BCa acceleration calculation


# Custom exceptions for better error handling

[docs]
class MissingContextError(ValueError):
    """Raised when a required context field is missing."""




[docs]
class InsufficientDataError(ValueError):
    """Raised when insufficient data is available for computation."""




[docs]
class NanPolicy(str, Enum):
    r"""
    Strategies for handling the propagation of non-finite values.

    Attributes
    ----------
    propagate : str
        Propagate any NaNs or infinities encountered in the sample.
    omit : str
        Drop non-finite observations before computing a metric.
    """

    propagate = "propagate"
    omit = "omit"




[docs]
class CIMethod(str, Enum):
    r"""
    Parametric strategies for selecting confidence-interval critical values.

    Attributes
    ----------
    auto : str
        Choose Student-t when :math:`n_\text{eff} < 30`, otherwise z.
    z : str
        Always use the normal :math:`z` critical value.
    t : str
        Always use the Student-:math:`t` critical value.
    bootstrap : str
        Defer to resampling-based bootstrap intervals.
    """

    auto = "auto"
    z = "z"
    t = "t"
    bootstrap = "bootstrap"




[docs]
class BootstrapMethod(str, Enum):
    r"""
    Supported bootstrap confidence-interval flavors.

    Attributes
    ----------
    percentile : str
        Use the percentile method on the bootstrap distribution.
    bca : str
        Use the bias-corrected and accelerated (BCa) adjustment.
    """

    percentile = "percentile"
    bca = "bca"






[docs]
@dataclass(slots=True)
class StatsContext:
    r"""
    Shared, explicit configuration for statistic and CI computations.

    The context keeps track of three recurring quantities:

    * Confidence level :math:`\gamma = \texttt{confidence}` with tail mass
      :math:`\alpha = 1 - \gamma`.
    * Effective sample size :math:`n_\text{eff}`, obtained from
      :meth:`eff_n` and used by every finite-sample adjustment.
    * Requested quantiles :math:`\mathcal{P} = \{p_i\}` that drive percentile
      metrics.

    Throughout the module we repeatedly use the identities

    .. math::

       \alpha = 1 - \gamma, \qquad
       q_\text{low} = 100 \frac{\alpha}{2}, \qquad
       q_\text{high} = 100 \left(1 - \frac{\alpha}{2}\right),

    which are provided via the :meth:`alpha` and :meth:`q_bound` helpers.

    Attributes
    ----------
    n : int
        Declared sample size (fallback when NaNs are not omitted).
    confidence : float, default 0.95
        Confidence level in :math:`(0, 1)`.
    ci_method : {"auto", "z", "t", "bootstrap"}, default "auto"
        Strategy for :func:`ci_mean`.
        If ``"auto"``, use Student-t when :math:`n_\text{eff} < 30` else normal z.
    percentiles : tuple of int, default ``(5, 25, 50, 75, 95)``
        Percentiles to compute in :func:`percentiles`.
    nan_policy : {"propagate", "omit"}, default "propagate"
        If ``"omit"``, drop non-finite values before all computations.
    target : float, optional
        Optional target value (e.g., true mean) for bias/MSE/Markov metrics.
    eps : float, optional
        Tolerance used by Chebyshev sizing and Markov bounds, when required.
    ddof : int, default 1
        Degrees of freedom for :func:`numpy.std` (1 => Bessel correction).
    ess : int, optional
        Effective sample size override (e.g., from MCMC diagnostics).
    rng : int or numpy.random.Generator, optional
        Seed or Generator used by bootstrap methods for reproducibility.
    n_bootstrap : int, default 10000
        Number of bootstrap resamples for :func:`ci_mean_bootstrap`.
    bootstrap : {"percentile", "bca"}, default "percentile"
        Bootstrap flavor for :func:`ci_mean_bootstrap`.
    block_size : int, optional
        Reserved for future block bootstrap support.

    Notes
    -----
    The context is immutable by convention at runtime; prefer :meth:`with_overrides`
    to construct a modified copy with a small set of changed fields.

    Examples
    --------
    >>> ctx = StatsContext(n=5000, confidence=0.95, ci_method=CIMethod.auto, nan_policy=NanPolicy.omit)
    >>> round(ctx.alpha, 2)
    0.05
    """

    n: int
    confidence: float = 0.95
    ci_method: CIMethod = CIMethod.auto
    percentiles: tuple[int, ...] = (5, 25, 50, 75, 95)
    nan_policy: NanPolicy = NanPolicy.propagate
    target: float | None = None
    eps: float | None = None
    ddof: int = 1
    ess: int | None = None
    rng: int | np.random.Generator | None = None
    n_bootstrap: int = 10_000
    bootstrap: BootstrapMethod = BootstrapMethod.percentile

    # ergonomics

[docs]
    def with_overrides(self, **changes) -> "StatsContext":
        r"""
        Return a shallow copy with selected fields replaced.

        Parameters
        ----------
        ``**changes`` :
            Field overrides passed to :func:`dataclasses.replace`.

        Returns
        -------
        StatsContext
            Modified copy.

        Examples
        --------
        >>> ctx = StatsContext(n=1000)
        >>> ctx2 = ctx.with_overrides(confidence=0.9, n_bootstrap=2000)
        """
        return replace(self, **changes)


    @property
    def alpha(self) -> float:
        r"""
        One-sided tail probability :math:`\alpha = 1 - \text{confidence}`.

        Returns
        -------
        float
        """
        return 1.0 - self.confidence


[docs]
    def q_bound(self) -> tuple[float, float]:
        r"""
        Percentile bounds corresponding to the current confidence.

        For :math:`\alpha = 1 - \text{confidence}`, returns
        :math:`(100\alpha/2,\; 100(1-\alpha/2))`.

        Returns
        -------
        tuple of float
            (lower_percentile, upper_percentile)
        """
        alpha = self.alpha
        return 100.0 * (alpha / 2), 100.0 * (1 - alpha / 2)



[docs]
    def eff_n(self, observed_len: int, finite_count: int | None = None) -> int:
        r"""
        Effective sample size :math:`n_\text{eff}` used by CI calculations.

        Priority is:
        1) explicit :attr:`ess`; 2) count of finite values if ``nan_policy="omit"``;
        3) declared :attr:`n` (fallback); else ``observed_len``.

        In symbols,

        .. math::

           n_\text{eff} =
           \begin{cases}
              \texttt{ess}, & \text{if provided},\\[4pt]
              \#\{i : x_i \text{ finite}\}, & \text{if nan policy = ``omit''},\\[4pt]
              \texttt{n}, & \text{otherwise}.
           \end{cases}

        Parameters
        ----------
        observed_len : int
            Raw length of the input array.
        finite_count : int, optional
            Count of finite values (used when ``nan_policy="omit"``).

        Returns
        -------
        int
        """
        if self.ess is not None:
            return int(self.ess)
        if self.nan_policy == "omit" and finite_count is not None:
            return int(finite_count)
        return int(self.n or observed_len)



[docs]
    def get_generators(self) -> np.random.Generator:
        r"""
        Return a NumPy :class:`~numpy.random.Generator` initialized from :attr:`rng`.

        Returns
        -------
        numpy.random.Generator
        """
        if isinstance(self.rng, np.random.Generator):
            return self.rng
        if isinstance(self.rng, (int, np.integer)):
            return np.random.default_rng(int(self.rng))
        return np.random.default_rng()


    def __post_init__(self) -> None:
        r"""
        Validate field ranges and cross-field consistency.

        Raises
        ------
        ValueError
            If any field is outside its allowed range or fields are inconsistent.
        """
        # Individual field validations
        if not 0.0 < self.confidence < 1.0:
            raise ValueError("confidence must be in (0,1)")
        if any(p < 0 or p > 100 for p in self.percentiles):
            raise ValueError("percentiles must be in [0,100]")
        if self.n_bootstrap <= 0:
            raise ValueError("n_bootstrap must be > 0")
        if self.ddof < 0:
            raise ValueError("ddof must be >= 0")
        if self.eps is not None and self.eps <= 0:
            raise ValueError("eps must be positive")

        # Cross-field validations
        if self.ess is not None:
            if self.ess > self.n:
                raise ValueError(f"ess ({self.ess}) cannot exceed n ({self.n})")
            if self.ess <= 0:
                raise ValueError(f"ess must be positive, got {self.ess}")

        # Warn about small bootstrap samples (but don't fail)
        if self.n_bootstrap < 100:
            raise ValueError(f"n_bootstrap ({self.n_bootstrap}) should be >= 100 for reliable estimates")



@dataclass(frozen=True)
class _CIResult:
    r"""
    Internal helper that stores a confidence interval before converting to ``dict``.

    Attributes
    ----------
    confidence : float
        Confidence level :math:`\gamma`.
    method : str
        Human-readable label (e.g., ``"z"`` or ``"bootstrap-bca"``).
    low, high : float
        Interval endpoints :math:`(\ell, u)`.
    extras : Mapping[str, SupportsFloat]
        Optional diagnostics such as ``se`` or ``crit``.
    """
    confidence: float
    method: str
    low: float
    high: float
    extras: Mapping[str, SupportsFloat] = field(default_factory=dict)

    def as_dict(self) -> dict[str, float | str]:
        result: dict[str, float | str] = {
            "confidence": float(self.confidence),
            "method": self.method.value if hasattr(self.method, 'value') else str(self.method),
            "low": float(self.low),
            "high": float(self.high),
        }
        if self.extras:
            result.update({key: float(value) for key, value in self.extras.items()})
        return result



[docs]
@dataclass(frozen=True)
class ComputeResult:
    r"""
    Result from :meth:`StatsEngine.compute` with tracking of computation failures.

    Attributes
    ----------
    metrics : dict[str, Any]
        Successfully computed metric values, keyed by metric name.
    skipped : list[tuple[str, str]]
        List of (metric_name, reason) pairs for metrics that were skipped.
    errors : list[tuple[str, str]]
        List of (metric_name, error_message) pairs for metrics that raised errors.

    Examples
    --------
    >>> engine = StatsEngine([FnMetric("mean", mean)])
    >>> data = np.array([1.0, 2.0, 3.0])
    >>> ctx = StatsContext(n=3)
    >>> result = engine.compute(data, ctx)
    >>> result.metrics['mean']
    2.0
    >>> result.skipped
    []
    """

    metrics: dict[str, Any]
    skipped: list[tuple[str, str]] = field(default_factory=list)
    errors: list[tuple[str, str]] = field(default_factory=list)

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(\n"
            f"  metrics={list(self.metrics.keys())},\n"
            f"  skipped={[name for name, _ in self.skipped]},\n"
            f"  errors={[name for name, _ in self.errors]}\n"
            ")"
    )


[docs]
    def successful_metrics(self) -> set[str]:
        r"""
        Return names of successfully computed metrics.

        Returns
        -------
        set[str]
            Set of metric names present in :attr:`metrics`.
        """
        return set(self.metrics.keys())




MetricSet: TypeAlias = Iterable["Metric"]


class Metric(Protocol):
    r"""
    Protocol for metric callables used by :class:`StatsEngine`.

    A metric exposes a ``name`` attribute and is callable as:

    ``metric(x: numpy.ndarray, ctx: StatsContext) -> Any``

    Attributes
    ----------
    name : str
        Human-readable key under which the metric's value is returned.
    """

    @property
    def name(self) -> str: ...

    def __call__(self, x: np.ndarray, ctx: StatsContext, /) -> Any: ...


_MetricT = TypeVar("_MetricT")  # Type parameter for FnMetric



[docs]
@dataclass(frozen=True)
class FnMetric(Generic[_MetricT]):
    r"""
    Lightweight adapter that binds a human-readable ``name`` to a metric function.

    Parameters
    ----------
    name : str
        Key under which the metric result is stored in :meth:`StatsEngine.compute`.
    fn : callable
        Function with signature ``fn(x: ndarray, ctx: StatsContext) -> T``.
    doc : str, optional
        Short description displayed by UIs or docs.

    Examples
    --------
    >>> import numpy as np
    >>> m = FnMetric("mean", lambda a, ctx: float(np.mean(a)))
    >>> m(np.array([1, 2, 3]), StatsContext(n=3))
    2.0
    """

    name: str
    fn: Callable[[np.ndarray, StatsContext], _MetricT]
    doc: str = ""

    def __call__(self, x: np.ndarray, ctx: StatsContext) -> _MetricT:
        r"""
        Compute the metric.

        Parameters
        ----------
        x : ndarray
            Input sample.
        ctx : StatsContext
            Context parameters (see individual metric docs).

        Returns
        -------
        Any
            Metric value.

        Examples
        --------
        >>> m = FnMetric("mean", lambda a, _: float(np.mean(a)))
        >>> m(np.array([1, 2, 3]), {})
        2.0
        """
        return self.fn(x, ctx)




[docs]
class StatsEngine:
    r"""
    Orchestrator that evaluates a set of metrics over an input array.

    Given a collection of metric callables :math:`\{\phi_j\}_{j=1}^m` and an
    array :math:`x \in \mathbb{R}^n`, the engine returns the dictionary

    .. math::
       \{\phi_j(x, \texttt{ctx}) : j = 1,\dots,m\},

    while recording any skipped/failed evaluations for downstream inspection.

    Parameters
    ----------
    metrics : iterable of Metric
        Callables with a ``name`` and signature ``metric(x, ctx)``.

    Notes
    -----
    All metrics receive the *same* :class:`~mcframework.stats_engine.StatsContext`. Prefer field names that
    read well across multiple metrics and avoid collisions.

    Examples
    --------
    >>> eng = StatsEngine([FnMetric("mean", mean), FnMetric("std", std)])
    >>> x = np.array([1., 2., 3.])
    >>> result = eng.compute(x, StatsContext(n=len(x)))
    >>> result.metrics['mean']
    2.0
    >>> result.metrics['std']
    1.0
    """


[docs]
    def __init__(self, metrics: MetricSet):
        self._metrics = list(metrics)


    def __repr__(self) -> str:
        metric_names = ", ".join(m.name for m in self._metrics)
        return f"{self.__class__.__name__}(metrics=[{metric_names}])"


[docs]
    def compute(
        self,
        x: np.ndarray,
        ctx: StatsContext | None = None,
        select: Sequence[str] | None = None,
        **kwargs: Any,
    ) -> ComputeResult:
        r"""
        Evaluate all registered metrics on ``x``.

        Parameters
        ----------
        x : ndarray
            Sample values.
        ctx : StatsContext, optional
            Context parameters. If None, one is built from ``**kwargs``.
        select : sequence of str, optional
            If given, compute only the metrics with these names.
        **kwargs : Any
            Used to build a StatsContext if ctx is None.
            Required: 'n' (int).
            Optional: 'confidence', 'ci_method', 'percentiles', etc.

        Returns
        -------
        ComputeResult
            Result object containing:

            - ``metrics``: Successfully computed metric values.
            - ``skipped``: List of (metric_name, reason) for skipped metrics.
            - ``errors``: List of (metric_name, error_message) for failed metrics.
        """
        if ctx is not None:
            ctx = _ensure_ctx(ctx, x)

        # build context from kwargs
        else:
            base = dict(kwargs)
            base.setdefault("n", int(np.asarray(x).size))
            ctx = StatsContext(**base)

        metrics_to_compute = (
            self._metrics if select is None else [m for m in self._metrics if m.name in set(select)]
        )

        out: dict[str, Any] = {}
        skipped: list[tuple[str, str]] = []
        errors: list[tuple[str, str]] = []

        for m in metrics_to_compute:
            try:
                result = m(x, ctx)

                # Handle None return (insufficient data)
                if result is None:
                    skipped.append((m.name, "insufficient data"))
                    logger.debug("Metric %s returned None (insufficient data), skipping", m.name)
                    continue

                out[m.name] = result

            except MissingContextError as e:
                # Skip metrics that require context fields not provided
                reason = str(e)
                skipped.append((m.name, reason))
                logger.debug("Skipping metric %s: %s", m.name, reason)
                continue
            except ValueError as e:
                # Also handle general ValueError for backward compatibility
                msg = str(e)
                if "Missing required context keys" in msg:
                    skipped.append((m.name, msg))
                    logger.debug("Skipping metric %s: %s", m.name, msg)
                    continue
                raise
            except Exception as e:  # pylint: disable=broad-exception-caught
                # Log and track unexpected errors (intentionally broad to not crash engine)
                error_msg = str(e)
                errors.append((m.name, error_msg))
                logger.exception("Error computing metric %s", m.name)
                continue

        return ComputeResult(metrics=out, skipped=skipped, errors=errors)




def _ensure_ctx(ctx: Any, x: np.ndarray) -> StatsContext:
    r"""
    Normalize arbitrary context inputs into a :class:`~mcframework.stats_engine.StatsContext`.

    Parameters
    ----------
    ctx : Any
        A :class:`~mcframework.stats_engine.StatsContext`, mapping, object with attributes, or ``None``.
    x : ndarray
        Sample used to infer the fallback ``n`` when missing.

    Returns
    -------
    StatsContext
        Context instance with all required fields populated.

    Raises
    ------
    TypeError
        If ``ctx`` cannot be interpreted as configuration data.
    """
    # ctx is a StatsContext
    if isinstance(ctx, StatsContext):
        return ctx

    arr_len = int(np.asarray(x).size)

    # ctx is None
    if ctx is None:
        return StatsContext(n=arr_len)

    # ctx is a dict
    if isinstance(ctx, dict):
        data = dict(ctx)
        data.setdefault("n", arr_len)
        return StatsContext(**data)

    # Fallback: try to read attributes
    try:
        data = dict(vars(ctx))
    except TypeError as exc:
        raise TypeError(
            "ctx must be a StatsContext, dict, None, or an object with attributes"
        ) from exc
    data.setdefault("n", arr_len)
    return StatsContext(**data)


def _clean(x: np.ndarray, ctx: Any) -> tuple[np.ndarray, np.ndarray]:
    r"""
    Sanitize the input sample and return the finite-value mask.

    Parameters
    ----------
    x : ndarray
        Raw input sample.
    ctx : StatsContext
        Context whose :attr:`~mcframework.stats_engine.StatsContext.nan_policy` guides filtering.

    Returns
    -------
    tuple of ndarray
        ``(arr, finite_mask)`` where ``arr`` is the possibly filtered sample and
        ``finite_mask`` marks finite elements in the original sample.

    Raises
    ------
    ValueError
        If an unknown :attr:`~mcframework.stats_engine.StatsContext.nan_policy` is supplied.
    """
    arr = np.asarray(x, dtype=float)
    finite = np.isfinite(arr)

    if ctx.nan_policy == "omit":
        arr = arr[finite]
    elif ctx.nan_policy not in ("omit", "propagate", "raise"):
        raise ValueError(f"Unknown nan_policy: {ctx.nan_policy}")

    return arr, finite


def _effective_sample_size(x: np.ndarray, ctx: StatsContext) -> int:
    r"""
    Compute :math:`n_\text{eff}` used by confidence-interval calculations.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Context governing NaN handling and declared sample size.

    Returns
    -------
    int
        Effective sample size according to :meth:`~mcframework.stats_engine.StatsContext.eff_n`.
    """
    arr, finite = _clean(x, ctx)
    # finite is a boolean mask; pass the count of True values
    finite_count = int(np.sum(finite))
    return ctx.eff_n(observed_len=arr.size, finite_count=finite_count)



[docs]
def mean(x: np.ndarray, ctx: StatsContext) -> float | None:
    r"""
    Sample mean :math:`\bar X = \frac{1}{n}\sum_{i=1}^n x_i`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        If ``nan_policy="omit"``, non-finite values are excluded.

    Returns
    -------
    float or None
        Estimate of :math:`\mathbb{E}[X]`, or ``None`` if the cleaned sample is empty.

    Notes
    -----
    The averaging is performed on the filtered array returned by :func:`~mcframework.stats_engine._clean`,
    so the effective sample size corresponds to the number of finite values that
    survive the configured NaN policy.

    Examples
    --------
    >>> mean(np.array([1, 2, 3]), StatsContext(n=3))
    2.0
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)
    return float(np.mean(arr)) if arr.size else None




[docs]
def std(x: np.ndarray, ctx: StatsContext) -> float | None:
    r"""
    Sample standard deviation with Bessel correction.

    The estimator is

    .. math::
       s = \sqrt{\frac{1}{n_\text{eff} - \texttt{ddof}}
       \sum_{i=1}^{n_\text{eff}} (x_i - \bar X)^2 }.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.ddof` (default 1).
        If ``nan_policy="omit"``, non-finite values are excluded.
    Returns
    -------
    float or None
        :math:`s` when :math:`n_\text{eff} > \texttt{ddof}`, else ``None``.

    Examples
    --------
    >>> std(np.array([1, 2, 3]), {})
    1.0
    """
    ctx = _ensure_ctx(ctx, x)
    arr, finite = _clean(x, ctx)
    # finite is a boolean mask; pass the count of True values
    finite_count = int(np.sum(finite))
    n_eff = ctx.eff_n(observed_len=arr.size, finite_count=finite_count)
    if n_eff <= 1:
        return None
    return float(np.std(arr, ddof=ctx.ddof))




[docs]
def percentiles(x: np.ndarray, ctx: StatsContext) -> dict[int, float]:
    r"""
    Empirical percentiles evaluated on the cleaned sample.

    For each :math:`p \in \mathcal{P}` we compute the empirical quantile
    :math:`Q_p(x)` using :func:`numpy.percentile` (linear interpolation).

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.percentiles` and
        :attr:`~mcframework.stats_engine.StatsContext.nan_policy`.

    Returns
    -------
    dict[int, float]
        Mapping :math:`p \mapsto Q_p(x)`.

    Examples
    --------
    >>> percentiles(np.array([0., 1., 2., 3.]), {"percentiles": (50, 75)})
    {50: 1.5, 75: 2.25}
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)
    if arr.size == 0:
        return {p: float("nan") for p in ctx.percentiles}
    pct_values = np.percentile(arr, ctx.percentiles)
    return dict(zip(ctx.percentiles, map(float, pct_values)))




[docs]
def skew(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Unbiased sample skewness (Fisher–Pearson standardized third central moment).

    .. math::
       \text{skew}(x) =
       \frac{1}{n_\text{eff}} \sum_i
       \left(\frac{x_i - \bar X}{s}\right)^3.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.nan_policy`.

    Returns
    -------
    float
        Fisher–Pearson standardized third central moment, returning ``0.0`` if
        :math:`n_\text{eff} \le 2`.

    Notes
    -----
    Uses :func:`scipy.stats.skew` with ``bias=False``.

    Examples
    --------
    >>> round(skew(np.array([1, 2, 3, 10.0]), {}), 3) > 0
    True
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)
    return float(sp_skew(arr, bias=False)) if arr.size > 2 else 0.0  # type: ignore[arg-type]




[docs]
def kurtosis(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Unbiased sample **excess** kurtosis (Fisher definition).

    .. math::
       \text{kurt}(x) =
       \frac{1}{n_\text{eff}} \sum_i
       \left(\frac{x_i - \bar X}{s}\right)^4 - 3.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.nan_policy`.

    Returns
    -------
    float
        Excess kurtosis (``0.0`` if :math:`n_\text{eff} \le 3`).

    Notes
    -----
    Uses :func:`scipy.stats.kurtosis` with ``fisher=True, bias=False``.

    Examples
    --------
    >>> round(kurtosis(np.array([1, 2, 3, 4.0]), {}), 1)
    -1.2
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)
    return float(sp_kurtosis(arr, fisher=True, bias=False)) if arr.size > 3 else 0.0  # type: ignore[arg-type]




[docs]
def ci_mean(x: np.ndarray, ctx) -> dict[str, float | str]:
    r"""
    Parametric CI for :math:`\mathbb{E}[X]` using z/t critical values.

    Let :math:`\bar X` be the sample mean and :math:`SE = s/\sqrt{n_\text{eff}}`.
    The interval is

    .. math::
       \bar X \pm c \cdot SE,

    where :math:`c` is selected by :func:`mcframework.utils.autocrit` according
    to :attr:`~mcframework.stats_engine.StatsContext.ci_method` and :math:`n_\text{eff}`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Configuration supplying at least :attr:`~mcframework.stats_engine.StatsContext.n`. Additional
        fields such as :attr:`~mcframework.stats_engine.StatsContext.confidence`,
        :attr:`~mcframework.stats_engine.StatsContext.ddof`, and
        :attr:`~mcframework.stats_engine.StatsContext.ci_method` refine the interval.

    Returns
    -------
    dict[str, float | str]
        Mapping with keys

        ``confidence``
            Requested confidence level.
        ``method``
            Resolved CI method (``"z"``, ``"t"``, or ``"auto"``).
        ``low`` / ``high``
            Lower and upper endpoints.
        ``se`` / ``crit``
            Standard error and critical value when :math:`n_\text{eff} \ge 2`.
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)

    # if everything got dropped or x was empty
    if arr.size == 0:
        return _CIResult(
            confidence=ctx.confidence,
            method=ctx.ci_method,
            low=float("nan"),
            high=float("nan"),
        ).as_dict()

    n_eff = _effective_sample_size(arr, ctx)  # counts after cleaning if 'omit'
    if n_eff < 2:
        return _CIResult(
            confidence=ctx.confidence,
            method=ctx.ci_method,
            low=float("nan"),
            high=float("nan"),
            extras={"se": float("nan"), "crit": float("nan")},
        ).as_dict()

    mu = float(np.mean(arr))

    s = float(np.std(arr, ddof=getattr(ctx, "ddof", 1)))
    if s == 0.0:
        # degenerate data -> zero SE -> CI collapses to point
        se = 0.0
    else:
        se = s / np.sqrt(n_eff)

    crit, method = autocrit(ctx.confidence, n_eff, ctx.ci_method)

    return _CIResult(
        confidence=ctx.confidence,
        method=method,
        low=mu - crit * se,
        high=mu + crit * se,
        extras={"se": se, "crit": crit},
    ).as_dict()



def _bootstrap_means(
    arr: np.ndarray, n_resamples: int, rng: np.random.Generator
) -> np.ndarray:
    r"""
    Generate bootstrap replicates of the sample mean.

    Parameters
    ----------
    arr : ndarray
        Cleaned sample values.
    n_resamples : int
        Number of bootstrap resamples.
    rng : numpy.random.Generator
        Random generator used to draw resamples.

    Returns
    -------
    ndarray
        Array of length ``n_resamples`` containing :math:`\{\bar X_b^*\}`.
    """
    n = arr.size
    idx = rng.integers(0, n, size=(n_resamples, n), endpoint=False)
    return arr[idx].mean(axis=1)


def _bca_bias_correction(arr: np.ndarray, bootstrap_means: np.ndarray) -> float:
    r"""
    Compute :math:`z_0`, the bias-correction factor for BCa bootstrap.

    The bias correction accounts for median bias in the bootstrap distribution.
    It is computed as:

    .. math::
       z_0 = \Phi^{-1}(P(\bar X_b^* < \bar X))

    where :math:`\Phi^{-1}` is the inverse standard normal CDF, :math:`\bar X`
    is the observed sample mean, and :math:`\bar X_b^*` are the bootstrap means.

    Parameters
    ----------
    arr : ndarray
        Original sample values.
    bootstrap_means : ndarray
        Bootstrap replicate means.

    Returns
    -------
    float
        Bias correction factor :math:`z_0`.

    Notes
    -----
    The proportion is clipped to :math:`[\epsilon, 1-\epsilon]` where
    :math:`\epsilon` = ``_SMALL_PROB_BOUND`` to prevent numerical issues
    in the inverse error function.
    """
    m_hat = float(np.mean(arr))
    prop = float(np.sum(bootstrap_means < m_hat)) / bootstrap_means.size
    # Clip to prevent log(0) in erfinv (which is used by inverse normal CDF)
    prop = np.clip(prop, _SMALL_PROB_BOUND, 1 - _SMALL_PROB_BOUND)
    # Convert proportion to z-score using inverse normal CDF
    # erfinv is related to norm.ppf via: norm.ppf(p) = sqrt(2) * erfinv(2*p - 1)
    return float(np.sqrt(2) * erfinv(2 * prop - 1))


def _bca_acceleration(arr: np.ndarray) -> float:
    r"""
    Compute :math:`a`, the acceleration factor for BCa bootstrap via jackknife.

    The acceleration factor measures the rate of change of the standard error
    of the estimator as a function of the true parameter value. It is computed
    using the jackknife (leave-one-out) means:

    .. math::
       a = \frac{\sum_{i=1}^n d_i^3}{6 \left(\sum_{i=1}^n d_i^2\right)^{3/2}}

    where :math:`d_i = \bar X_{(-i)} - \bar{\bar X}_{(\cdot)}` is the deviation
    of each jackknife mean from their overall mean.

    Parameters
    ----------
    arr : ndarray
        Original sample values.

    Returns
    -------
    float
        Acceleration factor :math:`a`.

    Notes
    -----
    The formula follows Efron & Tibshirani (1993), "An Introduction to the Bootstrap".
    A small constant is added to the denominator to prevent division by zero.

    References
    ----------
    .. [1] Efron, B., & Tibshirani, R. J. (1993). An Introduction to the Bootstrap.
           Chapman & Hall/CRC.
    """
    # Jackknife: compute leave-one-out means efficiently
    # If sum(arr) = S and arr[i] = x_i, then mean without x_i = (S - x_i)/(n-1)
    s = np.sum(arr, dtype=float)
    jack_means = (s - arr) / (arr.size - 1)

    # Deviations from jackknife mean
    d = jack_means - float(np.mean(jack_means))

    # Acceleration formula from Efron & Tibshirani (1993)
    # The constant 6.0 comes from the theoretical derivation
    numerator = float(np.sum(d**3))
    denominator = _BCA_JACKKNIFE_DENOMINATOR * (np.sum(d**2) ** 1.5) + _SMALL_VARIANCE_BOUND
    return numerator / denominator


def _compute_bca_interval(arr: np.ndarray, means: np.ndarray, confidence: float) -> tuple[float, float]:
    r"""
    Compute bias-corrected and accelerated (BCa) bootstrap interval.

    The BCa method adjusts the bootstrap percentiles to account for:

    1. **Bias correction** (:math:`z_0`): Corrects for median bias in the bootstrap
       distribution relative to the observed statistic.
    2. **Acceleration** (:math:`a`): Adjusts for non-constant variance of the
       estimator as the true parameter varies.

    The adjusted percentiles are computed via the transformation:

    .. math::
       p = \Phi\left(z_0 + \frac{z_0 + z_\alpha}{1 - a(z_0 + z_\alpha)}\right)

    where :math:`\Phi` is the standard normal CDF and :math:`z_\alpha` is the
    :math:`\alpha`-level quantile.

    Parameters
    ----------
    arr : ndarray
        Original sample values.
    means : ndarray
        Bootstrap replicate means.
    confidence : float
        Confidence level in (0, 1).

    Returns
    -------
    tuple[float, float]
        (lower_bound, upper_bound)

    See Also
    --------
    _bca_bias_correction : Computes the bias correction factor :math:`z_0`.
    _bca_acceleration : Computes the acceleration factor :math:`a`.

    References
    ----------
    .. [1] Efron, B. (1987). "Better Bootstrap Confidence Intervals".
           Journal of the American Statistical Association, 82(397), 171-185.
    """
    # Compute BCa adjustment factors
    z0 = _bca_bias_correction(arr, means)
    a = _bca_acceleration(arr)

    # Standard normal quantiles for the confidence interval
    zlo = float(norm.ppf((1 - confidence) / 2))
    zhi = float(norm.ppf(1 - (1 - confidence) / 2))

    # BCa-adjusted percentiles using the transformation formula
    # Φ(z0 + (z0 + z) / (1 - a(z0 + z))) converted to percentile scale
    def adjusted_percentile(z: float) -> float:
        """Apply BCa transformation to convert z-score to adjusted percentile."""
        num = z0 + z
        den = 1.0 - a * num
        return float(norm.cdf(z0 + num / den)) * 100.0

    # Compute adjusted percentile bounds and clip to valid range
    p_lo = np.clip(adjusted_percentile(zlo), 0, 100)
    p_hi = np.clip(adjusted_percentile(zhi), 0, 100)

    # Extract the interval from the bootstrap distribution
    low, high = np.percentile(means, [p_lo, p_hi])

    return float(low), float(high)



[docs]
def ci_mean_bootstrap(x: np.ndarray, ctx: StatsContext) -> dict[str, float | str] | None:
    r"""
    Bootstrap confidence interval for :math:`\mathbb{E}[X]` via resampling.

    Generates ``n_bootstrap`` bootstrap samples by drawing with replacement
    from the input data, computes the mean of each sample, and returns
    the percentile-based confidence interval from the resulting bootstrap
    distribution.

    The CI is constructed as

    .. math::
       \left[\,Q_{\alpha/2}(\bar X^*),\; Q_{1-\alpha/2}(\bar X^*)\,\right],

    where :math:`\bar X^*` denotes the bootstrap means and
    :math:`\alpha = 1 - \text{confidence}`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Configuration that may specify:

        - :attr:`~mcframework.stats_engine.StatsContext.confidence`
            Confidence level :math:`\in (0, 1)`.
        - :attr:`~mcframework.stats_engine.StatsContext.n_bootstrap`
            Number of bootstrap resamples (default ``10_000``).
        - :attr:`~mcframework.stats_engine.StatsContext.nan_policy`
            Whether to omit non-finite values before bootstrapping.
        - :attr:`~mcframework.stats_engine.StatsContext.bootstrap`
            Flavor (``"percentile"`` or ``"bca"``).
        - :attr:`~mcframework.stats_engine.StatsContext.rng`
            Seed or generator for reproducibility.

    Returns
    -------
    dict or None
        Mapping with keys:

        - ``confidence`` : float
          The requested confidence level.
        - ``method`` : str
          ``"bootstrap-percentile"`` or ``"bootstrap-bca"``.
        - ``low`` : float
          Lower bound of the CI.
        - ``high`` : float
          Upper bound of the CI.

        Returns ``None`` if the sample is empty after cleaning.

    See Also
    --------
    ci_mean : Parametric CI using z/t critical values.
    ci_mean_chebyshev : Distribution-free CI via Chebyshev's inequality.

    Notes
    -----
    The bootstrap percentile method is distribution-free and asymptotically
    valid under mild regularity conditions. For small samples, it may have
    lower coverage than the nominal confidence level.

    Examples
    --------
    >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    >>> result = ci_mean_bootstrap(x, {"confidence": 0.9, "n_bootstrap": 5000, "rng": 42})
    >>> result["method"]
    'bootstrap-percentile'
    >>> 1.5 < result["low"] < result["high"] < 4.5
    True
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)
    if arr.size == 0:
        return None

    n_resamples = int(ctx.n_bootstrap)
    g = ctx.get_generators()
    means = _bootstrap_means(arr, n_resamples, g)
    loq, hiq = ctx.q_bound()
    method = ctx.bootstrap

    # Use percentile method for small samples or when explicitly requested
    if method == "percentile" or arr.size < 3:
        low, high = np.percentile(means, [loq, hiq])
        return _CIResult(
            confidence=ctx.confidence,
            method="bootstrap-percentile",
            low=low,
            high=high,
        ).as_dict()

    # BCa method
    low, high = _compute_bca_interval(arr, means, ctx.confidence)
    return _CIResult(
        confidence=ctx.confidence,
        method="bootstrap-bca",
        low=low,
        high=high,
    ).as_dict()




[docs]
def ci_mean_chebyshev(x: np.ndarray, ctx: StatsContext) -> dict[str, float | str] | None:
    r"""
    Distribution-free CI for :math:`\mathbb{E}[X]` via Chebyshev's inequality.

    For :math:`\delta = 1 - \text{confidence}`, choose :math:`z=1/\sqrt{\delta}`
    so that

    .. math::
       \Pr\!\left(\,|\bar X - \mu| \ge z\,SE\,\right) \le \delta,
       \qquad SE = \frac{s}{\sqrt{n_\text{eff}}}.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.n` and may specify
        :attr:`~mcframework.stats_engine.StatsContext.confidence`.

    Returns
    -------
    dict[str, float | str] or None
        Mapping with keys ``confidence``, ``method``, ``low``, and ``high``,
        or ``None`` if :math:`n_\text{eff} < 2`.
    """
    ctx = _ensure_ctx(ctx, x)
    n_eff = _effective_sample_size(x, ctx)
    if n_eff < 2:
        return None
    mu = mean(x, ctx)
    s = std(x, ctx)
    # Handle case where mean or std returns None
    if mu is None or s is None:
        return None
    k = 1.0 / np.sqrt(max(_SMALL_VARIANCE_BOUND, 1.0 - ctx.confidence))  # 1/sqrt(alpha)
    half = k * s / np.sqrt(n_eff)
    return _CIResult(
        confidence=ctx.confidence,
        method="chebyshev",
        low=mu - half,
        high=mu + half,
    ).as_dict()




[docs]
def chebyshev_required_n(x: np.ndarray, ctx: StatsContext) -> int:
    r"""
    Required :math:`n` to achieve Chebyshev CI half-width :math:`\le \varepsilon`.

    With :math:`\delta = 1 - \text{confidence}`, the half-width is
    :math:`z\,SE = \dfrac{s}{\sqrt{n_\text{eff}\,\delta}}` where :math:`z=1/\sqrt{\delta}`.
    Solve :math:`n_\text{eff} \ge \dfrac{s^2}{\varepsilon^2\,\delta}`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Must supply :attr:`~mcframework.stats_engine.StatsContext.eps` (target half-width) and may
        override :attr:`~mcframework.stats_engine.StatsContext.confidence`.

    Returns
    -------
    int
        Minimum integer :math:`n_\text{eff}`.

    Examples
    --------
    >>> chebyshev_required_n(np.array([1., 2., 3.]), {"eps": 0.5, "confidence": 0.9})
    41
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.eps is None:
        raise MissingContextError("chebyshev_required_n requires ctx.eps")
    if ctx.eps <= 0:
        raise ValueError("ctx.eps must be positive")
    s = std(x, ctx)
    k = 1.0 / np.sqrt(max(_SMALL_VARIANCE_BOUND, 1.0 - ctx.confidence))
    return int(np.ceil(((k * s) / float(ctx.eps)) ** 2))




[docs]
def markov_error_prob(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Markov bound on error probability for target :math:`\theta`.

    Using the squared error of the sample mean,
    :math:`\mathrm{MSE}(\bar X) \approx \frac{s^2}{n} + \text{Bias}^2`, Markov gives

    .. math::
       \Pr\!\left(\,|\bar X - \theta| \ge \varepsilon\,\right)
       \;\le\; \frac{\mathrm{MSE}}{\varepsilon^2}.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.target` and
        :attr:`~mcframework.stats_engine.StatsContext.eps`. The declared
        :attr:`~mcframework.stats_engine.StatsContext.n` influences the context but does not enter
        directly into the bound.

    Returns
    -------
    float
        Upper bound in :math:`[0, 1]`.
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.target is None:
        raise MissingContextError("markov_error_prob requires ctx.target")
    if ctx.eps is None:
        raise MissingContextError("markov_error_prob requires ctx.eps")
    if ctx.eps <= 0:
        raise ValueError("ctx.eps must be positive")
    arr, _ = _clean(x, ctx)
    mse = float(np.mean((arr - ctx.target) ** 2))
    return mse / (ctx.eps**2)




[docs]
def bias_to_target(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Bias of the sample mean relative to a target :math:`\theta`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.target`.

    Returns
    -------
    float
        Estimated bias :math:`\bar X - \theta`.
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.target is None:
        raise MissingContextError("bias_to_target requires ctx.target")
    sample_mean = mean(x, ctx)
    if sample_mean is None:
        raise InsufficientDataError("bias_to_target requires non-empty data after cleaning")
    return float(sample_mean - ctx.target)




[docs]
def mse_to_target(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Mean squared error of :math:`\bar X` relative to a target :math:`\theta`.

    Approximated by

    .. math::
       \mathrm{MSE}(\bar X) \approx \frac{s^2}{n} + (\bar X - \theta)^2.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.target`.

    Returns
    -------
    float
        Estimated mean squared error relative to ``target``.
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.target is None:
        raise MissingContextError("mse_to_target requires ctx.target")
    arr, _ = _clean(x, ctx)
    return float(np.mean((arr - ctx.target) ** 2))




[docs]
def build_default_engine(
    include_dist_free: bool = True,
    include_target_bounds: bool = True,
) -> StatsEngine:
    r"""
    Construct a :class:`~mcframework.stats_engine.StatsEngine` with a practical set of metrics.

    Parameters
    ----------
    include_dist_free : bool, default True
        Include Chebyshev-based CI and sizing.
    include_target_bounds : bool, default True
        Include :func:`bias_to_target`, :func:`mse_to_target`, :func:`markov_error_prob`.

    Returns
    -------
    StatsEngine
    """
    metrics: list[Metric] = [
        FnMetric("mean", mean, "Sample mean"),
        FnMetric("std", std, "Sample standard deviation"),
        FnMetric("percentiles", percentiles, "Percentiles over the sample"),
        FnMetric("skew", skew, "Fisher skewness (unbiased)"),
        FnMetric("kurtosis", kurtosis, "Excess kurtosis (unbiased)"),
        FnMetric("ci_mean", ci_mean, "z/t CI for the mean"),
        FnMetric("ci_mean_bootstrap", ci_mean_bootstrap, "Bootstrap CI for the mean"),
    ]
    if include_dist_free:
        metrics.extend(
            [
                FnMetric(
                    "ci_mean_chebyshev", ci_mean_chebyshev, "Chebyshev bound CI for the mean"
                ),
                FnMetric(
                    "chebyshev_required_n", chebyshev_required_n, "Required n under Chebyshev to reach eps"
                ),
            ]
        )
    if include_target_bounds:
        metrics.extend(
            [
                FnMetric("markov_error_prob", markov_error_prob, "Markov bound P(|X-target|>=eps)"),
                FnMetric("bias_to_target", bias_to_target, "Bias relative to target"),
                FnMetric("mse_to_target", mse_to_target, "Mean squared error to target"),
            ]
        )
    return StatsEngine(metrics)



# Build a default engine at import time
DEFAULT_ENGINE = build_default_engine(
    include_dist_free=True,
    include_target_bounds=True,
)

__all__ = [
    "CIMethod",
    "NanPolicy",
    "BootstrapMethod",
    "StatsContext",
    "ComputeResult",
    "Metric",
    "MetricSet",
    "FnMetric",
    "StatsEngine",
    "MissingContextError",
    "InsufficientDataError",
    "mean",
    "std",
    "percentiles",
    "skew",
    "kurtosis",
    "ci_mean",
    "ci_mean_bootstrap",
    "ci_mean_chebyshev",
    "chebyshev_required_n",
    "markov_error_prob",
    "bias_to_target",
    "mse_to_target",
    "build_default_engine",
    "DEFAULT_ENGINE",
]