r"""
Statistical metrics and orchestration engine for Monte Carlo simulations.
This module provides:
Configuration
:class:`StatsContext` — Shared configuration for all metric computations
(confidence level, NaN policy, bootstrap settings, etc.)
Descriptive Metrics
:func:`mean`, :func:`std`, :func:`percentiles`, :func:`skew`, :func:`kurtosis`
Confidence Intervals
- :func:`ci_mean` — Parametric CI using z/t critical values
- :func:`ci_mean_bootstrap` — Bootstrap CI (percentile or BCa)
- :func:`ci_mean_chebyshev` — Distribution-free Chebyshev bound
Target-Based Metrics
:func:`bias_to_target`, :func:`mse_to_target`, :func:`markov_error_prob`,
:func:`chebyshev_required_n`
Engine
:class:`StatsEngine` — Orchestrator that evaluates multiple metrics
:class:`FnMetric` — Adapter to bind a name to a metric function
:obj:`DEFAULT_ENGINE` — Pre-configured engine with all standard metrics
Example
-------
>>> from mcframework.stats_engine import DEFAULT_ENGINE, StatsContext
>>> import numpy as np
>>> data = np.random.normal(0, 1, 10000)
>>> ctx = StatsContext(n=len(data), confidence=0.95)
>>> result = DEFAULT_ENGINE.compute(data, ctx)
>>> result.metrics['mean'] # doctest: +SKIP
0.0012
See Also
--------
mcframework.utils.autocrit
Selects z or t critical value based on sample size.
mcframework.core.MonteCarloSimulation
Simulation base class that uses this engine.
"""
# DEV NOTE:
# ===========================================================================
# The type checker throws a fit since x is a numpy ndarray and the checker can't verify
# that numpy/scipy functions accept that. So, we're suppressing the error
# with type: ignore[arg-type] where needed.
# ===========================================================================
# pylint: disable=invalid-name
# Enum values (propagate, omit, auto, z, t, bootstrap, percentile, bca) are lowercase by design
from __future__ import annotations
import logging
from dataclasses import dataclass, field, replace
from enum import Enum
from typing import (
Any,
Callable,
Generic,
Iterable,
Mapping,
Protocol,
Sequence,
SupportsFloat,
TypeAlias,
TypeVar,
)
import numpy as np
from scipy.special import erfinv # type: ignore[import-untyped]
from scipy.stats import kurtosis as sp_kurtosis # type: ignore[import-untyped]
from scipy.stats import norm # type: ignore[import-untyped]
from scipy.stats import skew as sp_skew # type: ignore[import-untyped]
from .utils import autocrit
# Create local logger to avoid circular import with core.
logger = logging.getLogger(__name__)
if not logger.handlers:
    # Only attach a default handler when the host application has not already
    # configured this logger — avoids duplicate output on repeated imports.
    handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
# Default percentiles reported by the percentile metric.
_PCTS = (5, 25, 50, 75, 95)  # default percentiles
# TODO: make these configurable

# Numerical stability constants.
_SMALL_PROB_BOUND = 1e-12  # Minimum probability for BCa bootstrap to avoid log(0)
_SMALL_VARIANCE_BOUND = 1e-30  # Minimum variance denominator to prevent division by zero
_BCA_JACKKNIFE_DENOMINATOR = 6.0  # Constant in BCa acceleration calculation
# Custom exceptions for better error handling.
class MissingContextError(ValueError):
    """Raised when a required context field is missing.

    Subclasses :class:`ValueError` so pre-existing ``except ValueError``
    handlers continue to catch it.
    """
class InsufficientDataError(ValueError):
    """Raised when insufficient data is available for computation.

    Subclasses :class:`ValueError` for backward-compatible handling.
    """
class NanPolicy(str, Enum):
    r"""
    Strategies for handling the propagation of non-finite values.

    Attributes
    ----------
    propagate : str
        Leave NaNs or infinities in the sample; metrics reflect them.
    omit : str
        Drop non-finite observations before computing a metric.
    """

    propagate = "propagate"
    omit = "omit"
class CIMethod(str, Enum):
    r"""
    Parametric strategies for selecting confidence-interval critical values.

    Attributes
    ----------
    auto : str
        Choose Student-t when :math:`n_\text{eff} < 30`, otherwise z.
    z : str
        Always use the normal :math:`z` critical value.
    t : str
        Always use the Student-:math:`t` critical value.
    bootstrap : str
        Defer to resampling-based bootstrap intervals.
    """

    auto = "auto"
    z = "z"
    t = "t"
    bootstrap = "bootstrap"
class BootstrapMethod(str, Enum):
    r"""
    Supported bootstrap confidence-interval flavors.

    Attributes
    ----------
    percentile : str
        Use the percentile method on the bootstrap distribution.
    bca : str
        Use the bias-corrected and accelerated (BCa) adjustment.
    """

    percentile = "percentile"
    bca = "bca"
@dataclass(slots=True)
class StatsContext:
    r"""
    Shared, explicit configuration for statistic and CI computations.

    The context keeps track of three recurring quantities:

    * Confidence level :math:`\gamma = \texttt{confidence}` with tail mass
      :math:`\alpha = 1 - \gamma` (see :attr:`alpha` and :meth:`q_bound`).
    * Effective sample size :math:`n_\text{eff}`, obtained from
      :meth:`eff_n` and used by every finite-sample adjustment.
    * Requested quantiles that drive percentile metrics.

    Attributes
    ----------
    n : int
        Declared sample size (fallback when NaNs are not omitted).
    confidence : float, default 0.95
        Confidence level in :math:`(0, 1)`.
    ci_method : {"auto", "z", "t", "bootstrap"}, default "auto"
        Strategy for :func:`ci_mean`. If ``"auto"``, use Student-t when
        :math:`n_\text{eff} < 30`, else normal z.
    percentiles : tuple of int, default ``(5, 25, 50, 75, 95)``
        Percentiles to compute in :func:`percentiles`.
    nan_policy : {"propagate", "omit"}, default "propagate"
        If ``"omit"``, drop non-finite values before all computations.
    target : float, optional
        Optional target value (e.g., true mean) for bias/MSE/Markov metrics.
    eps : float, optional
        Tolerance used by Chebyshev sizing and Markov bounds, when required.
    ddof : int, default 1
        Degrees of freedom for :func:`numpy.std` (1 => Bessel correction).
    ess : int, optional
        Effective sample size override (e.g., from MCMC diagnostics).
    rng : int or numpy.random.Generator, optional
        Seed or Generator used by bootstrap methods for reproducibility.
    n_bootstrap : int, default 10000
        Number of bootstrap resamples for :func:`ci_mean_bootstrap`.
    bootstrap : {"percentile", "bca"}, default "percentile"
        Bootstrap flavor for :func:`ci_mean_bootstrap`.

    Notes
    -----
    The context is immutable by convention at runtime; prefer
    :meth:`with_overrides` to construct a modified copy with a small set of
    changed fields.

    Examples
    --------
    >>> ctx = StatsContext(n=5000, confidence=0.95, ci_method=CIMethod.auto, nan_policy=NanPolicy.omit)
    >>> round(ctx.alpha, 2)
    0.05
    """

    n: int
    confidence: float = 0.95
    ci_method: CIMethod = CIMethod.auto
    percentiles: tuple[int, ...] = (5, 25, 50, 75, 95)
    nan_policy: NanPolicy = NanPolicy.propagate
    target: float | None = None
    eps: float | None = None
    ddof: int = 1
    ess: int | None = None
    rng: int | np.random.Generator | None = None
    n_bootstrap: int = 10_000
    bootstrap: BootstrapMethod = BootstrapMethod.percentile

    # ergonomics
    def with_overrides(self, **changes) -> "StatsContext":
        r"""
        Return a shallow copy with selected fields replaced.

        Parameters
        ----------
        **changes :
            Field overrides passed to :func:`dataclasses.replace`.

        Returns
        -------
        StatsContext
            Modified copy.

        Examples
        --------
        >>> ctx = StatsContext(n=1000)
        >>> ctx2 = ctx.with_overrides(confidence=0.9, n_bootstrap=2000)
        """
        return replace(self, **changes)

    @property
    def alpha(self) -> float:
        r"""
        Tail probability :math:`\alpha = 1 - \text{confidence}`.

        Returns
        -------
        float
        """
        return 1.0 - self.confidence

    def q_bound(self) -> tuple[float, float]:
        r"""
        Percentile bounds corresponding to the current confidence.

        For :math:`\alpha = 1 - \text{confidence}`, returns
        :math:`(100\alpha/2,\; 100(1-\alpha/2))`.

        Returns
        -------
        tuple of float
            (lower_percentile, upper_percentile)
        """
        alpha = self.alpha
        return 100.0 * (alpha / 2), 100.0 * (1 - alpha / 2)

    def eff_n(self, observed_len: int, finite_count: int | None = None) -> int:
        r"""
        Effective sample size :math:`n_\text{eff}` used by CI calculations.

        Priority is: 1) explicit :attr:`ess`; 2) count of finite values when
        ``nan_policy="omit"``; 3) declared :attr:`n`; else ``observed_len``
        when ``n`` is falsy.

        Parameters
        ----------
        observed_len : int
            Raw length of the input array.
        finite_count : int, optional
            Count of finite values (used when ``nan_policy="omit"``).

        Returns
        -------
        int
        """
        if self.ess is not None:
            return int(self.ess)
        if self.nan_policy == "omit" and finite_count is not None:
            return int(finite_count)
        return int(self.n or observed_len)

    def get_generators(self) -> np.random.Generator:
        r"""
        Return a NumPy :class:`~numpy.random.Generator` initialized from :attr:`rng`.

        Returns
        -------
        numpy.random.Generator
            ``rng`` itself when it is already a Generator, a Generator seeded
            with ``rng`` when it is an int, otherwise an unseeded default.
        """
        if isinstance(self.rng, np.random.Generator):
            return self.rng
        if isinstance(self.rng, (int, np.integer)):
            return np.random.default_rng(int(self.rng))
        return np.random.default_rng()

    def __post_init__(self) -> None:
        r"""
        Validate field ranges and cross-field consistency.

        Raises
        ------
        ValueError
            If any field is outside its allowed range or fields are inconsistent.
        """
        # Individual field validations
        if not 0.0 < self.confidence < 1.0:
            raise ValueError("confidence must be in (0,1)")
        if any(p < 0 or p > 100 for p in self.percentiles):
            raise ValueError("percentiles must be in [0,100]")
        if self.n_bootstrap <= 0:
            raise ValueError("n_bootstrap must be > 0")
        if self.ddof < 0:
            raise ValueError("ddof must be >= 0")
        if self.eps is not None and self.eps <= 0:
            raise ValueError("eps must be positive")
        # Cross-field validations
        if self.ess is not None:
            if self.ess > self.n:
                raise ValueError(f"ess ({self.ess}) cannot exceed n ({self.n})")
            if self.ess <= 0:
                raise ValueError(f"ess must be positive, got {self.ess}")
        # Warn about small bootstrap samples (but don't fail).
        # BUGFIX: this previously raised ValueError, contradicting the stated
        # intent and making small smoke-test configurations unusable.
        if self.n_bootstrap < 100:
            logger.warning(
                "n_bootstrap (%d) should be >= 100 for reliable estimates",
                self.n_bootstrap,
            )
@dataclass(frozen=True)
class _CIResult:
r"""
Internal helper that stores a confidence interval before converting to ``dict``.
Attributes
----------
confidence : float
Confidence level :math:`\gamma`.
method : str
Human-readable label (e.g., ``"z"`` or ``"bootstrap-bca"``).
low, high : float
Interval endpoints :math:`(\ell, u)`.
extras : Mapping[str, SupportsFloat]
Optional diagnostics such as ``se`` or ``crit``.
"""
confidence: float
method: str
low: float
high: float
extras: Mapping[str, SupportsFloat] = field(default_factory=dict)
def as_dict(self) -> dict[str, float | str]:
result: dict[str, float | str] = {
"confidence": float(self.confidence),
"method": self.method.value if hasattr(self.method, 'value') else str(self.method),
"low": float(self.low),
"high": float(self.high),
}
if self.extras:
result.update({key: float(value) for key, value in self.extras.items()})
return result
@dataclass(frozen=True)
class ComputeResult:
    r"""
    Result from :meth:`StatsEngine.compute` with tracking of computation failures.

    Attributes
    ----------
    metrics : dict[str, Any]
        Successfully computed metric values, keyed by metric name.
    skipped : list[tuple[str, str]]
        ``(metric_name, reason)`` pairs for metrics that were skipped.
    errors : list[tuple[str, str]]
        ``(metric_name, error_message)`` pairs for metrics that raised.

    Examples
    --------
    >>> engine = StatsEngine([FnMetric("mean", mean)])
    >>> result = engine.compute(np.array([1.0, 2.0, 3.0]), StatsContext(n=3))
    >>> result.metrics['mean']
    2.0
    >>> result.skipped
    []
    """

    metrics: dict[str, Any]
    skipped: list[tuple[str, str]] = field(default_factory=list)
    errors: list[tuple[str, str]] = field(default_factory=list)

    def __repr__(self) -> str:
        computed = list(self.metrics.keys())
        dropped = [name for name, _ in self.skipped]
        failed = [name for name, _ in self.errors]
        return (
            f"{self.__class__.__name__}(\n"
            f" metrics={computed},\n"
            f" skipped={dropped},\n"
            f" errors={failed}\n"
            ")"
        )

    def successful_metrics(self) -> set[str]:
        r"""
        Return names of successfully computed metrics.

        Returns
        -------
        set[str]
            Set of metric names present in :attr:`metrics`.
        """
        return set(self.metrics)
# Any iterable of Metric callables accepted by StatsEngine.
MetricSet: TypeAlias = Iterable["Metric"]


class Metric(Protocol):
    r"""
    Structural protocol for metric callables used by :class:`StatsEngine`.

    A metric exposes a ``name`` attribute and is callable as:
    ``metric(x: numpy.ndarray, ctx: StatsContext) -> Any``

    Attributes
    ----------
    name : str
        Human-readable key under which the metric's value is returned.
    """

    @property
    def name(self) -> str: ...

    def __call__(self, x: np.ndarray, ctx: StatsContext, /) -> Any: ...
_MetricT = TypeVar("_MetricT")  # Result type produced by an FnMetric


@dataclass(frozen=True)
class FnMetric(Generic[_MetricT]):
    r"""
    Lightweight adapter binding a human-readable ``name`` to a metric function.

    Parameters
    ----------
    name : str
        Key under which the metric result is stored in :meth:`StatsEngine.compute`.
    fn : callable
        Function with signature ``fn(x: ndarray, ctx: StatsContext) -> T``.
    doc : str, optional
        Short description displayed by UIs or docs.

    Examples
    --------
    >>> import numpy as np
    >>> m = FnMetric("mean", lambda a, ctx: float(np.mean(a)))
    >>> m(np.array([1, 2, 3]), StatsContext(n=3))
    2.0
    """

    name: str
    fn: Callable[[np.ndarray, StatsContext], _MetricT]
    doc: str = ""

    def __call__(self, x: np.ndarray, ctx: StatsContext) -> _MetricT:
        r"""
        Evaluate the wrapped function on the sample.

        Parameters
        ----------
        x : ndarray
            Input sample.
        ctx : StatsContext
            Context parameters (see individual metric docs).

        Returns
        -------
        Any
            Metric value produced by ``fn``.
        """
        return self.fn(x, ctx)
class StatsEngine:
    r"""
    Orchestrator that evaluates a set of metrics over an input array.

    Given a collection of metric callables :math:`\{\phi_j\}_{j=1}^m` and an
    array :math:`x \in \mathbb{R}^n`, the engine returns the dictionary

    .. math::
        \{\phi_j(x, \texttt{ctx}) : j = 1,\dots,m\},

    while recording any skipped/failed evaluations for downstream inspection.

    Parameters
    ----------
    metrics : iterable of Metric
        Callables with a ``name`` and signature ``metric(x, ctx)``.

    Notes
    -----
    All metrics receive the *same* :class:`~mcframework.stats_engine.StatsContext`.
    Prefer field names that read well across multiple metrics and avoid collisions.

    Examples
    --------
    >>> eng = StatsEngine([FnMetric("mean", mean), FnMetric("std", std)])
    >>> x = np.array([1., 2., 3.])
    >>> result = eng.compute(x, StatsContext(n=len(x)))
    >>> result.metrics['mean']
    2.0
    >>> result.metrics['std']
    1.0
    """

    def __init__(self, metrics: MetricSet):
        # Materialize once so a generator argument can be iterated repeatedly.
        self._metrics = list(metrics)

    def __repr__(self) -> str:
        metric_names = ", ".join(m.name for m in self._metrics)
        return f"{self.__class__.__name__}(metrics=[{metric_names}])"

    def compute(
        self,
        x: np.ndarray,
        ctx: StatsContext | None = None,
        select: Sequence[str] | None = None,
        **kwargs: Any,
    ) -> ComputeResult:
        r"""
        Evaluate all registered metrics on ``x``.

        Parameters
        ----------
        x : ndarray
            Sample values.
        ctx : StatsContext, optional
            Context parameters. If None, one is built from ``**kwargs``.
        select : sequence of str, optional
            If given, compute only the metrics with these names.
        **kwargs : Any
            Used to build a StatsContext if ctx is None.
            Required: 'n' (int). Optional: 'confidence', 'ci_method',
            'percentiles', etc.

        Returns
        -------
        ComputeResult
            Result object containing:
            - ``metrics``: Successfully computed metric values.
            - ``skipped``: List of (metric_name, reason) for skipped metrics.
            - ``errors``: List of (metric_name, error_message) for failed metrics.
        """
        if ctx is not None:
            # Normalize dict/object contexts into a StatsContext.
            ctx = _ensure_ctx(ctx, x)
        # build context from kwargs
        else:
            base = dict(kwargs)
            base.setdefault("n", int(np.asarray(x).size))
            ctx = StatsContext(**base)
        metrics_to_compute = (
            self._metrics if select is None else [m for m in self._metrics if m.name in set(select)]
        )
        out: dict[str, Any] = {}
        skipped: list[tuple[str, str]] = []
        errors: list[tuple[str, str]] = []
        for m in metrics_to_compute:
            try:
                result = m(x, ctx)
                # Handle None return (insufficient data)
                if result is None:
                    skipped.append((m.name, "insufficient data"))
                    logger.debug("Metric %s returned None (insufficient data), skipping", m.name)
                    continue
                out[m.name] = result
            # NOTE: MissingContextError subclasses ValueError, so this clause
            # must precede the ValueError clause below.
            except MissingContextError as e:
                # Skip metrics that require context fields not provided
                reason = str(e)
                skipped.append((m.name, reason))
                logger.debug("Skipping metric %s: %s", m.name, reason)
                continue
            except ValueError as e:
                # Also handle general ValueError for backward compatibility
                msg = str(e)
                if "Missing required context keys" in msg:
                    skipped.append((m.name, msg))
                    logger.debug("Skipping metric %s: %s", m.name, msg)
                    continue
                # Any other ValueError is a genuine failure: re-raise.
                raise
            except Exception as e:  # pylint: disable=broad-exception-caught
                # Log and track unexpected errors (intentionally broad to not crash engine)
                error_msg = str(e)
                errors.append((m.name, error_msg))
                logger.exception("Error computing metric %s", m.name)
                continue
        return ComputeResult(metrics=out, skipped=skipped, errors=errors)
def _ensure_ctx(ctx: Any, x: np.ndarray) -> StatsContext:
    r"""
    Normalize arbitrary context inputs into a :class:`StatsContext`.

    Parameters
    ----------
    ctx : Any
        A :class:`StatsContext`, mapping, object with attributes, or ``None``.
    x : ndarray
        Sample used to infer the fallback ``n`` when missing.

    Returns
    -------
    StatsContext
        Context instance with all required fields populated.

    Raises
    ------
    TypeError
        If ``ctx`` cannot be interpreted as configuration data.
    """
    # Already normalized — pass through untouched.
    if isinstance(ctx, StatsContext):
        return ctx
    fallback_n = int(np.asarray(x).size)
    if ctx is None:
        return StatsContext(n=fallback_n)
    if isinstance(ctx, dict):
        cfg = {**ctx}
        cfg.setdefault("n", fallback_n)
        return StatsContext(**cfg)
    # Fallback: treat any attribute-bearing object as a bag of fields.
    try:
        cfg = dict(vars(ctx))
    except TypeError as exc:
        raise TypeError(
            "ctx must be a StatsContext, dict, None, or an object with attributes"
        ) from exc
    cfg.setdefault("n", fallback_n)
    return StatsContext(**cfg)
def _clean(x: np.ndarray, ctx: Any) -> tuple[np.ndarray, np.ndarray]:
r"""
Sanitize the input sample and return the finite-value mask.
Parameters
----------
x : ndarray
Raw input sample.
ctx : StatsContext
Context whose :attr:`~mcframework.stats_engine.StatsContext.nan_policy` guides filtering.
Returns
-------
tuple of ndarray
``(arr, finite_mask)`` where ``arr`` is the possibly filtered sample and
``finite_mask`` marks finite elements in the original sample.
Raises
------
ValueError
If an unknown :attr:`~mcframework.stats_engine.StatsContext.nan_policy` is supplied.
"""
arr = np.asarray(x, dtype=float)
finite = np.isfinite(arr)
if ctx.nan_policy == "omit":
arr = arr[finite]
elif ctx.nan_policy not in ("omit", "propagate", "raise"):
raise ValueError(f"Unknown nan_policy: {ctx.nan_policy}")
return arr, finite
def _effective_sample_size(x: np.ndarray, ctx: StatsContext) -> int:
    r"""
    Resolve :math:`n_\text{eff}` for confidence-interval calculations.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Context governing NaN handling and declared sample size.

    Returns
    -------
    int
        Effective sample size according to :meth:`StatsContext.eff_n`.
    """
    cleaned, finite_mask = _clean(x, ctx)
    # eff_n expects the number of finite observations, not the mask itself.
    return ctx.eff_n(observed_len=cleaned.size, finite_count=int(finite_mask.sum()))
def mean(x: np.ndarray, ctx: StatsContext) -> float | None:
    r"""
    Sample mean :math:`\bar X = \frac{1}{n}\sum_{i=1}^n x_i`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        If ``nan_policy="omit"``, non-finite values are excluded.

    Returns
    -------
    float or None
        Estimate of :math:`\mathbb{E}[X]`, or ``None`` when the cleaned
        sample is empty.

    Examples
    --------
    >>> mean(np.array([1, 2, 3]), StatsContext(n=3))
    2.0
    """
    ctx = _ensure_ctx(ctx, x)
    sample, _ = _clean(x, ctx)
    if sample.size == 0:
        return None
    return float(sample.mean())
def std(x: np.ndarray, ctx: StatsContext) -> float | None:
    r"""
    Sample standard deviation with Bessel correction.

    .. math::
        s = \sqrt{\frac{1}{n_\text{eff} - \texttt{ddof}}
        \sum_{i=1}^{n_\text{eff}} (x_i - \bar X)^2 }.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.ddof` (default 1).
        If ``nan_policy="omit"``, non-finite values are excluded.

    Returns
    -------
    float or None
        :math:`s` when :math:`n_\text{eff} > 1`, else ``None``.
    """
    ctx = _ensure_ctx(ctx, x)
    sample, finite_mask = _clean(x, ctx)
    n_eff = ctx.eff_n(observed_len=sample.size, finite_count=int(finite_mask.sum()))
    # At least two effective observations are required for a dispersion estimate.
    if n_eff <= 1:
        return None
    return float(np.std(sample, ddof=ctx.ddof))
def percentiles(x: np.ndarray, ctx: StatsContext) -> dict[int, float]:
    r"""
    Empirical percentiles evaluated on the cleaned sample.

    For each requested :math:`p`, computes the empirical quantile
    :math:`Q_p(x)` via :func:`numpy.percentile` (linear interpolation).

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.percentiles` and
        :attr:`~mcframework.stats_engine.StatsContext.nan_policy`.

    Returns
    -------
    dict[int, float]
        Mapping :math:`p \mapsto Q_p(x)`; NaN values when the cleaned sample
        is empty.
    """
    ctx = _ensure_ctx(ctx, x)
    sample, _ = _clean(x, ctx)
    if not sample.size:
        return {p: float("nan") for p in ctx.percentiles}
    values = np.percentile(sample, ctx.percentiles)
    return {p: float(v) for p, v in zip(ctx.percentiles, values)}
def skew(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Unbiased sample skewness (Fisher–Pearson standardized third central moment).

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.nan_policy`.

    Returns
    -------
    float
        Skewness estimate, or ``0.0`` when fewer than three values remain
        after cleaning.

    Notes
    -----
    Delegates to :func:`scipy.stats.skew` with ``bias=False``.
    """
    ctx = _ensure_ctx(ctx, x)
    sample, _ = _clean(x, ctx)
    # scipy's bias correction needs more than two observations.
    if sample.size <= 2:
        return 0.0
    return float(sp_skew(sample, bias=False))  # type: ignore[arg-type]
def kurtosis(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Unbiased sample **excess** kurtosis (Fisher definition).

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext
        Uses :attr:`~mcframework.stats_engine.StatsContext.nan_policy`.

    Returns
    -------
    float
        Excess kurtosis, or ``0.0`` when fewer than four values remain after
        cleaning.

    Notes
    -----
    Delegates to :func:`scipy.stats.kurtosis` with ``fisher=True, bias=False``.
    """
    ctx = _ensure_ctx(ctx, x)
    sample, _ = _clean(x, ctx)
    # scipy's unbiased excess kurtosis needs more than three observations.
    if sample.size <= 3:
        return 0.0
    return float(sp_kurtosis(sample, fisher=True, bias=False))  # type: ignore[arg-type]
def ci_mean(x: np.ndarray, ctx) -> dict[str, float | str]:
    r"""
    Parametric CI for :math:`\mathbb{E}[X]` using z/t critical values.

    With sample mean :math:`\bar X` and :math:`SE = s/\sqrt{n_\text{eff}}`,
    the interval is :math:`\bar X \pm c \cdot SE`, where :math:`c` comes from
    :func:`mcframework.utils.autocrit` according to
    :attr:`~mcframework.stats_engine.StatsContext.ci_method` and
    :math:`n_\text{eff}`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Must supply at least ``n``; ``confidence``, ``ddof`` and
        ``ci_method`` refine the interval.

    Returns
    -------
    dict[str, float | str]
        Keys ``confidence``, ``method``, ``low``, ``high``, plus ``se`` and
        ``crit`` whenever :math:`n_\text{eff} \ge 2`. Endpoints are NaN for
        empty or single-observation samples.
    """
    ctx = _ensure_ctx(ctx, x)
    sample, _ = _clean(x, ctx)
    # Everything filtered out (or empty input): the interval is undefined.
    if sample.size == 0:
        return _CIResult(
            confidence=ctx.confidence,
            method=ctx.ci_method,
            low=float("nan"),
            high=float("nan"),
        ).as_dict()
    n_eff = _effective_sample_size(sample, ctx)  # counts after cleaning if 'omit'
    if n_eff < 2:
        # One observation carries no dispersion information.
        return _CIResult(
            confidence=ctx.confidence,
            method=ctx.ci_method,
            low=float("nan"),
            high=float("nan"),
            extras={"se": float("nan"), "crit": float("nan")},
        ).as_dict()
    center = float(np.mean(sample))
    spread = float(np.std(sample, ddof=getattr(ctx, "ddof", 1)))
    # Degenerate (constant) data: zero SE collapses the CI to a point.
    standard_error = 0.0 if spread == 0.0 else spread / np.sqrt(n_eff)
    crit, resolved_method = autocrit(ctx.confidence, n_eff, ctx.ci_method)
    return _CIResult(
        confidence=ctx.confidence,
        method=resolved_method,
        low=center - crit * standard_error,
        high=center + crit * standard_error,
        extras={"se": standard_error, "crit": crit},
    ).as_dict()
def _bootstrap_means(
arr: np.ndarray, n_resamples: int, rng: np.random.Generator
) -> np.ndarray:
r"""
Generate bootstrap replicates of the sample mean.
Parameters
----------
arr : ndarray
Cleaned sample values.
n_resamples : int
Number of bootstrap resamples.
rng : numpy.random.Generator
Random generator used to draw resamples.
Returns
-------
ndarray
Array of length ``n_resamples`` containing :math:`\{\bar X_b^*\}`.
"""
n = arr.size
idx = rng.integers(0, n, size=(n_resamples, n), endpoint=False)
return arr[idx].mean(axis=1)
def _bca_bias_correction(arr: np.ndarray, bootstrap_means: np.ndarray) -> float:
    r"""
    Compute :math:`z_0`, the bias-correction factor for the BCa bootstrap.

    .. math::
        z_0 = \Phi^{-1}(P(\bar X_b^* < \bar X))

    where :math:`\Phi^{-1}` is the inverse standard normal CDF, :math:`\bar X`
    the observed sample mean, and :math:`\bar X_b^*` the bootstrap means.

    Parameters
    ----------
    arr : ndarray
        Original sample values.
    bootstrap_means : ndarray
        Bootstrap replicate means.

    Returns
    -------
    float
        Bias correction factor :math:`z_0`.

    Notes
    -----
    The proportion is clipped to ``[_SMALL_PROB_BOUND, 1 - _SMALL_PROB_BOUND]``
    so the inverse error function stays finite.
    """
    observed_mean = float(np.mean(arr))
    below = float(np.sum(bootstrap_means < observed_mean))
    proportion = np.clip(
        below / bootstrap_means.size, _SMALL_PROB_BOUND, 1 - _SMALL_PROB_BOUND
    )
    # norm.ppf(p) == sqrt(2) * erfinv(2p - 1): convert proportion to z-score.
    return float(np.sqrt(2) * erfinv(2 * proportion - 1))
def _bca_acceleration(arr: np.ndarray) -> float:
    r"""
    Compute :math:`a`, the BCa acceleration factor, via the jackknife.

    Using the leave-one-out means,

    .. math::
        a = \frac{\sum_{i=1}^n d_i^3}{6 \left(\sum_{i=1}^n d_i^2\right)^{3/2}},
        \qquad d_i = \bar X_{(-i)} - \bar{\bar X}_{(\cdot)}.

    Parameters
    ----------
    arr : ndarray
        Original sample values.

    Returns
    -------
    float
        Acceleration factor :math:`a`.

    Notes
    -----
    Follows Efron & Tibshirani (1993), "An Introduction to the Bootstrap".
    A tiny constant is added to the denominator to prevent division by zero
    for constant samples.

    References
    ----------
    .. [1] Efron, B., & Tibshirani, R. J. (1993). An Introduction to the
       Bootstrap. Chapman & Hall/CRC.
    """
    # Leave-one-out means in O(n): with total S, mean without x_i is (S - x_i)/(n - 1).
    total = np.sum(arr, dtype=float)
    loo_means = (total - arr) / (arr.size - 1)
    deviations = loo_means - float(np.mean(loo_means))
    # Efron & Tibshirani acceleration formula; the 6.0 is theoretical.
    cubed_sum = float(np.sum(deviations**3))
    scale = _BCA_JACKKNIFE_DENOMINATOR * (np.sum(deviations**2) ** 1.5) + _SMALL_VARIANCE_BOUND
    return cubed_sum / scale
def _compute_bca_interval(arr: np.ndarray, means: np.ndarray, confidence: float) -> tuple[float, float]:
    r"""
    Compute the bias-corrected and accelerated (BCa) bootstrap interval.

    The BCa method adjusts the bootstrap percentiles for median bias
    (:math:`z_0`) and for non-constant estimator variance (:math:`a`) via

    .. math::
        p = \Phi\left(z_0 + \frac{z_0 + z_\alpha}{1 - a(z_0 + z_\alpha)}\right)

    where :math:`\Phi` is the standard normal CDF and :math:`z_\alpha` the
    :math:`\alpha`-level quantile.

    Parameters
    ----------
    arr : ndarray
        Original sample values.
    means : ndarray
        Bootstrap replicate means.
    confidence : float
        Confidence level in (0, 1).

    Returns
    -------
    tuple[float, float]
        (lower_bound, upper_bound)

    See Also
    --------
    _bca_bias_correction : Computes the bias correction factor :math:`z_0`.
    _bca_acceleration : Computes the acceleration factor :math:`a`.

    References
    ----------
    .. [1] Efron, B. (1987). "Better Bootstrap Confidence Intervals".
       Journal of the American Statistical Association, 82(397), 171-185.
    """
    bias = _bca_bias_correction(arr, means)
    accel = _bca_acceleration(arr)
    # Standard normal quantiles bounding the nominal interval.
    tail = (1.0 - confidence) / 2.0
    z_lower = float(norm.ppf(tail))
    z_upper = float(norm.ppf(1.0 - tail))

    def _adjusted(z_alpha: float) -> float:
        """BCa transformation: nominal z-score -> adjusted percentile (0-100)."""
        shifted = z_alpha + bias
        return float(norm.cdf(bias + shifted / (1.0 - accel * shifted))) * 100.0

    # Clip adjusted percentiles to the valid range before lookup.
    pct_lower = np.clip(_adjusted(z_lower), 0, 100)
    pct_upper = np.clip(_adjusted(z_upper), 0, 100)
    lower, upper = np.percentile(means, [pct_lower, pct_upper])
    return float(lower), float(upper)
[docs]
def ci_mean_bootstrap(x: np.ndarray, ctx: StatsContext) -> dict[str, float | str] | None:
    r"""
    Bootstrap confidence interval for :math:`\mathbb{E}[X]` via resampling.

    Generates ``n_bootstrap`` bootstrap samples by drawing with replacement
    from the input data, computes the mean of each sample, and returns a
    confidence interval from the resulting bootstrap distribution.

    The percentile CI is constructed as

    .. math::

        \left[\,Q_{\alpha/2}(\bar X^*),\; Q_{1-\alpha/2}(\bar X^*)\,\right],

    where :math:`\bar X^*` denotes the bootstrap means and
    :math:`\alpha = 1 - \text{confidence}`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Configuration that may specify:

        - :attr:`~mcframework.stats_engine.StatsContext.confidence`
          Confidence level :math:`\in (0, 1)`.
        - :attr:`~mcframework.stats_engine.StatsContext.n_bootstrap`
          Number of bootstrap resamples (default ``10_000``).
        - :attr:`~mcframework.stats_engine.StatsContext.nan_policy`
          Whether to omit non-finite values before bootstrapping.
        - :attr:`~mcframework.stats_engine.StatsContext.bootstrap`
          Flavor (``"percentile"`` or ``"bca"``).
        - :attr:`~mcframework.stats_engine.StatsContext.rng`
          Seed or generator for reproducibility.

    Returns
    -------
    dict or None
        Mapping with keys ``confidence`` (float), ``method``
        (``"bootstrap-percentile"`` or ``"bootstrap-bca"``), ``low``, and
        ``high``. Returns ``None`` if the sample is empty after cleaning.

    See Also
    --------
    ci_mean : Parametric CI using z/t critical values.
    ci_mean_chebyshev : Distribution-free CI via Chebyshev's inequality.

    Notes
    -----
    The bootstrap percentile method is distribution-free and asymptotically
    valid under mild regularity conditions. For small samples, it may have
    lower coverage than the nominal confidence level. When fewer than 3
    observations remain after cleaning, the percentile method is used even
    if BCa was requested (the BCa jackknife needs n >= 3).

    Examples
    --------
    >>> x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    >>> result = ci_mean_bootstrap(x, {"confidence": 0.9, "n_bootstrap": 5000, "rng": 42})
    >>> result["method"]
    'bootstrap-percentile'
    >>> 1.5 < result["low"] < result["high"] < 4.5
    True
    """
    ctx = _ensure_ctx(ctx, x)
    arr, _ = _clean(x, ctx)
    if arr.size == 0:
        return None
    n_resamples = int(ctx.n_bootstrap)
    g = ctx.get_generators()
    means = _bootstrap_means(arr, n_resamples, g)
    # Fall back to percentile for tiny samples: BCa's jackknife acceleration
    # is undefined/unstable below n = 3.
    if ctx.bootstrap == "percentile" or arr.size < 3:
        loq, hiq = ctx.q_bound()
        low, high = np.percentile(means, [loq, hiq])
        method = "bootstrap-percentile"
    else:
        low, high = _compute_bca_interval(arr, means, ctx.confidence)
        method = "bootstrap-bca"
    return _CIResult(
        confidence=ctx.confidence,
        method=method,
        # float() cast: np.percentile yields numpy scalars; keep the result
        # consistent with the BCa branch and the declared return type.
        low=float(low),
        high=float(high),
    ).as_dict()
[docs]
def ci_mean_chebyshev(x: np.ndarray, ctx: StatsContext) -> dict[str, float | str] | None:
    r"""
    Distribution-free CI for :math:`\mathbb{E}[X]` via Chebyshev's inequality.

    For :math:`\delta = 1 - \text{confidence}`, choosing :math:`z=1/\sqrt{\delta}`
    guarantees

    .. math::

        \Pr\!\left(\,|\bar X - \mu| \ge z\,SE\,\right) \le \delta,
        \qquad SE = \frac{s}{\sqrt{n_\text{eff}}}.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.n` and may specify
        :attr:`~mcframework.stats_engine.StatsContext.confidence`.

    Returns
    -------
    dict[str, float | str] or None
        Mapping with keys ``confidence``, ``method``, ``low``, and ``high``,
        or ``None`` if :math:`n_\text{eff} < 2` or the sample statistics are
        unavailable after cleaning.
    """
    ctx = _ensure_ctx(ctx, x)
    n_eff = _effective_sample_size(x, ctx)
    if n_eff < 2:
        return None
    center = mean(x, ctx)
    spread = std(x, ctx)
    if center is None or spread is None:
        # Cleaning removed everything; no interval can be formed.
        return None
    # z = 1/sqrt(alpha); alpha floored to avoid division blow-up at confidence ~ 1.
    z = 1.0 / np.sqrt(max(_SMALL_VARIANCE_BOUND, 1.0 - ctx.confidence))
    half_width = z * spread / np.sqrt(n_eff)
    return _CIResult(
        confidence=ctx.confidence,
        method="chebyshev",
        low=center - half_width,
        high=center + half_width,
    ).as_dict()
[docs]
def chebyshev_required_n(x: np.ndarray, ctx: StatsContext) -> int:
    r"""
    Required :math:`n` to achieve Chebyshev CI half-width :math:`\le \varepsilon`.

    With :math:`\delta = 1 - \text{confidence}`, the half-width is
    :math:`z\,SE = \dfrac{s}{\sqrt{n_\text{eff}\,\delta}}` where :math:`z=1/\sqrt{\delta}`.
    Solve :math:`n_\text{eff} \ge \dfrac{s^2}{\varepsilon^2\,\delta}`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Must supply :attr:`~mcframework.stats_engine.StatsContext.eps` (target half-width) and may
        override :attr:`~mcframework.stats_engine.StatsContext.confidence`.

    Returns
    -------
    int
        Minimum integer :math:`n_\text{eff}`.

    Raises
    ------
    MissingContextError
        If ``ctx.eps`` is not supplied.
    ValueError
        If ``ctx.eps`` is not positive.
    InsufficientDataError
        If no data remain after cleaning, so the standard deviation is
        unavailable.

    Examples
    --------
    >>> chebyshev_required_n(np.array([1., 2., 3.]), {"eps": 0.5, "confidence": 0.9})
    41
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.eps is None:
        raise MissingContextError("chebyshev_required_n requires ctx.eps")
    if ctx.eps <= 0:
        raise ValueError("ctx.eps must be positive")
    s = std(x, ctx)
    # std() returns None when cleaning leaves too little data; without this
    # guard the multiplication below would raise an opaque TypeError.
    if s is None:
        raise InsufficientDataError(
            "chebyshev_required_n requires non-empty data after cleaning"
        )
    # k = 1/sqrt(alpha), with alpha floored to avoid blow-up at confidence ~ 1.
    k = 1.0 / np.sqrt(max(_SMALL_VARIANCE_BOUND, 1.0 - ctx.confidence))
    return int(np.ceil(((k * s) / float(ctx.eps)) ** 2))
[docs]
def markov_error_prob(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Markov bound on error probability for target :math:`\theta`.

    Applying Markov's inequality to the squared error gives

    .. math::

        \Pr\!\left(\,|X - \theta| \ge \varepsilon\,\right)
        \;\le\; \frac{\mathrm{MSE}}{\varepsilon^2},
        \qquad \mathrm{MSE} = \frac{1}{n}\sum_i (x_i - \theta)^2.

    Since any probability bound above 1 is vacuous, the result is clamped
    to 1.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.target` and
        :attr:`~mcframework.stats_engine.StatsContext.eps`. The declared
        :attr:`~mcframework.stats_engine.StatsContext.n` influences the context but does not enter
        directly into the bound.

    Returns
    -------
    float
        Upper bound in :math:`[0, 1]`.

    Raises
    ------
    MissingContextError
        If ``ctx.target`` or ``ctx.eps`` is not supplied.
    ValueError
        If ``ctx.eps`` is not positive.
    InsufficientDataError
        If no data remain after cleaning.
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.target is None:
        raise MissingContextError("markov_error_prob requires ctx.target")
    if ctx.eps is None:
        raise MissingContextError("markov_error_prob requires ctx.eps")
    if ctx.eps <= 0:
        raise ValueError("ctx.eps must be positive")
    arr, _ = _clean(x, ctx)
    # np.mean of an empty array warns and yields NaN; fail loudly instead,
    # consistent with bias_to_target.
    if arr.size == 0:
        raise InsufficientDataError(
            "markov_error_prob requires non-empty data after cleaning"
        )
    mse = float(np.mean((arr - ctx.target) ** 2))
    # Clamp: the raw ratio mse/eps^2 can exceed 1, but the documented return
    # range is [0, 1] and the trivial bound P <= 1 always holds.
    return min(1.0, mse / (ctx.eps**2))
[docs]
def bias_to_target(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Bias of the sample mean relative to a target :math:`\theta`.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.target`.

    Returns
    -------
    float
        Estimated bias :math:`\bar X - \theta`.

    Raises
    ------
    MissingContextError
        If ``ctx.target`` is not supplied.
    InsufficientDataError
        If the sample is empty after cleaning.
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.target is None:
        raise MissingContextError("bias_to_target requires ctx.target")
    estimate = mean(x, ctx)
    # mean() yields None when cleaning leaves nothing to average.
    if estimate is None:
        raise InsufficientDataError("bias_to_target requires non-empty data after cleaning")
    return float(estimate - ctx.target)
[docs]
def mse_to_target(x: np.ndarray, ctx: StatsContext) -> float:
    r"""
    Mean squared deviation of the sample from a target :math:`\theta`.

    Computes the average squared error of the individual observations:

    .. math::

        \mathrm{MSE} = \frac{1}{n}\sum_i (x_i - \theta)^2
        \;\approx\; s^2 + (\bar X - \theta)^2.

    .. note::
        NOTE(review): this is the per-observation MSE, *not* the MSE of the
        sample mean (:math:`s^2/n + \text{Bias}^2`) that earlier
        documentation advertised — the variance term here is not divided by
        :math:`n`. Confirm which quantity callers expect before changing
        either the formula or the code.

    Parameters
    ----------
    x : ndarray
        Input sample.
    ctx : StatsContext or Mapping
        Requires :attr:`~mcframework.stats_engine.StatsContext.target`.

    Returns
    -------
    float
        Mean squared deviation of the observations from ``target``.

    Raises
    ------
    MissingContextError
        If ``ctx.target`` is not supplied.
    """
    ctx = _ensure_ctx(ctx, x)
    if ctx.target is None:
        raise MissingContextError("mse_to_target requires ctx.target")
    arr, _ = _clean(x, ctx)
    return float(np.mean((arr - ctx.target) ** 2))
[docs]
def build_default_engine(
    include_dist_free: bool = True,
    include_target_bounds: bool = True,
) -> StatsEngine:
    r"""
    Construct a :class:`~mcframework.stats_engine.StatsEngine` with a practical set of metrics.

    The engine always carries the descriptive metrics and the parametric and
    bootstrap CIs; the distribution-free and target-based groups are opt-out.

    Parameters
    ----------
    include_dist_free : bool, default True
        Include Chebyshev-based CI and sizing.
    include_target_bounds : bool, default True
        Include :func:`bias_to_target`, :func:`mse_to_target`, :func:`markov_error_prob`.

    Returns
    -------
    StatsEngine
    """
    # Core descriptive statistics and mean CIs -- always present.
    core: list[Metric] = [
        FnMetric("mean", mean, "Sample mean"),
        FnMetric("std", std, "Sample standard deviation"),
        FnMetric("percentiles", percentiles, "Percentiles over the sample"),
        FnMetric("skew", skew, "Fisher skewness (unbiased)"),
        FnMetric("kurtosis", kurtosis, "Excess kurtosis (unbiased)"),
        FnMetric("ci_mean", ci_mean, "z/t CI for the mean"),
        FnMetric("ci_mean_bootstrap", ci_mean_bootstrap, "Bootstrap CI for the mean"),
    ]
    # Optional distribution-free group.
    dist_free: list[Metric] = (
        [
            FnMetric("ci_mean_chebyshev", ci_mean_chebyshev, "Chebyshev bound CI for the mean"),
            FnMetric(
                "chebyshev_required_n", chebyshev_required_n, "Required n under Chebyshev to reach eps"
            ),
        ]
        if include_dist_free
        else []
    )
    # Optional target-relative group.
    target_bounds: list[Metric] = (
        [
            FnMetric("markov_error_prob", markov_error_prob, "Markov bound P(|X-target|>=eps)"),
            FnMetric("bias_to_target", bias_to_target, "Bias relative to target"),
            FnMetric("mse_to_target", mse_to_target, "Mean squared error to target"),
        ]
        if include_target_bounds
        else []
    )
    return StatsEngine(core + dist_free + target_bounds)
# Module-level default engine, constructed once at import time with every
# standard metric group enabled.
DEFAULT_ENGINE = build_default_engine(
    include_dist_free=True,
    include_target_bounds=True,
)

# Public API surface of this module.
__all__ = [
    # Enums and configuration
    "CIMethod",
    "NanPolicy",
    "BootstrapMethod",
    "StatsContext",
    # Engine machinery
    "ComputeResult",
    "Metric",
    "MetricSet",
    "FnMetric",
    "StatsEngine",
    # Exceptions
    "MissingContextError",
    "InsufficientDataError",
    # Descriptive metrics
    "mean",
    "std",
    "percentiles",
    "skew",
    "kurtosis",
    # Confidence intervals
    "ci_mean",
    "ci_mean_bootstrap",
    "ci_mean_chebyshev",
    # Distribution-free and target-based bounds
    "chebyshev_required_n",
    "markov_error_prob",
    "bias_to_target",
    "mse_to_target",
    # Engine construction
    "build_default_engine",
    "DEFAULT_ENGINE",
]