# Source code for djura.record_selection.metrics
# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2025-2026 Djura | Risk - Data - Engineering S.r.l.
import warnings
import numpy as np
from scipy import integrate
from scipy.stats import lognorm
# Bind the trapezoidal-rule integrator under one private name: NumPy
# releases from 2.0.0 onwards are asked for ``np.trapezoid``, older
# releases for ``np.trapz``.
if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
    _trapezoid = np.trapezoid
else:
    _trapezoid = np.trapz
# [docs]
def hellinger_distance(mu1, sigma1, mu2, sigma2, method="quadrature"):
    """
    Compute the Hellinger distance between two lognormal distributions.

    Failures are not raised to the caller: any exception or warning
    produced during the computation is recorded in the returned
    ``messages`` dictionary and the distance is returned as ``nan``
    when no value could be computed.

    Parameters
    ----------
    mu1, sigma1 : float
        Parameters of the first probability distribution
        (location and scale of the underlying normal);
        ``sigma1`` must be positive
    mu2, sigma2 : float
        Parameters of the second probability distribution
        (location and scale of the underlying normal);
        ``sigma2`` must be positive
    method : str
        'quadrature' for numerical integration or
        'sampling' for discrete approximation (trapezoidal rule on a
        log-spaced grid) or
        'closed-form' for a closed form computation using
        medians and dispersions
        by default, 'quadrature'; matching is case-insensitive

    Returns
    -------
    h : float
        Hellinger distance (between 0 and 1), ``nan`` on failure
    messages : dict
        ``{"warnings": [...], "errors": [...]}``. Each warning entry has
        the keys ``message``, ``category``, ``filename`` and ``lineno``;
        each error entry has the keys ``type`` and ``message``.
    """
    messages = {"warnings": [], "errors": []}
    h = np.nan

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including repeats that the default
        # filters would otherwise show only once.
        warnings.simplefilter("always")
        try:
            # A non-positive dispersion does not define a lognormal
            # distribution; report it explicitly instead of letting it
            # surface as NaN or as an unrelated arithmetic error.
            if (np.any(np.less_equal(sigma1, 0))
                    or np.any(np.less_equal(sigma2, 0))):
                raise ValueError("sigma1 and sigma2 must be positive")

            kind = method.lower()

            if kind == "quadrature":
                def integrand(x):
                    f1 = lognorm.pdf(x, s=sigma1, scale=np.exp(mu1))
                    f2 = lognorm.pdf(x, s=sigma2, scale=np.exp(mu2))
                    return (np.sqrt(f1) - np.sqrt(f2)) ** 2

                result, _ = integrate.quad(integrand, 0, np.inf)
                h_squared = 0.5 * result
            elif kind == "sampling":
                ln10 = np.log(10.0)
                x_max = max(np.exp(mu1 + 5 * sigma1),
                            np.exp(mu2 + 5 * sigma2))
                # Start the grid at 1e-6 as a rule, but reach further
                # down when either distribution still carries mass
                # below that (5 sigma under its median).
                lower_exp = min(-6.0,
                                (mu1 - 5 * sigma1) / ln10,
                                (mu2 - 5 * sigma2) / ln10)
                x = np.logspace(lower_exp, np.log10(x_max), 10000)
                f1 = lognorm.pdf(x, s=sigma1, scale=np.exp(mu1))
                f2 = lognorm.pdf(x, s=sigma2, scale=np.exp(mu2))
                result = _trapezoid((np.sqrt(f1) - np.sqrt(f2)) ** 2, x)
                h_squared = 0.5 * result
            elif kind == "closed-form":
                var_sum = sigma1**2 + sigma2**2
                # Bhattacharyya coefficient of the two distributions.
                affinity = (np.sqrt(2 * sigma1 * sigma2 / var_sum)
                            * np.exp(-((mu1 - mu2)**2 / (4 * var_sum))))
                h_squared = 1 - affinity
            else:
                raise ValueError(
                    "Wrong method, must be 'quadrature' or 'sampling'"
                    " or 'closed-form'")

            # Round-off can push the squared distance marginally outside
            # [0, 1]; clamp so the square root never turns a tiny
            # negative number into NaN. NaN inputs still propagate.
            h = np.sqrt(np.clip(h_squared, 0.0, 1.0))
        except Exception as e:
            # Deliberately broad: the contract is to report failures via
            # ``messages`` rather than raise.
            messages["errors"].append({
                "type": type(e).__name__,
                "message": str(e)
            })

    messages["warnings"].extend(
        {
            "message": str(item.message),
            "category": item.category.__name__,
            "filename": item.filename,
            "lineno": item.lineno
        }
        for item in caught
    )
    return h, messages