Source code for MiLoMerge.metrics.ROC_curves

import numpy as np
import numba as nb
from collections.abc import Iterable


@nb.njit(
    "(Array(float64, 1, 'A'), Array(float64, 1, 'A'))",
    fastmath=True,
    cache=True,
)
def ROC_curve(sample1: Iterable[float], sample2: Iterable[float]):
    """Compute the classical ROC curve and its area for two binned samples.

    Bins are ordered by increasing ``sample1 / sample2``. Both samples are
    accumulated in that order and each running total is divided by the
    overall sum of its own sample.

    Parameters
    ----------
    sample1 : Iterable[float]
        The "signal" sample. The compiled signature requires a 1-d
        float64 array.
    sample2 : Iterable[float]
        The "background" sample. A 1-d float64 array that must have the
        same size as ``sample1``.

    Returns
    -------
    tuple[Iterable[float], Iterable[float], float]
        The True Positive Rate (TPR) array, the False Positive Rate (FPR)
        array and the Area Under the Curve (AUC). Each array holds
        ``len(sample1) + 1`` entries: a leading entry for the empty
        accumulation followed by one entry per bin. The AUC is the
        trapezoidal integral of FPR with respect to TPR.

    Notes
    -----
    Bins where ``sample2`` is zero are not treated specially; they go
    straight into the ratio used for ordering.
    """
    n_bins = len(sample1)

    # Ascending signal-to-background ratio fixes the accumulation order.
    order = np.argsort(sample1 / sample2)

    # One extra leading slot so both curves include the "nothing
    # accumulated yet" point.
    signal_running = np.zeros(n_bins + 1, dtype=np.float64)
    background_running = np.zeros(n_bins + 1, dtype=np.float64)
    signal_running[1:] = np.cumsum(sample1[order])
    background_running[1:] = np.cumsum(sample2[order])

    # Normalise each running total by the full yield of its sample.
    TPR = signal_running / sample1.sum()
    FPR = background_running / sample2.sum()

    auc = np.trapz(FPR, TPR)
    return TPR, FPR, auc


@nb.njit(
    "(Array(float64, 1, 'A'), Array(float64, 1, 'A'))",
    fastmath=True,
    cache=False,
)
def LOC_curve(sample1: Iterable[float], sample2: Iterable[float]):
    """Compute the LOC curve and its length for two binned samples.

    TODO: add the arXiv reference that describes the LOC curve.

    At most one of the two samples may contain negative entries. If
    ``sample2`` contains a negative entry it is treated as the
    possibly-negative sample and ``sample1`` as the non-negative one;
    otherwise the roles are the other way round. Bins are ordered by
    increasing ``possibly_negative / non_negative`` and both samples are
    accumulated in that order.

    Parameters
    ----------
    sample1 : Iterable[float]
        The "signal" sample. The compiled signature requires a 1-d
        float64 array.
    sample2 : Iterable[float]
        The "background" sample. A 1-d float64 array that must have the
        same size as ``sample1``.

    Returns
    -------
    tuple[Iterable[float], Iterable[float], float]
        Two arrays labelled TPR and FPR, followed by the Length of the
        Curve (LoC). TPR is the running sum of the possibly-negative
        sample and FPR the running sum of the non-negative sample. Each
        array holds one more entry than the input (a leading zero, then
        one entry per bin). Unlike a classical ROC curve, the running
        sums are not divided by the sample totals. The LoC is the sum of
        the straight-line distances between consecutive (TPR, FPR)
        points.

    Raises
    ------
    ValueError
        If both samples contain at least one negative entry.
    """
    sample2_has_negative = np.any(sample2 < 0)

    # Guard: the two samples may not both carry negative entries.
    if sample2_has_negative and np.any(sample1 < 0):
        raise ValueError("Needs 1 positive sample!")

    # The non-negative sample always ends up as the ratio's denominator.
    if sample2_has_negative:
        maybe_signed, non_negative = sample2, sample1
    else:
        maybe_signed, non_negative = sample1, sample2

    order = np.argsort(maybe_signed / non_negative)
    n_points = len(maybe_signed) + 1

    TPR = np.zeros(n_points, dtype=np.float64)
    FPR = np.zeros(n_points, dtype=np.float64)
    TPR[1:] = np.cumsum(maybe_signed[order])
    FPR[1:] = np.cumsum(non_negative[order])

    # Curve length: Euclidean distance between consecutive points, summed.
    step_tpr = np.diff(TPR)
    step_fpr = np.diff(FPR)
    segment_lengths = np.sqrt(step_tpr ** 2 + step_fpr ** 2)
    length = segment_lengths.sum()

    return TPR, FPR, length