import random
import numpy as np
from joblib import Parallel, delayed
from pyriemann.utils.distance import distance
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.utils.extmath import softmax
from ...optimization.distance import qdistance_logeuclid_to_convex_hull
from ...optimization.docplex import ClassicalOptimizer
[docs]
class NearestConvexHull(ClassifierMixin, TransformerMixin, BaseEstimator):
"""Classification by Nearest Convex Hull (NCH).
In Nearest Convex Hull (NCH) classifier [1]_, each class is modelized by
the convex hull generated by the matrices corresponding to this class.
There is no training. Calculating a distance to a hull is an optimization
problem and it is calculated for each testing SPD matrix and each hull.
The minimal distance defines the predicted class.
Current implementation is available only for log-Euclidean distance.
It is compatible with constraint programming optimization [2]_.
Notes
-----
.. versionadded:: 0.2.0
.. versionchanged:: 0.4.2
Added optimizer parameter
.. versionchanged:: 0.6.0
Moved to algorithms sub-package
Parameters
----------
n_jobs : int, default=6
Number of jobs to use for the computation. This works by computing
each of the hulls in parallel.
n_hulls_per_class : int, default=3
Number of hulls used per class, when subsampling is "random".
n_samples_per_hull : int, default=15
Number of matrices used to build a hull.
-1 will include all matrices per class.
If subsampling is "full", this parameter is defaulted to -1.
subsampling : {"min", "random", "full"}, default="min"
Strategy to subsample the training set to estimate the hull,
when computing the distance to hulls:
- "full" computes the hull on the entire training matrices, as in [1]_;
- "min" estimates hull using the n_samples_per_hull closest matrices;
- "random" estimates hull using n_samples_per_hull random matrices.
seed : float, default=None
Optional random seed to use when subsampling is set to "random".
optimizer : pyQiskitOptimizer, default=ClassicalOptimizer()
An instance of :class:`pyriemann_qiskit.optimization.docplex.pyQiskitOptimizer`.
References
----------
.. [1] `Convex Class Model on Symmetric Positive Definite Manifolds
<https://arxiv.org/pdf/1806.05343>`_
K. Zhao, A. Wiliem, S. Chen, and B. C. Lovell,
Image and Vision Computing, 2019.
.. [2] `Translating the Nearest Convex Hull Classifier from Classical to
Quantum Computing <https://www.mdpi.com/2624-960X/7/4/51>`_
G. Cattan, A. Andreev, and Q. Barthélemy, 2025.
"""
[docs]
def __init__(
self,
n_jobs=6,
n_hulls_per_class=3,
n_samples_per_hull=10,
subsampling="min",
seed=None,
optimizer=None,
):
self.n_jobs = n_jobs
self.n_samples_per_hull = n_samples_per_hull
self.n_hulls_per_class = n_hulls_per_class
self.matrices_per_class_ = {}
self.subsampling = subsampling
self.seed = seed
self.optimizer = optimizer if optimizer is not None else ClassicalOptimizer()
if subsampling not in ["min", "random", "full"]:
raise ValueError(f"Unknown subsampling type {subsampling}.")
if subsampling == "full":
# From code perspective, "full" strategy is the same as min strategy
# without sorting
self.n_samples_per_hull = -1
def fit(self, X, y):
"""Fit (store the training data).
Parameters
----------
X : ndarray, shape (n_matrices, n_channels, n_channels)
Set of SPD matrices.
y : ndarray, shape (n_matrices,)
Labels for each matrix.
Returns
-------
self : NearestConvexHull instance
The NearestConvexHull instance.
"""
self.random_generator = random.Random(self.seed)
self.classes_ = np.unique(y)
for c in self.classes_:
self.matrices_per_class_[c] = X[y == c]
def _process_sample_min_hull(self, x):
"""Finds the closest matrices and uses them to build a single hull per class"""
dists = []
for c in self.classes_:
dist = distance(self.matrices_per_class_[c], x, metric="logeuclid")[:, 0]
# take the closest matrices
indexes = np.argsort(dist)[0 : self.n_samples_per_hull]
d = qdistance_logeuclid_to_convex_hull(
self.matrices_per_class_[c][indexes], x, self.optimizer
)
dists.append(d)
return dists
def _process_sample_random_hull(self, x):
"""Uses random matrices to build a hull, can be several hulls per class"""
dists = []
for c in self.classes_:
dist_total = 0
# using multiple hulls
for i in range(0, self.n_hulls_per_class):
if self.n_samples_per_hull == -1: # use all data per class
hull_data = self.matrices_per_class_[c]
else: # use a subset of the data per class
random_samples = self.random_generator.sample(
range(self.matrices_per_class_[c].shape[0]),
k=self.n_samples_per_hull,
)
hull_data = self.matrices_per_class_[c][random_samples]
dist = qdistance_logeuclid_to_convex_hull(hull_data, x, self.optimizer)
dist_total = dist_total + dist
dists.append(dist_total)
return dists
def _predict_distances(self, X):
"""Helper to predict the distance. Equivalent to transform."""
dists = []
if self.subsampling == "min" or self.subsampling == "full":
self._process_sample = self._process_sample_min_hull
elif self.subsampling == "random":
self._process_sample = self._process_sample_random_hull
else:
raise ValueError(f"Unknown subsampling type {self.subsampling}.")
parallel = self.n_jobs > 1
if parallel:
def job(x):
return self._process_sample(x)
dists = Parallel(n_jobs=self.n_jobs)(delayed(job)(x) for x in X)
else:
for x in X:
dist = self._process_sample(x)
dists.append(dist)
dists = np.asarray(dists)
return dists
def predict_proba(self, X):
"""Predict proba using softmax of negative squared distances.
Parameters
----------
X : ndarray, shape (n_matrices, n_channels, n_channels)
Set of SPD/HPD matrices.
Returns
-------
prob : ndarray, shape (n_matrices, n_classes)
Probabilities for each class.
"""
return softmax(-self._predict_distances(X) ** 2)
def predict(self, X):
"""Get the predictions.
Parameters
----------
X : ndarray, shape (n_matrices, n_channels, n_channels)
Set of SPD matrices.
Returns
-------
pred : ndarray of int, shape (n_matrices,)
Predictions for each matrix according to the closest convex hull.
"""
dist = self._predict_distances(X)
predictions = self.classes_[dist.argmin(axis=1)]
return predictions
def transform(self, X):
"""Get the distance to each convex hull.
Parameters
----------
X : ndarray, shape (n_matrices, n_channels, n_channels)
Set of SPD matrices.
Returns
-------
dist : ndarray, shape (n_matrices, n_classes)
The distance to each convex hull.
"""
return self._predict_distances(X)