Source code for pyriemann_qiskit.optimization.anderson_optimizer

"""Anderson Acceleration Optimizer for Variational Quantum Circuits.

This module implements mesh-free Anderson acceleration for optimizing
variational quantum circuit parameters on Riemannian manifolds (Bloch sphere).

The parameters are rotation angles for quantum gates, which live on a
Riemannian manifold with periodic boundary conditions. The optimizer:

- Computes gradients in the tangent space
- Handles periodic wraparound for angle parameters
- Uses Riemannian retraction to project back to the manifold

The algorithm follows equations 41-44 from the mesh-free IGL formulation:

- ΔW = [w_k - w_{k-1}, ..., w_{k-m+1} - w_{k-m}]  (Eq. 41)
- ΔR = [r_k - r_{k-1}, ..., r_{k-m+1} - r_{k-m}]  (Eq. 42)
- γ = argmin_γ ||r_k - ΔR·γ||² + λ||γ||²         (Eq. 43)
- w_{k+1} = w_k + α·r_k - (ΔW + α·ΔR)·γ          (Eq. 44)

where differences are computed in the tangent space and r_k is the
Riemannian gradient-based residual.

Notes
-----
.. versionchanged:: 0.6.0
    Moved from ``pyriemann_qiskit.utils.anderson_optimizer`` to
    ``pyriemann_qiskit.optimization.anderson_optimizer``.

References
----------
.. [1] Toth, A., & Kelley, C. T. (2015). Convergence analysis for Anderson
       acceleration. SIAM Journal on Numerical Analysis, 53(2), 805-819.
.. [2] Walker, H. F., & Ni, P. (2011). Anderson acceleration for fixed-point
       iterations. SIAM Journal on Numerical Analysis, 49(4), 1715-1735.
"""

from collections import deque

import numpy as np
from qiskit_algorithms.optimizers import (
    Optimizer,
    OptimizerResult,
    OptimizerSupportLevel,
)
from scipy.linalg import lstsq


[docs] class AndersonAccelerationOptimizer(Optimizer): """Anderson acceleration optimizer for variational quantum circuits. Anderson acceleration is a derivative-free fixed-point iteration method that achieves superlinear convergence for compact operators. Gradients are approximated via central finite differences, so no analytical gradient is required. It is particularly well-suited for quantum circuit optimization where: - Analytical gradients are expensive or unavailable - The objective function is bounded (compact operator) - The parameter space may have Riemannian structure The algorithm maintains a history of parameter vectors and residuals, then solves a small least-squares problem to extrapolate the next iterate. This typically converges in 15-25 iterations compared to 100+ for gradient-based methods. Parameters ---------- maxiter : int, default=100 Maximum number of iterations. m : int, default=5 History depth (number of previous iterates to use). Typical values: 5-10. Larger values may improve convergence but increase memory and computation. alpha : float, default=1.0 Damping parameter. alpha=1.0 is undamped (recommended for quantum circuits). Values < 1.0 add damping for stability. lambda_reg : float, default=1e-8 Regularization parameter for the least-squares problem. Prevents ill-conditioning when residual vectors become nearly collinear. tol : float, default=1e-6 Convergence tolerance on the residual norm. fd_epsilon : float, default=1e-5 Finite-difference step size for gradient approximation. Should be small (~1e-5 to 1e-7) for accurate numerical gradients. learning_rate : float, default=0.1 Step size for the fixed-point map ``g(x) = x - lr·∇f(x)``. Controls how far the iterates move each step. Values in 0.05–0.5 are typical; too small causes slow convergence, too large causes oscillation or bounds overshoot. Attributes ---------- trajectory_ : list of ndarray Optimization trajectory (parameter history). loss_history_ : list of float Loss function values at each iteration. Examples -------- >>> from pyriemann_qiskit.optimization.anderson_optimizer import ( ... AndersonAccelerationOptimizer ... ) >>> optimizer = AndersonAccelerationOptimizer(maxiter=25, m=5) >>> # Use with QuanticNCH or other quantum classifiers >>> # qaoa_optimizer=optimizer """
[docs] def __init__( self, maxiter=100, m=5, alpha=1.0, lambda_reg=1e-8, tol=1e-6, fd_epsilon=1e-5, learning_rate=0.1, ): super().__init__() self._maxiter = maxiter self._m = m self._alpha = alpha self._lambda_reg = lambda_reg self._tol = tol self._fd_epsilon = fd_epsilon self._lr = learning_rate self.trajectory_ = [] self.loss_history_ = []
def minimize(self, fun, x0, jac=None, bounds=None): """Minimize the objective function using Anderson acceleration. Parameters ---------- fun : callable Objective function to minimize. Should accept a 1D array and return a scalar. x0 : ndarray Initial parameter vector. jac : callable, optional Gradient function (not used, included for compatibility). bounds : list of tuples, optional Parameter bounds (not enforced in current implementation). Returns ------- OptimizerResult Result object containing: - x: optimal parameters - fun: objective value at optimal parameters - nfev: number of function evaluations - nit: number of iterations """ # Initialize x = np.array(x0, dtype=float) n = len(x) nfev = 0 # History storage — deque auto-evicts oldest entry when full (O(1) FIFO) W_history = deque(maxlen=self._m) # Parameter history R_history = deque(maxlen=self._m) # Residual history # Evaluate initial point f_current = fun(x) nfev += 1 # Initialize trajectory tracking self.trajectory_ = [x.copy()] self.loss_history_ = [f_current] # Precompute vectorised bounds arrays once (avoids repeated per-element work) if bounds is not None: lower_b = np.array([b[0] if b[0] is not None else np.nan for b in bounds]) upper_b = np.array([b[1] if b[1] is not None else np.nan for b in bounds]) # Only treat as periodic if the span is a full 2π rotation period. # Bounds like [0, π] (QIOCE) must be clipped, not wrapped — wrapping # with period π teleports parameters to the wrong end of the range. span = upper_b - lower_b periodic = ~(np.isnan(lower_b) | np.isnan(upper_b)) & np.isclose( span, 2 * np.pi ) period_b = np.where(periodic, span, 1.0) # dummy for non-periodic has_lower = ~np.isnan(lower_b) & ~periodic has_upper = ~np.isnan(upper_b) & ~periodic # For wraparound correction: period column-vector broadcasts # over m_k columns period_col = period_b[:, np.newaxis] iteration = 0 for iteration in range(self._maxiter): # Anderson acceleration for Riemannian manifold (Bloch sphere) # Parameters are angles, so we need to respect the manifold structure # Estimate gradient using central differences. # Modify x[i] in-place and restore — avoids 2n array allocations per iter. # Clamp perturbations to stay within bounds so loss is never evaluated # at an illegal parameter value. grad_approx = np.zeros(n) for i in range(n): orig = x[i] step = self._fd_epsilon x_plus = orig + step x_minus = orig - step if bounds is not None: lo = bounds[i][0] hi = bounds[i][1] if lo is not None: x_minus = max(x_minus, lo) if hi is not None: x_plus = min(x_plus, hi) actual_step = (x_plus - x_minus) / 2.0 x[i] = x_plus f_plus = fun(x) nfev += 1 x[i] = x_minus f_minus = fun(x) nfev += 1 x[i] = orig if actual_step != 0.0: grad_approx[i] = (f_plus - f_minus) / (2.0 * actual_step) # Compute residual: r_k = g(x_k) - x_k = -lr * grad_approx # lr controls step size independently from fd_epsilon. r = -self._lr * grad_approx # Check convergence r_norm = np.linalg.norm(r) if r_norm < self._tol: break # Store current iterate and residual # (r is freshly allocated — no .copy() needed) W_history.append(x.copy()) R_history.append(r) # Anderson acceleration step if len(W_history) > 1: # Build difference matrices (Equations 41-42) via vectorised np.diff W_arr = np.array(W_history) # shape (m_k+1, n) R_arr = np.array(R_history) Delta_W = np.diff(W_arr, axis=0).T # shape (n, m_k) Delta_R = np.diff(R_arr, axis=0).T # For angles, handle wraparound in tangent-space differences if bounds is not None: for dmat in (Delta_W, Delta_R): mask = np.abs(dmat) > period_col / 2 dmat -= np.where(mask, np.sign(dmat) * period_col, 0.0) m_k = Delta_W.shape[1] # Solve least-squares problem (Equation 43) # min_gamma ||r_k - Delta_R * gamma||^2 + lambda * ||gamma||^2 A = Delta_R.T @ Delta_R + self._lambda_reg * np.eye(m_k) b = Delta_R.T @ r try: gamma = np.linalg.solve(A, b) except np.linalg.LinAlgError: # Fallback to lstsq if solve fails gamma, _, _, _ = lstsq(A, b) # Update step (Equation 44) # w_{k+1} = w_k + alpha * r_k - (Delta_W + alpha * Delta_R) * gamma x_new = x + self._alpha * r - (Delta_W + self._alpha * Delta_R) @ gamma else: # First iteration or no history: simple fixed-point step # w_{k+1} = w_k + alpha * r_k x_new = x + self._alpha * r # Project back to manifold (Riemannian retraction) — vectorised if bounds is not None: x_new = np.where( periodic, lower_b + np.mod(x_new - lower_b, period_b), x_new, ) x_new = np.where(has_lower, np.maximum(x_new, lower_b), x_new) x_new = np.where(has_upper, np.minimum(x_new, upper_b), x_new) # Update x = x_new f_current = fun(x) nfev += 1 # Append to trajectory (fix: was overwriting instead of appending) self.trajectory_.append(x.copy()) self.loss_history_.append(f_current) # Return result result = OptimizerResult() result.x = x result.fun = f_current result.nfev = nfev result.nit = iteration + 1 if iteration >= 0 else 0 return result @property def settings(self): """Return optimizer settings.""" return { "maxiter": self._maxiter, "m": self._m, "alpha": self._alpha, "lambda_reg": self._lambda_reg, "tol": self._tol, "fd_epsilon": self._fd_epsilon, "learning_rate": self._lr, } def get_support_level(self): """Return support level dictionary.""" return { "gradient": OptimizerSupportLevel.ignored, "bounds": OptimizerSupportLevel.supported, "initial_point": OptimizerSupportLevel.required, }