Source code for pyriemann_qiskit.optimization.anderson_optimizer

"""Anderson Acceleration Optimizer for Variational Quantum Circuits.

This module implements mesh-free Anderson acceleration for optimizing
variational quantum circuit parameters on Riemannian manifolds (Bloch sphere).

The parameters are rotation angles for quantum gates, which live on a
Riemannian manifold with periodic boundary conditions. The optimizer:

- Computes gradients in the tangent space
- Handles periodic wraparound for angle parameters
- Uses Riemannian retraction to project back to the manifold

The algorithm follows equations 41-44 from the mesh-free IGL formulation:

- ΔW = [w_k - w_{k-1}, ..., w_{k-m+1} - w_{k-m}]  (Eq. 41)
- ΔR = [r_k - r_{k-1}, ..., r_{k-m+1} - r_{k-m}]  (Eq. 42)
- γ = argmin_γ ||r_k - ΔR·γ||² + λ||γ||²         (Eq. 43)
- w_{k+1} = w_k + α·r_k - (ΔW + α·ΔR)·γ          (Eq. 44)

where differences are computed in the tangent space and r_k is the
Riemannian gradient-based residual.

Notes
-----
.. versionchanged:: 0.6.0
    Moved from ``pyriemann_qiskit.utils.anderson_optimizer`` to
    ``pyriemann_qiskit.optimization.anderson_optimizer``.

References
----------
.. [1] Toth, A., & Kelley, C. T. (2015). Convergence analysis for Anderson
       acceleration. SIAM Journal on Numerical Analysis, 53(2), 805-819.
.. [2] Walker, H. F., & Ni, P. (2011). Anderson acceleration for fixed-point
       iterations. SIAM Journal on Numerical Analysis, 49(4), 1715-1735.
"""

from collections import deque

import numpy as np
from qiskit_algorithms.optimizers import (
    Optimizer,
    OptimizerResult,
    OptimizerSupportLevel,
)
from scipy.linalg import lstsq



[docs]
class AndersonAccelerationOptimizer(Optimizer):
    """Anderson acceleration optimizer for variational quantum circuits.

    Anderson acceleration is a derivative-free fixed-point iteration method
    that achieves superlinear convergence for compact operators. Gradients are
    approximated via central finite differences, so no analytical gradient is
    required. It is particularly well-suited for quantum circuit optimization where:

    - Analytical gradients are expensive or unavailable
    - The objective function is bounded (compact operator)
    - The parameter space may have Riemannian structure

    The algorithm maintains a history of parameter vectors and residuals,
    then solves a small least-squares problem to extrapolate the next
    iterate. This typically converges in 15-25 iterations compared to
    100+ for gradient-based methods.

    Parameters
    ----------
    maxiter : int, default=100
        Maximum number of iterations.
    m : int, default=5
        History depth (number of previous iterates to use).
        Typical values: 5-10. Larger values may improve convergence
        but increase memory and computation.
    alpha : float, default=1.0
        Damping parameter. alpha=1.0 is undamped (recommended for
        quantum circuits). Values < 1.0 add damping for stability.
    lambda_reg : float, default=1e-8
        Regularization parameter for the least-squares problem.
        Prevents ill-conditioning when residual vectors become
        nearly collinear.
    tol : float, default=1e-6
        Convergence tolerance on the residual norm.
    fd_epsilon : float, default=1e-5
        Finite-difference step size for gradient approximation.
        Should be small (~1e-5 to 1e-7) for accurate numerical gradients.
    learning_rate : float, default=0.1
        Step size for the fixed-point map ``g(x) = x - lr·∇f(x)``.
        Controls how far the iterates move each step. Values in 0.05–0.5
        are typical; too small causes slow convergence, too large causes
        oscillation or bounds overshoot.

    Attributes
    ----------
    trajectory_ : list of ndarray
        Optimization trajectory (parameter history).
    loss_history_ : list of float
        Loss function values at each iteration.

    Examples
    --------
    >>> from pyriemann_qiskit.optimization.anderson_optimizer import (
    ...     AndersonAccelerationOptimizer
    ... )
    >>> optimizer = AndersonAccelerationOptimizer(maxiter=25, m=5)
    >>> # Use with QuanticNCH or other quantum classifiers
    >>> # qaoa_optimizer=optimizer
    """


[docs]
    def __init__(
        self,
        maxiter=100,
        m=5,
        alpha=1.0,
        lambda_reg=1e-8,
        tol=1e-6,
        fd_epsilon=1e-5,
        learning_rate=0.1,
    ):
        super().__init__()
        self._maxiter = maxiter
        self._m = m
        self._alpha = alpha
        self._lambda_reg = lambda_reg
        self._tol = tol
        self._fd_epsilon = fd_epsilon
        self._lr = learning_rate
        self.trajectory_ = []
        self.loss_history_ = []


    def minimize(self, fun, x0, jac=None, bounds=None):
        """Minimize the objective function using Anderson acceleration.

        Parameters
        ----------
        fun : callable
            Objective function to minimize. Should accept a 1D array
            and return a scalar.
        x0 : ndarray
            Initial parameter vector.
        jac : callable, optional
            Gradient function (not used, included for compatibility).
        bounds : list of tuples, optional
            Parameter bounds (not enforced in current implementation).

        Returns
        -------
        OptimizerResult
            Result object containing:

            - x: optimal parameters
            - fun: objective value at optimal parameters
            - nfev: number of function evaluations
            - nit: number of iterations
        """
        # Initialize
        x = np.array(x0, dtype=float)
        n = len(x)
        nfev = 0

        # History storage — deque auto-evicts oldest entry when full (O(1) FIFO)
        W_history = deque(maxlen=self._m)  # Parameter history
        R_history = deque(maxlen=self._m)  # Residual history

        # Evaluate initial point
        f_current = fun(x)
        nfev += 1

        # Initialize trajectory tracking
        self.trajectory_ = [x.copy()]
        self.loss_history_ = [f_current]

        # Precompute vectorised bounds arrays once (avoids repeated per-element work)
        if bounds is not None:
            lower_b = np.array([b[0] if b[0] is not None else np.nan for b in bounds])
            upper_b = np.array([b[1] if b[1] is not None else np.nan for b in bounds])
            # Only treat as periodic if the span is a full 2π rotation period.
            # Bounds like [0, π] (QIOCE) must be clipped, not wrapped — wrapping
            # with period π teleports parameters to the wrong end of the range.
            span = upper_b - lower_b
            periodic = ~(np.isnan(lower_b) | np.isnan(upper_b)) & np.isclose(
                span, 2 * np.pi
            )
            period_b = np.where(periodic, span, 1.0)  # dummy for non-periodic
            has_lower = ~np.isnan(lower_b) & ~periodic
            has_upper = ~np.isnan(upper_b) & ~periodic
            # For wraparound correction: period column-vector broadcasts
            # over m_k columns
            period_col = period_b[:, np.newaxis]

        iteration = 0
        for iteration in range(self._maxiter):
            # Anderson acceleration for Riemannian manifold (Bloch sphere)
            # Parameters are angles, so we need to respect the manifold structure

            # Estimate gradient using central differences.
            # Modify x[i] in-place and restore — avoids 2n array allocations per iter.
            # Clamp perturbations to stay within bounds so loss is never evaluated
            # at an illegal parameter value.
            grad_approx = np.zeros(n)
            for i in range(n):
                orig = x[i]
                step = self._fd_epsilon

                x_plus = orig + step
                x_minus = orig - step
                if bounds is not None:
                    lo = bounds[i][0]
                    hi = bounds[i][1]
                    if lo is not None:
                        x_minus = max(x_minus, lo)
                    if hi is not None:
                        x_plus = min(x_plus, hi)
                actual_step = (x_plus - x_minus) / 2.0

                x[i] = x_plus
                f_plus = fun(x)
                nfev += 1
                x[i] = x_minus
                f_minus = fun(x)
                nfev += 1
                x[i] = orig
                if actual_step != 0.0:
                    grad_approx[i] = (f_plus - f_minus) / (2.0 * actual_step)

            # Compute residual: r_k = g(x_k) - x_k = -lr * grad_approx
            # lr controls step size independently from fd_epsilon.
            r = -self._lr * grad_approx

            # Check convergence
            r_norm = np.linalg.norm(r)
            if r_norm < self._tol:
                break

            # Store current iterate and residual
            # (r is freshly allocated — no .copy() needed)
            W_history.append(x.copy())
            R_history.append(r)

            # Anderson acceleration step
            if len(W_history) > 1:
                # Build difference matrices (Equations 41-42) via vectorised np.diff
                W_arr = np.array(W_history)  # shape (m_k+1, n)
                R_arr = np.array(R_history)
                Delta_W = np.diff(W_arr, axis=0).T  # shape (n, m_k)
                Delta_R = np.diff(R_arr, axis=0).T

                # For angles, handle wraparound in tangent-space differences
                if bounds is not None:
                    for dmat in (Delta_W, Delta_R):
                        mask = np.abs(dmat) > period_col / 2
                        dmat -= np.where(mask, np.sign(dmat) * period_col, 0.0)

                m_k = Delta_W.shape[1]

                # Solve least-squares problem (Equation 43)
                # min_gamma ||r_k - Delta_R * gamma||^2 + lambda * ||gamma||^2
                A = Delta_R.T @ Delta_R + self._lambda_reg * np.eye(m_k)
                b = Delta_R.T @ r

                try:
                    gamma = np.linalg.solve(A, b)
                except np.linalg.LinAlgError:
                    # Fallback to lstsq if solve fails
                    gamma, _, _, _ = lstsq(A, b)

                # Update step (Equation 44)
                # w_{k+1} = w_k + alpha * r_k - (Delta_W + alpha * Delta_R) * gamma
                x_new = x + self._alpha * r - (Delta_W + self._alpha * Delta_R) @ gamma
            else:
                # First iteration or no history: simple fixed-point step
                # w_{k+1} = w_k + alpha * r_k
                x_new = x + self._alpha * r

            # Project back to manifold (Riemannian retraction) — vectorised
            if bounds is not None:
                x_new = np.where(
                    periodic,
                    lower_b + np.mod(x_new - lower_b, period_b),
                    x_new,
                )
                x_new = np.where(has_lower, np.maximum(x_new, lower_b), x_new)
                x_new = np.where(has_upper, np.minimum(x_new, upper_b), x_new)

            # Update
            x = x_new
            f_current = fun(x)
            nfev += 1

            # Append to trajectory (fix: was overwriting instead of appending)
            self.trajectory_.append(x.copy())
            self.loss_history_.append(f_current)

        # Return result
        result = OptimizerResult()
        result.x = x
        result.fun = f_current
        result.nfev = nfev
        result.nit = iteration + 1 if iteration >= 0 else 0
        return result

    @property
    def settings(self):
        """Return optimizer settings."""
        return {
            "maxiter": self._maxiter,
            "m": self._m,
            "alpha": self._alpha,
            "lambda_reg": self._lambda_reg,
            "tol": self._tol,
            "fd_epsilon": self._fd_epsilon,
            "learning_rate": self._lr,
        }

    def get_support_level(self):
        """Return support level dictionary."""

        return {
            "gradient": OptimizerSupportLevel.ignored,
            "bounds": OptimizerSupportLevel.supported,
            "initial_point": OptimizerSupportLevel.required,
        }