Comparison for quadratics

import numpy as np
import matplotlib.pyplot as plt
np.random.seed(228)

# Parameters
n = 100  # Dimension of x
mu = 10
L = 100

def format_num(x):
    """
    Formats a number into scientific notation with two decimal places
    for the significand and removes the extra 0 in the exponent.
    Example: 1.14e-01 becomes 1.14e-1.
    """
    s = f"{x:.2e}"
    s = s.replace("e+0", "e+").replace("e-0", "e-")
    return s

def generate_problem(n=100, mu=mu, L=L, problem_type="clustered"):
    np.random.seed(228)
    if problem_type == "clustered":
        A = np.diagflat([mu * np.ones(n // 4), (mu + (L - mu) / 3) * np.ones(n // 4), 
                         (mu + 2 * (L - mu) / 3) * np.ones(n // 4), L * np.ones(n // 4)])
        U = np.random.rand(n, n)
        Q, _ = np.linalg.qr(U)
        A = Q.dot(A).dot(Q.T)
        A = (A + A.T) * 0.5
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 5 * np.random.randn(n)

    elif problem_type == "random":
        A = np.random.randn(n, n)
        factual_L = max(np.linalg.eigvalsh(A.T @ A))
        A = A.T.dot(A) / factual_L * L + mu * np.eye(n)
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 3 * np.random.randn(n)
    
    elif problem_type == "uniform spectrum":
        A = np.diag(np.linspace(mu, L, n, endpoint=True))
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 3 * np.random.randn(n)

    elif problem_type == "Hilbert":
        A = np.array([[1.0 / (i + j - 1) for i in range(1, n+1)] for j in range(1, n+1)])
        b = np.ones(n)
        x_0 = 3 * np.random.randn(n)
        x_opt = np.linalg.lstsq(A, b, rcond=None)[0]

    elif problem_type == "worst_cg":
        t = 0.6  # Small t leads to worse conditioning
        main_diag = np.ones(n)
        main_diag[0] = t
        main_diag[1:] = 1 + t
        off_diag = np.sqrt(t) * np.ones(n-1)
        A = np.diag(main_diag) + np.diag(off_diag, k=1) + np.diag(off_diag, k=-1)
        b = np.zeros(n)
        b[0] = 1
        x_opt = np.linalg.solve(A, b)
        x_0 = np.zeros(n)

    return A, b, x_0, x_opt
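
A quick usage sketch (illustrative only; the *_demo names are local to this snippet): generate a problem and inspect its spectrum and condition number.

A_demo, b_demo, x_0_demo, x_opt_demo = generate_problem(n=100, mu=10, L=100, problem_type="clustered")
eigs_demo = np.linalg.eigvalsh(A_demo)
print(f"lambda_min = {eigs_demo.min():.2f}, lambda_max = {eigs_demo.max():.2f}, "
      f"condition number = {eigs_demo.max() / eigs_demo.min():.2f}")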

# Optimization methods now return (function_gap, domain_gap, grad_norm, alpha, beta)

def gradient_descent(f, grad_f, x_0, step_size, iterations, x_opt):
    x = x_0.copy()
    f_opt = f(x_opt)
    function_gap = [abs(f(x) - f_opt)]
    domain_gap = [np.linalg.norm(x - x_opt)]
    grad_norms = [np.linalg.norm(grad_f(x))]
    for _ in range(iterations):
        x = x - step_size * grad_f(x)
        function_gap.append(abs(f(x) - f_opt))
        domain_gap.append(np.linalg.norm(x - x_opt))
        grad_norms.append(np.linalg.norm(grad_f(x)))
    return function_gap, domain_gap, grad_norms, step_size, 0

def heavy_ball(f, grad_f, x_0, iterations, x_opt, mu, L):
    ### YOUR CODE HERE
    return function_gap, domain_gap, grad_norms, alpha, beta
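
One possible way to fill in the placeholder above (a minimal sketch, not necessarily the intended solution): the textbook heavy-ball parameters for a μ-strongly convex, L-smooth quadratic, α = 4/(√L + √μ)² and β = ((√L − √μ)/(√L + √μ))². The name heavy_ball_quadratic_sketch is hypothetical; it only mirrors the expected signature and return values.

def heavy_ball_quadratic_sketch(f, grad_f, x_0, iterations, x_opt, mu, L):
    # Classical heavy-ball parameters for quadratics; assumes mu > 0.
    alpha = 4 / (np.sqrt(L) + np.sqrt(mu))**2
    beta = ((np.sqrt(L) - np.sqrt(mu)) / (np.sqrt(L) + np.sqrt(mu)))**2
    x_prev, x = x_0.copy(), x_0.copy()
    f_opt = f(x_opt)
    function_gap = [abs(f(x) - f_opt)]
    domain_gap = [np.linalg.norm(x - x_opt)]
    grad_norms = [np.linalg.norm(grad_f(x))]
    for _ in range(iterations):
        # x_{k+1} = x_k - alpha * grad f(x_k) + beta * (x_k - x_{k-1})
        x_new = x - alpha * grad_f(x) + beta * (x - x_prev)
        x_prev, x = x, x_new
        function_gap.append(abs(f(x) - f_opt))
        domain_gap.append(np.linalg.norm(x - x_opt))
        grad_norms.append(np.linalg.norm(grad_f(x)))
    return function_gap, domain_gap, grad_norms, alpha, beta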

def nesterov_accelerated_gradient(f, grad_f, x_0, iterations, x_opt, mu, L):
    ### YOUR CODE HERE
    return function_gap, domain_gap, grad_norms, alpha, beta
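
Similarly, a minimal sketch for the NAG placeholder with constant momentum, assuming the strongly convex setting (α = 1/L, β = (√L − √μ)/(√L + √μ)); the name nag_quadratic_sketch is hypothetical.

def nag_quadratic_sketch(f, grad_f, x_0, iterations, x_opt, mu, L):
    # Constant-momentum Nesterov acceleration; assumes mu > 0.
    alpha = 1 / L
    beta = (np.sqrt(L) - np.sqrt(mu)) / (np.sqrt(L) + np.sqrt(mu))
    x_prev, x = x_0.copy(), x_0.copy()
    f_opt = f(x_opt)
    function_gap = [abs(f(x) - f_opt)]
    domain_gap = [np.linalg.norm(x - x_opt)]
    grad_norms = [np.linalg.norm(grad_f(x))]
    for _ in range(iterations):
        y = x + beta * (x - x_prev)            # look-ahead (extrapolated) point
        x_prev, x = x, y - alpha * grad_f(y)   # gradient step at the look-ahead point
        function_gap.append(abs(f(x) - f_opt))
        domain_gap.append(np.linalg.norm(x - x_opt))
        grad_norms.append(np.linalg.norm(grad_f(x)))
    return function_gap, domain_gap, grad_norms, alpha, beta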

def run_experiment(params):
    A, b, x_0, x_opt = generate_problem(n=params["n"], mu=params["mu"], L=params["L"], 
                                          problem_type=params["problem_type"])
    eigs = np.linalg.eigvalsh(A)
    # Update mu and L based on the spectrum
    mu_val, L_val = min(eigs), max(eigs)

    f = lambda x: 0.5 * x.T @ A @ x - b.T @ x
    grad_f = lambda x: A @ x - b

    if mu_val <= 1e-2:
        alpha = 1 / L_val
    else:
        alpha = 2 / (mu_val + L_val)
    beta = (np.sqrt(L_val) - np.sqrt(mu_val)) / (np.sqrt(L_val) + np.sqrt(mu_val))

    results = {
        "methods": {
            "Gradient Descent": gradient_descent(f, grad_f, x_0, alpha, params["iterations"], x_opt),
            "Heavy Ball": heavy_ball(f, grad_f, x_0, params["iterations"], x_opt, mu_val, L_val),
            "NAG": nesterov_accelerated_gradient(f, grad_f, x_0, params["iterations"], x_opt, mu_val, L_val),
        },
        "problem": {
            "eigs": eigs,
            "params": params
        }
    }
    return results

def plot_results(results):
    # Define linestyles for methods
    linestyles = {
        "Gradient Descent": "r-",
        "Heavy Ball": "g--",
        "NAG": "m:",
    }
    params = results["problem"]["params"]
    n = params["n"]
    problem_type = params["problem_type"]
    mu_val = params["mu"]
    L_val = params["L"]

    # Create a figure with 3 subplots for function gap, domain gap, and gradient norm
    plt.figure(figsize=(10, 3))
    plt.suptitle(f"{'Strongly convex' if mu_val > 1e-2 else 'Convex'} quadratics: n={n}, {problem_type} matrix, μ={mu_val}, L={L_val}")

    # Plot 1: Function gap
    plt.subplot(1, 3, 1)
    for method, result in results["methods"].items():
        # result = (function_gap, domain_gap, grad_norms, alpha, beta)
        plt.semilogy(result[0], linestyles[method])
    plt.xlabel('Iteration')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.grid(linestyle=":")
    plt.title("Function Gap")

    # Plot 2: Domain gap (||x - x*||)
    plt.subplot(1, 3, 2)
    for method, result in results["methods"].items():
        plt.semilogy(result[1], linestyles[method])
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|x - x^*\|_2$')
    plt.grid(linestyle=":")
    plt.title("Domain Gap")

    # Plot 3: Norm of Gradient
    plt.subplot(1, 3, 3)
    for method, result in results["methods"].items():
        if method == "NAG":
            if mu_val > 1e-2:
                label = f"{method}. α={result[3]:.2e}, β={result[4]:.2e}"
            else:
                label = f"{method}"
        elif method == "Heavy Ball":
            if mu_val > 1e-2:
                label = f"{method}. α={result[3]:.2e}, β={result[4]:.2e}"
            else:
                label = f"{method}"
        elif method == "Gradient Descent":
            label = f"{method}. α={result[3]:.2e}"
        plt.semilogy(result[2], linestyles[method], label=label)
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|\nabla f(x)\|_2$')
    plt.grid(linestyle=":")
    plt.title("Gradient Norm")

    # Place the legend below the plots
    plt.figlegend(loc='lower center', ncol=3, bbox_to_anchor=(0.5, -0.05))
    plt.tight_layout()
    plt.savefig(f"agd_{problem_type}_{mu_val}_{L_val}_{n}.pdf", bbox_inches='tight')
    plt.show()
# Experiment parameters
params = {
    "n": 60,
    "mu": 0,
    "L": 10,
    "iterations": 800,
    "problem_type": "random",  # Change to "clustered", "uniform spectrum", or "Hilbert" as needed
}

results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 60,
    "mu": 1,
    "L": 10,
    "iterations": 100,
    "problem_type": "random",  # Change to "clustered", "uniform spectrum", or "Hilbert" as needed
}

results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 60,
    "mu": 1,
    "L": 1000,
    "iterations": 1000,
    "problem_type": "random",  # Change to "clustered", "uniform spectrum", or "Hilbert" as needed
}

results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 1000,
    "mu": 1,
    "L": 10,
    "iterations": 100,
    "problem_type": "random",  # Change to "clustered", "uniform spectrum", or "Hilbert" as needed
}

results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 1000,
    "mu": 1,
    "L": 1000,
    "iterations": 1000,
    "problem_type": "random",  # Change to "clustered", "uniform spectrum", or "Hilbert" as needed
}

results = run_experiment(params)
plot_results(results)

Logreg

import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp
import jax
import jax.numpy as jnp
from jax import grad, jit, hessian
from scipy.optimize import minimize_scalar
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import sklearn.datasets as skldata
import time
from ucimlrepo import fetch_ucirepo
from optax.losses import safe_softmax_cross_entropy as cros_entr

# Set a random seed for reproducibility (JAX randomness is handled via explicit PRNG keys below)
np.random.seed(228)

@jit
def logistic_loss(w, X, y, mu=1):
    m, n = X.shape
    return jnp.sum(jnp.logaddexp(0, -y * (X @ w))) / m + mu / 2 * jnp.sum(w**2)

def generate_problem(m=1000, n=300, mu=1):
    X, y = skldata.make_classification(n_classes=2, n_features=n, n_samples=m, n_informative=n//2, random_state=0)
    X = jnp.array(X)
    # make_classification returns labels in {0, 1}; map them to {-1, +1}
    # so that they match the logistic loss above and the cvxpy objective below
    y = 2 * jnp.array(y) - 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    return X_train, y_train, X_test, y_test

def compute_optimal(X, y, mu):
    # cvxpy expects NumPy arrays, so convert the JAX arrays first
    X, y = np.asarray(X), np.asarray(y)
    w = cp.Variable(X.shape[1])
    objective = cp.Minimize(cp.sum(cp.logistic(cp.multiply(-y, X @ w))) / len(y) + mu / 2 * cp.norm(w, 2)**2)
    problem = cp.Problem(objective)
    problem.solve()
    return w.value, problem.value

@jit
def compute_accuracy(w, X, y):
    # Compute predicted probabilities using the logistic (sigmoid) function
    preds_probs = jax.nn.sigmoid(X @ w)
    # Convert probabilities to class predictions: -1 if p < 0.5, else 1
    preds = jnp.where(preds_probs < 0.5, -1, 1)
    # Calculate accuracy as the average of correct predictions
    accuracy = jnp.mean(preds == y)
    return accuracy



# @jit
def compute_metrics(trajectory, x_star, f_star, times, X_train, y_train, X_test, y_test, mu):
    f = lambda w: logistic_loss(w, X_train, y_train, mu)
    metrics = {
        "f_gap": [jnp.abs(f(x) - f_star) for x in trajectory],
        "x_gap": [jnp.linalg.norm(x - x_star) for x in trajectory],
        "time": times,
        "train_acc": [compute_accuracy(x, X_train, y_train) for x in trajectory],
        "test_acc": [compute_accuracy(x, X_test, y_test) for x in trajectory],
    }
    return metrics

def gradient_descent(w_0, X, y, learning_rate=0.01, num_iters=100, mu=0):
    trajectory = [w_0]
    times = [0]
    w = w_0
    f = lambda w: logistic_loss(w, X, y, mu)
    iter_start = time.time()
    for i in range(num_iters):
        grad_val = grad(f)(w)
        if learning_rate == "linesearch":
            # Simple line search implementation
            phi = lambda alpha: f(w - alpha*grad_val)
            result = minimize_scalar(fun=phi,
                                     bounds=(1e-3, 2e2),
                                     method='bounded')
            step_size = result.x
        else:
            step_size = learning_rate
        w -= step_size * grad_val
        iter_time = time.time()
        trajectory.append(w)
        times.append(iter_time - iter_start)
    return trajectory, times

def heavy_ball_method(w_0, X, y, learning_rate=0.01, momentum=0.9, num_iters=100, mu=0):
    """
    Polyak Heavy-Ball Method implementation:
    
    Given:
        x^+ = x - α ∇f(x)  (Gradient step)
        d_k = β_k (x_k - x_{k-1})  (Momentum term)
    
    The update is:
        x_{k+1} = x_k^+ + d_k = x_k - α ∇f(x_k) + d_k
    
    Parameters:
        w_0         : initial point
        X, y        : data used in the objective function (e.g. logistic loss)
        learning_rate: α, step size for the gradient step
        momentum   : β, momentum coefficient (can be constant or adjusted per iteration)
        num_iters   : number of iterations to run
        mu          : strong convexity parameter (not used in this explicit update)
    
    Returns:
        trajectory  : list of iterates x_k (after the gradient update)
        times       : list of times (elapsed) at each iteration
    """
    ### YOUR CODE HERE
    return trajectory, times
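
For reference, a minimal sketch of the update described in the docstring (constant momentum, JAX gradients). The name heavy_ball_logreg_sketch is hypothetical and only mirrors the expected signature; it is not necessarily the intended solution.

def heavy_ball_logreg_sketch(w_0, X, y, learning_rate=0.01, momentum=0.9, num_iters=100, mu=0):
    f = lambda w: logistic_loss(w, X, y, mu)
    w_prev, w = w_0, w_0
    trajectory = [w_0]
    times = [0]
    iter_start = time.time()
    for _ in range(num_iters):
        # x_{k+1} = x_k - alpha * grad f(x_k) + beta * (x_k - x_{k-1})
        w_new = w - learning_rate * grad(f)(w) + momentum * (w - w_prev)
        w_prev, w = w, w_new
        trajectory.append(w)
        times.append(time.time() - iter_start)
    return trajectory, times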

def nesterov_accelerated_gradient(w_0, X, y, learning_rate=0.01, momentum=0.9, num_iters=100, mu=0):
    """
    Nesterov Accelerated Gradient (NAG) implementation using explicit momentum:
    
    Given:
        x^+ = x - α ∇f(x)  (Gradient step)
        d_k = β_k (x_k - x_{k-1})  (Momentum term)
    
    The update is:
        x_{k+1} = (x_k + d_k)^+ = (x_k + d_k) - α ∇f(x_k + d_k)
    
    Parameters:
        w_0         : initial point
        X, y        : data used in the objective function (e.g. logistic loss)
        learning_rate: α, step size for the gradient step
        momentum   : β, momentum coefficient (can be constant or adjusted per iteration)
        num_iters   : number of iterations to run
        mu          : strong convexity parameter (not used in this explicit update)
    
    Returns:
        trajectory  : list of iterates x_k (after the gradient update)
        times       : list of times (elapsed) at each iteration
    """
    ### YOUR CODE HERE
    return trajectory, times
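
And a matching sketch for the NAG update from the docstring (again, a hypothetical helper named nag_logreg_sketch, not necessarily the intended solution):

def nag_logreg_sketch(w_0, X, y, learning_rate=0.01, momentum=0.9, num_iters=100, mu=0):
    f = lambda w: logistic_loss(w, X, y, mu)
    w_prev, w = w_0, w_0
    trajectory = [w_0]
    times = [0]
    iter_start = time.time()
    for _ in range(num_iters):
        # x_{k+1} = (x_k + d_k) - alpha * grad f(x_k + d_k),  d_k = beta * (x_k - x_{k-1})
        lookahead = w + momentum * (w - w_prev)
        w_new = lookahead - learning_rate * grad(f)(lookahead)
        w_prev, w = w, w_new
        trajectory.append(w)
        times.append(time.time() - iter_start)
    return trajectory, times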

def run_experiments(params):
    mu = params["mu"]
    m, n = params["m"], params["n"]
    methods = params["methods"]
    results = {}

    X_train, y_train, X_test, y_test = generate_problem(m, n, mu)
    n_features = X_train.shape[1]  # Number of features
    params["n_features"] = n_features
    
    x_0 = jax.random.normal(jax.random.PRNGKey(0), (n_features, ))
    x_star, f_star = compute_optimal(X_train, y_train, mu)

    for method in methods:
        if method["method"] == "GD":
            learning_rate = method["learning_rate"]
            iterations = method["iterations"]
            trajectory, times = gradient_descent(x_0, X_train, y_train, learning_rate, iterations, mu)
            label = method["method"] + " " + str(learning_rate)
            results[label] = compute_metrics(trajectory, x_star, f_star, times, X_train, y_train, X_test, y_test, mu)
        elif method["method"] == "Heavy Ball":
            learning_rate   = method.get("learning_rate", 0.01)
            momentum        = method.get("momentum", 0.9)
            iterations = method["iterations"]
            trajectory, times = heavy_ball_method(x_0, X_train, y_train, learning_rate, momentum, iterations, mu)
            # label = method["method"] + " " + str(learning_rate)
            label = f"{method['method']}. α={learning_rate:.2e}. β={momentum:.2e}"
            results[label] = compute_metrics(trajectory, x_star, f_star, times, X_train, y_train, X_test, y_test, mu)
        
        elif method["method"] == "NAG":
            learning_rate   = method.get("learning_rate", 0.01)
            momentum        = method.get("momentum", 0.9)
            iterations = method["iterations"]
            trajectory, times = nesterov_accelerated_gradient(x_0, X_train, y_train, learning_rate, momentum, iterations, mu)
            label = method["method"] + " " + str(learning_rate)
            label = f"{method['method']}. α={learning_rate:.2e}. β={momentum:.2e}"
            results[label] = compute_metrics(trajectory, x_star, f_star, times, X_train, y_train, X_test, y_test, mu)

    return results, params

def plot_results(results, params):
    plt.figure(figsize=(11, 2.6))
    mu = params["mu"]
    
    if mu > 1e-2:
        plt.suptitle(f"Strongly convex binary logistic regression. mu={mu}.")
    else:
        plt.suptitle(f"Convex binary logistic regression. mu={mu}.")

    plt.subplot(1, 4, 1)
    for method, metrics in results.items():
        plt.plot(metrics['f_gap'])
    plt.xlabel('Iteration')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(1, 4, 2)
    for method, metrics in results.items():
        plt.plot(metrics['x_gap'], label=method)
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|x_k - x^*\|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(1, 4, 3)
    for method, metrics in results.items():
        plt.plot(metrics["train_acc"])
    plt.xlabel('Iteration')
    plt.ylabel('Train accuracy')
    # plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(1, 4, 4)
    for method, metrics in results.items():
        plt.plot(metrics["test_acc"])
    plt.xlabel('Iteration')
    plt.ylabel('Test accuracy')
    # plt.yscale('log')
    plt.grid(linestyle=":")

    # Place the legend below the plots
    plt.figlegend(loc='lower center', ncol=5, bbox_to_anchor=(0.5, -0.00))
    # Adjust layout to make space for the legend below
    filename = ""
    for method, metrics in results.items():
        filename += method
    filename += f"_{mu}.pdf"
    plt.tight_layout(rect=[0, 0.05, 1, 1])
    plt.savefig(filename)
    plt.show()
params = {
    "mu": 0,
    "m": 1000,
    "n": 100,
    "methods": [
        {
            "method": "GD",
            "learning_rate":9e4,
            "iterations": 400,
        },
        {
            "method": "Heavy Ball",
            "learning_rate": 9e-1,
            "iterations": 400,
            "momentum": 0.99,
        },
        {
            "method": "NAG",
            "learning_rate": 9e-1,
            "iterations": 400,
            "momentum": 0.99,
        },
    ]
}

results, params = run_experiments(params)
plot_results(results, params)
params = {
    "mu": 1,
    "m": 1000,
    "n": 100,
    "methods": [
        {
            "method": "GD",
            "learning_rate": 5e-2,
            "iterations": 300,
        },
        {
            "method": "Heavy Ball",
            "learning_rate": 5e-2,
            "iterations": 300,
            "momentum": 0.9,
        },
        {
            "method": "NAG",
            "learning_rate": 5e-2,
            "iterations": 300,
            "momentum": 0.9,
        },
    ]
}

results, params = run_experiments(params)
plot_results(results, params)