import numpy as np
import matplotlib.pyplot as plt

np.random.seed(228)

# Parameters
n = 100   # Dimension of x
mu = 10   # Smallest eigenvalue (strong convexity constant)
L = 100   # Largest eigenvalue (smoothness constant)
def generate_problem(n=100, mu=mu, L=L, problem_type="clustered"):
    np.random.seed(228)
    if problem_type == "clustered":
        # Spectrum concentrated in 4 clusters between mu and L
        A = np.diagflat([mu*np.ones(n//4), (mu + (L-mu)/3)*np.ones(n//4), (mu + 2*(L-mu)/3)*np.ones(n//4), L*np.ones(n//4)])
        U = np.random.rand(n, n)
        Q, _ = np.linalg.qr(U)
        A = Q.dot(A).dot(Q.T)
        A = (A + A.T) * 0.5  # Symmetrize to remove numerical asymmetry
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 5*np.random.randn(n)

    elif problem_type == "random":
        A = np.random.randn(n, n)
        factual_L = max(np.linalg.eigvalsh(A.T @ A))
        A = A.T.dot(A)/factual_L*L + mu*np.eye(n)  # Rescale the spectrum to lie approximately in [mu, mu + L]
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 3*np.random.randn(n)

    elif problem_type == "uniform spectrum":
        A = np.diag(np.linspace(mu, L, n, endpoint=True))
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 3*np.random.randn(n)

    elif problem_type == "Hilbert":
        A = np.array([[1.0 / (i + j - 1) for i in range(1, n+1)] for j in range(1, n+1)])
        b = np.ones(n)
        x_0 = 3*np.random.randn(n)
        x_opt = np.linalg.lstsq(A, b, rcond=None)[0]

    elif problem_type == "worst_cg":
        # Parameter t controls the condition number
        t = 0.6  # Small t leads to worse conditioning

        # Create tridiagonal matrix
        main_diag = np.ones(n)
        main_diag[0] = t
        main_diag[1:] = 1 + t
        off_diag = np.sqrt(t) * np.ones(n-1)
        A = np.diag(main_diag) + np.diag(off_diag, k=1) + np.diag(off_diag, k=-1)

        # Create b vector [1, 0, ..., 0]
        b = np.zeros(n)
        b[0] = 1

        # Since this is a specific problem, we compute x_opt explicitly
        x_opt = np.linalg.solve(A, b)
        x_0 = np.zeros(n)  # Start from zero vector

    return A, b, x_0, x_opt
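A quick sanity check of the generator (a minimal sketch; the specific call below is illustrative and only assumes the problem types defined above):

# Hypothetical sanity check: for the synthetic problem types the spectrum of A
# should lie (approximately) within [mu, L], and b was built so that A @ x_opt = b.
A, b, x_0, x_opt = generate_problem(n=40, mu=1, L=50, problem_type="clustered")
eigs = np.linalg.eigvalsh(A)
print(f"lambda_min = {eigs.min():.3f}, lambda_max = {eigs.max():.3f}")
print("||A x_opt - b|| =", np.linalg.norm(A @ x_opt - b))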
# Optimization methods
def gradient_descent(f, grad_f, x_0, step_size, iterations, x_opt):
    x = x_0.copy()
    f_opt = f(x_opt)
    values, gradients = [], []
    values.append(abs(f(x) - f_opt))
    gradients.append(np.linalg.norm(grad_f(x)))
    for _ in range(iterations):
        x -= step_size * grad_f(x)
        values.append(abs(f(x) - f_opt))
        gradients.append(np.linalg.norm(grad_f(x)))
    return values, gradients
def steepest_descent(A, f, grad_f, x_0, iterations, x_opt):
    x = x_0.copy()
    f_opt = f(x_opt)
    values, gradients = [], []
    values.append(abs(f(x) - f_opt))
    gradients.append(np.linalg.norm(grad_f(x)))
    for _ in range(iterations):
        grad = grad_f(x)
        step_size = np.dot(grad.T, grad) / np.dot(grad.T, np.dot(A, grad))  # Exact line search for the quadratic
        x -= step_size * grad
        values.append(abs(f(x) - f_opt))
        gradients.append(np.linalg.norm(grad_f(x)))  # Log the gradient at the updated point
    return values, gradients
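The step size inside steepest_descent is the exact line search for this quadratic: minimizing $f(x_k - \alpha \nabla f(x_k))$ over $\alpha$ gives

$$
\alpha_k = \frac{\nabla f(x_k)^\top \nabla f(x_k)}{\nabla f(x_k)^\top A \,\nabla f(x_k)},
$$

which is exactly the expression computed for step_size in the loop above.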
def conjugate_gradient(A, b, x_0, iterations, x_opt):
    x = x_0.copy()
    f = lambda x: 0.5 * x.T @ A @ x - b.T @ x
    f_opt = f(x_opt)

    r = b - np.dot(A, x)  # Residual, equal to -grad f(x)
    d = r.copy()          # Initial search direction
    values, gradients = [abs(f(x) - f_opt)], [np.linalg.norm(r)]
    for _ in range(iterations - 1):
        ## YOUR CODE HERE ##
        pass
    return values, gradients
def run_experiment(params):
    A, b, x_0, x_opt = generate_problem(n=params["n"], mu=params["mu"], L=params["L"], problem_type=params["problem_type"])
    eigs = np.linalg.eigvalsh(A)
    mu, L = min(eigs), max(eigs)  # Actual extreme eigenvalues of the generated matrix

    f = lambda x: 0.5 * x.T @ A @ x - b.T @ x
    grad_f = lambda x: A @ x - b

    if mu <= 1e-2:
        alpha = 1/L         # Step size for the (nearly) non-strongly convex case
    else:
        alpha = 2/(mu + L)  # Step size
    beta = (np.sqrt(L) - np.sqrt(mu))/(np.sqrt(L) + np.sqrt(mu))  # Momentum parameter (not used by the methods below)

    results = {
        "methods": {
            "Gradient Descent": gradient_descent(f, grad_f, x_0, alpha, params["iterations"], x_opt),
            "Steepest Descent": steepest_descent(A, f, grad_f, x_0, params["iterations"], x_opt),
            "Conjugate Gradients": conjugate_gradient(A, b, x_0, params["iterations"], x_opt),
        },
        "problem": {
            "eigs": eigs,
            "params": params
        }
    }
    return results
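A note on the fixed step size chosen above (a standard fact for quadratics, stated here for context): if the spectrum of $A$ lies in $[\mu, L]$ with $\mu > 0$, then gradient descent with $\alpha = \tfrac{2}{\mu + L}$ contracts the distance to the minimizer at the best possible linear rate for a constant step,

$$
\|x_{k+1} - x^*\|_2 \le \frac{L - \mu}{L + \mu}\, \|x_k - x^*\|_2,
$$

while $\alpha = \tfrac{1}{L}$ is the safe choice when $\mu \approx 0$.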
def plot_results(results):
    linestyles = {
        "Gradient Descent": "r-",
        "Steepest Descent": "b-.",
        "Conjugate Gradients": "g--"
    }
    plt.figure(figsize=(10, 3.5))
    mu = results["problem"]["params"]["mu"]
    L = results["problem"]["params"]["L"]
    n = results["problem"]["params"]["n"]
    problem_type = results["problem"]["params"]["problem_type"]

    if mu > 1e-2:
        plt.suptitle(f"Strongly convex quadratics. n={n}, {problem_type} matrix. ")
    else:
        plt.suptitle(f"Convex quadratics. n={n}, {problem_type} matrix. ")

    plt.subplot(1, 3, 1)
    eigs = results["problem"]["eigs"]
    plt.scatter(np.arange(len(eigs)), eigs)
    plt.xlabel('Dimension')
    plt.ylabel('Eigenvalues of A')
    plt.grid(linestyle=":")
    plt.title("Eigenvalues")
    if results["problem"]["params"]["problem_type"] == "Hilbert":
        plt.yscale("log")

    plt.subplot(1, 3, 2)
    for method, result_ in results["methods"].items():
        plt.semilogy(result_[0], linestyles[method])
    plt.xlabel('Iteration')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.grid(linestyle=":")
    plt.title("Function gap")

    plt.subplot(1, 3, 3)
    for method, result_ in results["methods"].items():
        plt.semilogy(result_[1], linestyles[method], label=method)
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|\nabla f(x)\|_2$')
    plt.grid(linestyle=":")
    plt.title("Norm of Gradient")

    # Place the legend below the plots
    plt.figlegend(loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.00))
    # Adjust layout to make space for the legend below
    plt.tight_layout(rect=[0, 0.05, 1, 1])
    # plt.savefig(f"cg_{problem_type}_{mu}_{L}_{n}.pdf")
    plt.show()
We consider the quadratic minimization problem

$$
f(x)=\dfrac{1}{2}x^TAx-b^Tx\longrightarrow \min_{x\in\mathbb{R}^n}
$$

Add the code for the iterations of the conjugate gradient method in the function conjugate_gradient():
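For reference, one possible way to complete the loop is shown below as a self-contained sketch using the standard CG recurrences for this quadratic; the function name conjugate_gradient_sketch and the early-stopping tolerance are illustrative choices rather than part of the assignment.

def conjugate_gradient_sketch(A, b, x_0, iterations, x_opt):
    x = x_0.copy()
    f = lambda x: 0.5 * x.T @ A @ x - b.T @ x
    f_opt = f(x_opt)

    r = b - A @ x    # residual, equal to -grad f(x)
    d = r.copy()     # first search direction
    values, gradients = [abs(f(x) - f_opt)], [np.linalg.norm(r)]
    for _ in range(iterations - 1):
        if np.linalg.norm(r) < 1e-14:      # converged; avoid division by zero
            break
        Ad = A @ d
        alpha = (r @ r) / (d @ Ad)         # exact step length along d
        x = x + alpha * d
        r_new = r - alpha * Ad             # updated residual b - A x
        beta = (r_new @ r_new) / (r @ r)   # coefficient keeping the directions A-conjugate
        d = r_new + beta * d
        r = r_new
        values.append(abs(f(x) - f_opt))
        gradients.append(np.linalg.norm(r))
    return values, gradients

In exact arithmetic the directions produced this way are mutually A-conjugate, so the method reaches x_opt in at most n steps; in floating point, and on the Hilbert matrix in particular, the behaviour can deviate from that idealized picture.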
After implementing the conjugate gradient method, let's look at several experimental results of solving the quadratic problem for different matrices A:
# Experiment parameters
params = {
    "n": 60,
    "mu": 1e-3,
    "L": 100,
    "iterations": 100,
    "problem_type": "random",
}
results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 60,
    "mu": 10,
    "L": 100,
    "iterations": 100,
    "problem_type": "random",
}
results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 60,
    "mu": 10,
    "L": 1000,
    "iterations": 500,
    "problem_type": "clustered",
}
results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 600,
    "mu": 10,
    "L": 1000,
    "iterations": 500,
    "problem_type": "clustered",
}
results = run_experiment(params)
plot_results(results)
# Experiment parameters
params = {
    "n": 60,
    "mu": 1,
    "L": 10,
    "iterations": 100,
    "problem_type": "Hilbert",
}
results = run_experiment(params)
plot_results(results)