!pip install numpy matplotlib jax scipy scikit-optimize scikit-learn cvxpy ucimlrepo optax
import time

import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp
import jax
import jax.numpy as jnp
from jax import grad, jit, hessian
from scipy.optimize import minimize_scalar
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import sklearn.datasets as skldata
from ucimlrepo import fetch_ucirepo
from optax.losses import safe_softmax_cross_entropy as cros_entr

# Set a random seed for reproducibility
np.random.seed(228)
jax.random.PRNGKey(228)
@jit
def logistic_loss(w, X, y, mu=1):
    m, n = X.shape
    return jnp.sum(jnp.logaddexp(0, -y * (X @ w))) / m + mu / 2 * jnp.sum(w**2)
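For reference, the objective implemented by logistic_loss above (with labels y as produced by make_classification) is
$$ f(w) \;=\; \frac{1}{m}\sum_{i=1}^{m}\log\bigl(1 + e^{-y_i\, x_i^\top w}\bigr) \;+\; \frac{\mu}{2}\|w\|_2^2, $$
which is convex for $\mu = 0$ and $\mu$-strongly convex for $\mu > 0$.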
def generate_problem(m=1000, n=300, mu=1):
    X, y = skldata.make_classification(n_classes=2, n_features=n, n_samples=m, n_informative=n//2, random_state=0)
    X = jnp.array(X)
    y = jnp.array(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    return X_train, y_train, X_test, y_test
def compute_optimal(X, y, mu):
    w = cp.Variable(X.shape[1])
    objective = cp.Minimize(cp.sum(cp.logistic(cp.multiply(-y, X @ w))) / len(y) + mu / 2 * cp.norm(w, 2)**2)
    problem = cp.Problem(objective)
    problem.solve()
    return w.value, problem.value
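As a quick sanity check (a small sketch, not part of the original experiments), the optimal value returned by CVXPY should agree with logistic_loss evaluated at its solution, since both compute the same objective:

# Sanity check: CVXPY optimal value vs. logistic_loss at the CVXPY solution
X_tr, y_tr, X_te, y_te = generate_problem(m=200, n=20, mu=1.0)
w_star, f_star = compute_optimal(X_tr, y_tr, mu=1.0)
print(float(logistic_loss(jnp.array(w_star), X_tr, y_tr, 1.0)), f_star)  # the two values should match closely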
@jit
def compute_accuracy(w, X, y):
    # Compute predicted probabilities using the logistic (sigmoid) function
    preds_probs = jax.nn.sigmoid(X @ w)
    # Convert probabilities to class predictions: 0 if p < 0.5, else 1
    preds = jnp.where(preds_probs < 0.5, 0, 1)
    # Calculate accuracy as the average of correct predictions
    accuracy = jnp.mean(preds == y)
    return accuracy
# @jit
def compute_metrics(trajectory, x_star, f_star, times, X_train, y_train, X_test, y_test, mu):
    f = lambda w: logistic_loss(w, X_train, y_train, mu)
    metrics = {
        "f_gap": [jnp.abs(f(x) - f_star) for x in trajectory],
        "x_gap": [jnp.linalg.norm(x - x_star) for x in trajectory],
        "time": times,
        "train_acc": [compute_accuracy(x, X_train, y_train) for x in trajectory],
        "test_acc": [compute_accuracy(x, X_test, y_test) for x in trajectory],
    }
    return metrics
def gradient_descent(w_0, X, y, learning_rate=0.01, num_iters=100, mu=0):
    trajectory = [w_0]
    times = [0]
    w = w_0
    f = lambda w: logistic_loss(w, X, y, mu)
    iter_start = time.time()
    for i in range(num_iters):
        grad_val = grad(f)(w)
        if learning_rate == "linesearch":
            # Simple line search implementation
            phi = lambda alpha: f(w - alpha * grad_val)
            result = minimize_scalar(fun=phi,
                                     bounds=(1e-3, 2e2)
                                     )
            step_size = result.x
        else:
            step_size = learning_rate
        w -= step_size * grad_val
        iter_time = time.time()
        trajectory.append(w)
        times.append(iter_time - iter_start)
    return trajectory, times
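The "linesearch" option above can also be exercised directly; a minimal sketch (not one of the runs below, with an arbitrarily chosen small problem size):

# Run gradient descent with the scipy-based line search instead of a fixed step size
X_tr, y_tr, X_te, y_te = generate_problem(m=200, n=20, mu=1e-1)
w_init = jnp.zeros(X_tr.shape[1])
traj, ts = gradient_descent(w_init, X_tr, y_tr, learning_rate="linesearch", num_iters=50, mu=1e-1)
print(float(logistic_loss(traj[-1], X_tr, y_tr, 1e-1)))  # final objective value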
def run_experiments(params):
    mu = params["mu"]
    m, n = params["m"], params["n"]
    methods = params["methods"]
    results = {}

    X_train, y_train, X_test, y_test = generate_problem(m, n, mu)
    n_features = X_train.shape[1]  # Number of features
    params["n_features"] = n_features

    x_0 = jax.random.normal(jax.random.PRNGKey(0), (n_features, ))
    x_star, f_star = compute_optimal(X_train, y_train, mu)

    for method in methods:
        learning_rate = method["learning_rate"]
        iterations = method["iterations"]
        trajectory, times = gradient_descent(x_0, X_train, y_train, learning_rate, iterations, mu)
        label = method["method"] + " " + str(learning_rate)
        results[label] = compute_metrics(trajectory, x_star, f_star, times, X_train, y_train, X_test, y_test, mu)

    return results, params
def plot_results(results, params):
    plt.figure(figsize=(11, 5))
    mu = params["mu"]

    if mu > 1e-2:
        plt.suptitle(f"Strongly convex binary logistic regression. mu={mu}.")
    else:
        plt.suptitle(f"Convex binary logistic regression. mu={mu}.")

    plt.subplot(2, 4, 1)
    for method, metrics in results.items():
        plt.plot(metrics['f_gap'])
    plt.xlabel('Iteration')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 2)
    for method, metrics in results.items():
        plt.plot(metrics['x_gap'], label=method)
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|x_k - x^*\|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 3)
    for method, metrics in results.items():
        plt.plot(metrics["train_acc"])
    plt.xlabel('Iteration')
    plt.ylabel('Train accuracy')
    # plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 4)
    for method, metrics in results.items():
        plt.plot(metrics["test_acc"])
    plt.xlabel('Iteration')
    plt.ylabel('Test accuracy')
    # plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 5)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics['f_gap'])
    plt.xlabel('Time')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 6)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics['x_gap'])
    plt.xlabel('Time')
    plt.ylabel(r'$\|x_k - x^*\|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 7)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics["train_acc"])
    plt.xlabel('Time')
    plt.ylabel('Train accuracy')
    # plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 8)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics["test_acc"])
    plt.xlabel('Time')
    plt.ylabel('Test accuracy')
    # plt.yscale('log')
    plt.grid(linestyle=":")

    # Place the legend below the plots
    plt.figlegend(loc='lower center', ncol=5, bbox_to_anchor=(0.5, -0.00))

    # Build a filename from the method labels and save the figure
    filename = ""
    for method, metrics in results.items():
        filename += method
    filename += f"_{mu}.pdf"

    # Adjust layout to make space for the legend below
    plt.tight_layout(rect=[0, 0.05, 1, 1])
    plt.savefig(filename)
    plt.show()
params = {
    "mu": 0,
    "m": 1000,
    "n": 100,
    "methods": [
        {"method": "GD",
         "learning_rate": 3e-1,
         "iterations": 2000,
        },
        {"method": "GD",
         "learning_rate": 7e-1,
         "iterations": 2000,
        },
    ]
}
results, params = run_experiments(params)
plot_results(results, params)
params = {
    "mu": 1e-1,
    "m": 1000,
    "n": 100,
    "methods": [
        {"method": "GD",
         "learning_rate": 1e-1,
         "iterations": 900,
        },
        {"method": "GD",
         "learning_rate": 1.1e-1,
         "iterations": 900,
        },
    ]
}
results, params = run_experiments(params)
plot_results(results, params)
With $\mu > 0$ we now also observe convergence in terms of the distance to the solution, but it does not noticeably affect accuracy.
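This is the expected behavior (a standard result, stated here as background): for an $L$-smooth, $\mu$-strongly convex objective, gradient descent with step size $\alpha \le 1/L$ satisfies
$$ \|x_k - x^*\|_2^2 \;\le\; (1 - \alpha\mu)^{k}\,\|x_0 - x^*\|_2^2, $$
so for $\mu > 0$ the iterates converge linearly to $x^*$, whereas for $\mu = 0$ only the function gap $f(x_k) - f^*$ is guaranteed to shrink, at a sublinear $O(1/k)$ rate.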
Support Vector Machine
# Generate a synthetic binary classification dataset
def generate_svm_data(m=1000, n=300):
    X, y = skldata.make_classification(n_classes=2, n_features=n, n_samples=m,
                                       n_informative=n//2, random_state=42)
    y = 2 * y - 1  # Convert labels to {-1, 1}
    return X, y
# Solve SVM using convex optimization
def compute_svm_optimal(X, y, C=1.0):
    m, n = X.shape
    w = cp.Variable(n)
    b = cp.Variable()
    slack = cp.Variable(m)

    # SVM objective: minimize 1/2 ||w||^2 + C * mean(slack)
    # (averaged over samples so that the optimal value matches hinge_loss below)
    objective = cp.Minimize(0.5 * cp.norm(w, 2)**2 + C * cp.sum(slack) / m)

    # Constraints: y_i (x_i^T w + b) >= 1 - slack_i, slack_i >= 0
    constraints = [
        cp.multiply(y, X @ w + b) >= 1 - slack,
        slack >= 0
    ]

    problem = cp.Problem(objective, constraints)
    problem.solve()
    return w.value, b.value, problem.value
# Hinge loss function
@jax.jit
def hinge_loss(w, b, X, y, C=1.0):
    margins = y * (X @ w + b)
    loss = jnp.maximum(0, 1 - margins)
    return 0.5 * jnp.sum(w**2) + C * jnp.mean(loss)
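Written out, the objective computed by hinge_loss is
$$ f(w, b) \;=\; \frac{1}{2}\|w\|_2^2 \;+\; \frac{C}{m}\sum_{i=1}^{m}\max\bigl(0,\ 1 - y_i\,(x_i^\top w + b)\bigr), $$
which coincides with the slack formulation above once each slack variable is tight at $\max(0,\ 1 - y_i(x_i^\top w + b))$.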
# Compute accuracy (redefines the logistic-regression version above, now with a bias term)
@jax.jit
def compute_accuracy(w, b, X, y):
    preds = jnp.sign(X @ w + b)
    return jnp.mean(preds == y)
# Perform gradient descent for SVM optimization
def gradient_descent_svm(w_0, b_0, X, y, learning_rate=0.01, num_iters=100, C=1.0):
    # Track (w, b) pairs so that the metrics use the learned bias, not just b_0
    trajectory = [(w_0, b_0)]
    times = [0]
    w, b = w_0, b_0
    f = lambda w, b: hinge_loss(w, b, X, y, C)

    iter_start = time.time()
    for i in range(num_iters):
        grad_w, grad_b = grad(f, argnums=(0, 1))(w, b)
        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

        iter_time = time.time()
        trajectory.append((w, b))
        times.append(iter_time - iter_start)

    return trajectory, times
# Run SVM Experiments
def run_experiments_svm(params):
    C = params["C"]
    m, n = params["m"], params["n"]
    methods = params["methods"]
    results = {}

    X, y = generate_svm_data(m, n)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    # Standardize data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    w_0 = jax.random.normal(jax.random.PRNGKey(0), (n,))
    b_0 = 0.0

    w_star, b_star, f_star = compute_svm_optimal(X_train, y_train, C)

    for method in methods:
        learning_rate = method["learning_rate"]
        iterations = method["iterations"]
        trajectory, times = gradient_descent_svm(w_0, b_0, X_train, y_train, learning_rate, iterations, C)
        label = method["method"] + " " + str(learning_rate)
        results[label] = {
            "f_gap": [jnp.abs(hinge_loss(w, b, X_train, y_train, C) - f_star) for w, b in trajectory],
            "x_gap": [jnp.linalg.norm(w - w_star) for w, b in trajectory],
            "time": times,
            "train_acc": [compute_accuracy(w, b, X_train, y_train) for w, b in trajectory],
            "test_acc": [compute_accuracy(w, b, X_test, y_test) for w, b in trajectory],
        }
    return results, params
# Plot Results
def plot_results_svm(results, params):
    plt.figure(figsize=(11, 5))
    C = params["C"]

    plt.suptitle(f"SVM Training Results. C={C}")

    plt.subplot(2, 4, 1)
    for method, metrics in results.items():
        plt.plot(metrics['f_gap'])
    plt.xlabel('Iteration')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 2)
    for method, metrics in results.items():
        plt.plot(metrics['x_gap'], label=method)
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|x_k - x^*\|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 3)
    for method, metrics in results.items():
        plt.plot(metrics["train_acc"])
    plt.xlabel('Iteration')
    plt.ylabel('Train accuracy')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 4)
    for method, metrics in results.items():
        plt.plot(metrics["test_acc"])
    plt.xlabel('Iteration')
    plt.ylabel('Test accuracy')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 5)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics['f_gap'])
    plt.xlabel('Time')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 6)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics['x_gap'])
    plt.xlabel('Time')
    plt.ylabel(r'$\|x_k - x^*\|$')
    plt.yscale('log')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 7)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics["train_acc"])
    plt.xlabel('Time')
    plt.ylabel('Train accuracy')
    plt.grid(linestyle=":")

    plt.subplot(2, 4, 8)
    for method, metrics in results.items():
        plt.plot(metrics["time"], metrics["test_acc"])
    plt.xlabel('Time')
    plt.ylabel('Test accuracy')
    plt.grid(linestyle=":")

    # Place the legend below the plots
    plt.figlegend(loc='lower center', ncol=5, bbox_to_anchor=(0.5, -0.00))
    plt.tight_layout(rect=[0, 0.05, 1, 1])
    plt.show()
params = {
    "C": 0,
    "m": 1000,
    "n": 100,
    "methods": [
        {"method": "GD",
         "learning_rate": 3e-3,
         "iterations": 2000,
        },
        {"method": "GD",
         "learning_rate": 7e-3,
         "iterations": 2000,
        },
    ]
}
results, params = run_experiments_svm(params)
plot_results_svm(results, params)
Here, with C=0, we minimize only $\frac{1}{2}\|w\|_2^2$ without the hinge-loss term, and we get linear convergence both in the function error and in the distance to the solution.
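This matches a direct calculation (a short aside, assuming the plain gradient step used above): for $f(w) = \frac{1}{2}\|w\|_2^2$ we have $\nabla f(w) = w$ and $w^* = 0$, so
$$ w_{k+1} = w_k - \alpha w_k = (1 - \alpha)\, w_k, \qquad \|w_k - w^*\| = |1 - \alpha|^{k}\,\|w_0\|, $$
i.e. both $\|w_k - w^*\|$ and $f(w_k) - f^* = \frac{1}{2}\|w_k\|_2^2$ decay geometrically for any $0 < \alpha < 2$.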
params = {
    "C": 0.1,
    "m": 1000,
    "n": 100,
    "methods": [
        {"method": "GD",
         "learning_rate": 3e-3,
         "iterations": 2000,
        },
        {"method": "GD",
         "learning_rate": 7e-3,
         "iterations": 2000,
        },
    ]
}
results, params = run_experiments_svm(params)
plot_results_svm(results, params)
Here we minimize the regularized hinge loss and see that the accuracy improves as well.
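As a final sanity check (a small sketch, not part of the original experiments), one can compare the test accuracy of the CVXPY reference solution with the final gradient-descent iterate; the data split and scaling below mirror run_experiments_svm:

# Compare the exact CVXPY solution with the last GD iterate on held-out data
X, y = generate_svm_data(m=1000, n=100)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

w_star, b_star, f_star = compute_svm_optimal(X_train, y_train, C=0.1)
w_0 = jax.random.normal(jax.random.PRNGKey(0), (100,))
trajectory, _ = gradient_descent_svm(w_0, 0.0, X_train, y_train, learning_rate=7e-3, num_iters=500, C=0.1)
w_gd, b_gd = trajectory[-1]

print("CVXPY test accuracy:", compute_accuracy(jnp.array(w_star), float(b_star), jnp.array(X_test), jnp.array(y_test)))
print("GD    test accuracy:", compute_accuracy(w_gd, b_gd, jnp.array(X_test), jnp.array(y_test)))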