import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt

np.set_printoptions(linewidth=200)

np.random.seed(22)

n = 10
L = np.random.randn(n, n)
A = L@L.T
b = np.random.randn(n)

la.cholesky(A)

L2 = np.random.randn(2, 2)
A2 = L2@L2.T
b2 = np.random.randn(2)

la.cholesky(A2), b2

(array([[1.12829349, 0.        ],
        [1.08007298, 0.50683869]]),
 array([-1.53247466, -0.23412954]))

def plot_bowl(A, b, res=100, ext=12):
    vgrid = np.mgrid[-ext:ext:res*1j,-ext:ext:res*1j]
    
    phi = 1/2*np.einsum("ixy,ij,jxy->xy", vgrid, A, vgrid) - np.einsum("ixy,i->xy", vgrid, b)
    
    plt.contour(vgrid[0], vgrid[1], phi, 50)
    
plot_bowl(A2, b2)

def alpha(A, b, x, s):
    r = b-A@x
    return s@r/(s@A@s)

x2 = np.random.randn(2) * 4
s2 = np.random.randn(2) * 4

plot_bowl(A2, b2)

alpha2 = alpha(A2, b2, x2, s2) 

plt.quiver(x2[0], x2[1], alpha2*s2[0], alpha2*s2[1],
          color='blue', angles='xy', scale_units='xy', scale=1, label='Vector A')

<matplotlib.quiver.Quiver at 0x7fd524089e80>

alphas = np.linspace(-0.5*alpha2, 1.5*alpha2, 100)

x2s = x2.reshape(-1, 1) + alphas * s2.reshape(-1, 1)
phis = 1/2*np.einsum("ia,ij,ja->a", x2s, A2, x2s) - np.einsum("ia,i->a", x2s, b2)
plt.plot(alphas, phis)
plt.vlines(alpha2, -100, 100)

<matplotlib.collections.LineCollection at 0x7fd523ca1fd0>

import matplotlib.pyplot as plt
import numpy as np

def draw_A_orthogonal_pairs(rng, A, b, n_pairs=10, ext=12, scale=1):
    length = ext/10*2*scale
    
    origins = rng.uniform(-ext*0.7, ext*0.7, (n_pairs, 2))
    
    # Generate random angles for the first vector of each pair
    vec = rng.normal(size=(2, n_pairs))
    vec2 = np.empty((2, n_pairs))

    vec2[0] = vec[1]
    vec2[1] = -vec[0]

    vec = vec/la.norm(vec, axis=0)
    vec2 = vec2/la.norm(vec2, axis=0)

    vec = vec*length
    vec2 = vec2*length
    
    Linv = la.inv(la.cholesky(A))
    vec = np.einsum("ij,jn->in", Linv.T, vec)
    vec2 = np.einsum("ij,jn->in", Linv.T, vec2)
    
    plt.quiver(origins[:, 0], origins[:, 1], vec[0], vec[1], 
              color='blue', angles='xy', scale_units='xy', scale=1, label='Vector A')
    plt.quiver(origins[:, 0], origins[:, 1], vec2[0], vec2[1], 
              color='red', angles='xy', scale_units='xy', scale=1, label='Vector B (Orthogonal)')

plt.figure(figsize=(16,8))
plt.subplot(121)
draw_A_orthogonal_pairs(np.random.default_rng(seed=17), np.eye(2), np.zeros(2))
plot_bowl(np.eye(2), np.zeros(2))
plt.gca().set_aspect("equal")

plt.subplot(122)
draw_A_orthogonal_pairs(np.random.default_rng(seed=17), A2, b2)
plot_bowl(A2, np.zeros(2))
plt.gca().set_aspect("equal")

x0 = np.random.randn(n)

# We *could* choose this differently.
# But residual orthogonality would fail if we did.
r0 = A@x0 - b
search_dirs = [r0/(r0@A@r0)**0.5]

for i in range(n-1):
    znext = A@search_dirs[-1]
    coeffs = []
    for s in search_dirs:
        coeff = znext@A@s
        coeffs.append(coeff)
        znext = znext - coeff * s

    znext = znext/(znext@A@znext)**0.5
    search_dirs.append(znext)
    
    print(f"vector {i+1}: {np.array(coeffs).round(3)}")

search_dirs = np.array(search_dirs).T

vector 1: [32.415]
vector 2: [ 5.107 20.682]
vector 3: [0.    6.039 8.121]
vector 4: [-0.    -0.     5.318 16.517]
vector 5: [ 0.     0.    -0.     5.623  7.438]
vector 6: [-0.    -0.     0.    -0.     2.874  3.992]
vector 7: [0.    0.    0.    0.    0.    0.371 5.692]
vector 8: [-0.   -0.   -0.   -0.   -0.   -0.    1.2   0.95]
vector 9: [ 0.     0.     0.     0.     0.     0.    -0.     0.945  1.415]

(search_dirs.T @ A @ search_dirs).round(8)

array([[ 1., -0.,  0., -0.,  0., -0.,  0., -0.,  0., -0.],
       [-0.,  1., -0.,  0., -0.,  0., -0.,  0., -0.,  0.],
       [ 0., -0.,  1., -0.,  0., -0.,  0., -0.,  0., -0.],
       [-0.,  0., -0.,  1.,  0., -0.,  0., -0.,  0., -0.],
       [ 0., -0.,  0.,  0.,  1., -0.,  0., -0.,  0., -0.],
       [-0.,  0., -0., -0., -0.,  1.,  0., -0.,  0., -0.],
       [ 0., -0.,  0.,  0.,  0.,  0.,  1., -0.,  0., -0.],
       [-0.,  0., -0., -0., -0., -0., -0.,  1., -0.,  0.],
       [ 0., -0.,  0.,  0.,  0.,  0.,  0., -0.,  1., -0.],
       [-0.,  0., -0., -0., -0., -0.,  0.,  0., -0.,  1.]])

xtrue = la.solve(A, b)
error = 0 - xtrue
deltas = la.solve(search_dirs, error)

x = x0
xs = [x]

for i in range(n):
    s = search_dirs[:, i]
    myalpha = alpha(A, b, x,  s)
    x = x + myalpha *s
    print(-myalpha, deltas[i])

    xs.append(x)

xs = np.array(xs).T

8.792980908102795 0.162368054958557
-3.2299125200175127 -0.4915826522865855
3.625603649474878 0.910201943084781
-1.8691897927111114 -0.6714702475125636
2.0614773163955378 0.5501212595575367
-1.677718357121346 -0.34407577112268894
2.0849622620094093 2.7790762191123752
-9.371790193365095 -9.597811086041212
6.7685307310186555 6.156774913174951
-25.422639767766295 -25.471364751200788

x - la.solve(A, b)

array([-1.09984910e-10, -4.89535523e-10,  6.83932910e-11, -2.92686764e-10, -1.89260163e-10,  1.48190793e-10,  3.69091424e-11,  2.82227575e-11,  6.47219167e-10,  5.79859716e-10])

errors = xs - xtrue.reshape(-1, 1)

error_norms = np.array([
    err@err
    for err in errors.T])
plt.plot(error_norms)

[<matplotlib.lines.Line2D at 0x7fd521f192b0>]

residuals = A@xs - b.reshape(-1, 1)
(residuals.T @ search_dirs).round(5)

array([[  8.79298,  -3.22991,   3.6256 ,  -1.86919,   2.06148,  -1.67772,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,  -3.22991,   3.6256 ,  -1.86919,   2.06148,  -1.67772,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,   3.6256 ,  -1.86919,   2.06148,  -1.67772,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -1.86919,   2.06148,  -1.67772,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,   2.06148,  -1.67772,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,  -0.     ,  -1.67772,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,  -0.     ,   0.     ,   2.08496,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,   0.     ,  -9.37179,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,   0.     ,   6.76853, -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,   0.     ,  -0.     , -25.42264],
       [ -0.     ,   0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,  -0.     ,   0.     ,  -0.     ,   0.     ]])

(residuals.T @ residuals).round(6)

array([[ 2.36115339e+03, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00],
       [-0.00000000e+00,  1.45042220e+02,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  7.07147780e+01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  3.60416870e+01,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.16669240e+01, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  9.94128700e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00,  1.29638700e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  2.34461210e+01, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  5.99685950e+01,  0.00000000e+00, -0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  0.00000000e+00,  4.86458400e+00,  0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

Conjugate Gradient Mechanics¶

Line search¶

$A$-orthogonality¶

Search Directions¶

Compare step sizes with error decomposition¶

Errors¶

Residuals¶