Butterfly Factorization¶

Following the paper "An algorithm for the rapid evaluation of special function transforms", by Michael O’Neil, Franco Woolfe, Vladimir Rokhlin.

import numpy as np
import scipy.linalg as la
import scipy.linalg.interpolative as sli
import matplotlib.pyplot as plt
import scipy.special as sps
import matplotlib.pyplot as plt

Part 1: Checking Kernel Ranks¶

We started with the claim that the numerical rank of the kernel $e^{ixt}$ for $x\in[0,X]$ and $t\in[0,T]$ depends only on the product $XT$:

Xfacs = np.linspace(1/2, 2, 30)
Tfacs = 1/Xfacs

scale = np.pi  # Change me
for Xfac, Tfac in zip(Xfacs, Tfacs):
    x, t = np.mgrid[0:Xfac*scale:200j, 0:Tfac*scale:200j]
    mat = np.exp(1j*x*t)
    
    _, sigma, _ = la.svd(mat)
    print(f"{Xfac:.2f} {Tfac:.2f}\t", np.sum(sigma > 1e-7))

0.50 2.00	 11
0.55 1.81	 11
0.60 1.66	 11
0.66 1.53	 11
0.71 1.41	 11
0.76 1.32	 11
0.81 1.23	 11
0.86 1.16	 11
0.91 1.09	 11
0.97 1.04	 11
1.02 0.98	 11
1.07 0.94	 11
1.12 0.89	 11
1.17 0.85	 11
1.22 0.82	 11
1.28 0.78	 11
1.33 0.75	 11
1.38 0.72	 11
1.43 0.70	 11
1.48 0.67	 11
1.53 0.65	 11
1.59 0.63	 11
1.64 0.61	 11
1.69 0.59	 11
1.74 0.57	 11
1.79 0.56	 11
1.84 0.54	 11
1.90 0.53	 11
1.95 0.51	 11
2.00 0.50	 11

Part 2: The Butterfly Factorization¶

Some Test Matrices¶

A Matrix for the Discrete Fourier Transform¶

nlevels = 9
n = 2**(nlevels + 2)

def make_dft(n, power):
    omega = np.exp(2*np.pi*1j/n)

    ns = np.arange(n)
    exponents = ns.reshape(-1, 1) * ns
    
    return omega**(power*exponents)

dft = make_dft(n, power=1)
idft = make_dft(n, power=-1)

la.norm(np.abs(idft @ dft) - n*np.eye(n))

1.0665713346973323e-07

Verify the FFT property:

# FIXME
quotient = dft[::2, :n//2] / make_dft(n//2, power=1)
plt.imshow(quotient.real)

<matplotlib.image.AxesImage at 0x7f294aed6d10>

Chebyshev Transform¶

k = n-1

i = np.arange(0, k+1)
x = np.linspace(-1, 1, 3000)

nodes = np.cos(i/k*np.pi)

i = np.arange(n, dtype=np.float64)
nodes = np.cos((2*(i+1)-1)/(2*n)*np.pi)
chebyshev_vdm = np.cos(i*np.arccos(nodes.reshape(-1, 1)))

(chebyshev_vdm.T @ chebyshev_vdm).round(2)

array([[2048.,    0.,   -0., ...,    0.,    0.,   -0.],
       [   0., 1024.,    0., ...,    0.,    0.,    0.],
       [  -0.,    0., 1024., ...,    0.,    0.,   -0.],
       ...,
       [   0.,    0.,    0., ..., 1024.,    0.,   -0.],
       [   0.,    0.,    0., ...,    0., 1024.,    0.],
       [  -0.,    0.,   -0., ...,   -0.,    0., 1024.]])

A Random Matrix¶

randmat = np.random.randn(n, n)

A Matrix Slicing Helper¶

class Level:
    def __init__(self, level, nlevels, n=None):
        self.level = level
        self.nlevels = nlevels
        
        if level > nlevels:
            raise ValueError("level too large")
            
        if n is None:
            n = 2**nlevels
        self.n = n
        
    @property
    def nblock_rows(self):
        return 2**self.level
        
    @property
    def block_nrows(self):
        return self.n//self.nblock_rows

    @property
    def nblock_cols(self):
        return 2**(self.nlevels-self.level)
        
    @property
    def block_ncols(self):
        return self.n//self.nblock_cols
    
    def matview(self, bi, bj, mat):
        br = self.block_nrows
        bc = self.block_ncols
        return mat[br*bi:br*(bi+1), bc*bj:bc*(bj+1)]
    
    def rowview(self, bi, vec):
        br = self.block_nrows
        return vec[br*bi:br*(bi+1)]
    
    def colview(self, bj, vec):
        bc = self.block_ncols
        return vec[bc*bj:bc*(bj+1)]

Level(0, nlevels, 256).matview(0, 0, dft).shape

(256, 0)

Rank-Revealing Factorization¶

epsilon = 1e-10

# ID

def id_decomp(A):
    k, idx, proj = sli.interp_decomp(A, epsilon)
    sort_idx = np.argsort(idx)
    B = A[:,idx[:k]]
    P = np.hstack([np.eye(k), proj])[:,np.argsort(idx)]
    return B, P

# Rank-Revealing Truncated QR

def qr_decomp(A):
    q, r, p = la.qr(A, pivoting=True, mode="economic")
    diag_r = np.diag(r)
    r = r[:, np.argsort(p)]
    
    flags = np.abs(diag_r) >= epsilon
    q = q[:, flags]
    r = r[flags]
    return q, r

#decomp = qr_decomp
decomp = id_decomp

def make_low_rank_matrix(n):
    A0 = np.random.randn(n, n)
    U0, sigma0, VT0 = la.svd(A0)
    sigma = np.exp(-np.arange(n))
    return (U0 * sigma).dot(VT0)

Atest = make_low_rank_matrix(100)
Btest, Ptest = decomp(Atest)

la.norm(Atest - Btest@Ptest)/la.norm(Atest)

1.0565353581388183e-10

plt.imshow(Ptest)

<matplotlib.image.AxesImage at 0x7fdb14b90b10>

Precomputation¶

A = dft

# keys: [level][i, j]
Ps = [{} for i in range(nlevels+1)]
Bs = [{} for i in range(nlevels+1)]

Level 0¶

lev = Level(0, nlevels, n)

assert lev.nblock_rows == 1
for i in range(lev.nblock_rows):
    for j in range(lev.nblock_cols):
        Bs[0][i, j], Ps[0][i, j] = decomp(lev.matview(i, j, A))

Levels 1, ..., L¶

for ilev in range(1, nlevels + 1):
    lev = Level(ilev, nlevels, n)

    for j in range(lev.nblock_rows):
        for k in range(lev.nblock_cols):
            # only process even j
            if j % 2 != 0:
                continue
                
            bblock = np.hstack((
                Bs[ilev-1][j//2, 2*k],
                Bs[ilev-1][j//2, 2*k+1],
                ))
            bblock_top = bblock[:lev.block_nrows]
            bblock_bottom = bblock[lev.block_nrows:]
            assert len(bblock_top)*2 == len(bblock)
            
            Bs[ilev][j, k], Ps[ilev][j, k] = decomp(bblock_top)
            Bs[ilev][j+1, k], Ps[ilev][j+1, k] = decomp(bblock_bottom)

            if (j, k) == (0, 0):
                print(f"Level {ilev}: {lev.block_nrows}x{lev.block_ncols}")
                pB = Bs[ilev-1][j//2, 2*k].shape
                print(f"prev level B: {pB[0]}x{pB[1]}")
                print(f"bblock (top): {bblock_top.shape[0]}x{bblock_top.shape[1]}")
                tB = Bs[ilev][j, k].shape
                tP = Ps[ilev][j, k].shape
                print(f"ID: {tB[0]}x{tB[1]} {tP[0]}x{tP[1]}")
                print()

Level 1: 1024x8
prev level B: 2048x4
bblock (top): 1024x8
ID: 1024x8 8x8

Level 2: 512x16
prev level B: 1024x8
bblock (top): 512x16
ID: 512x16 16x16

Level 3: 256x32
prev level B: 512x16
bblock (top): 256x32
ID: 256x17 17x32

Level 4: 128x64
prev level B: 256x17
bblock (top): 128x34
ID: 128x17 17x34

Level 5: 64x128
prev level B: 128x17
bblock (top): 64x34
ID: 64x17 17x34

Level 6: 32x256
prev level B: 64x17
bblock (top): 32x34
ID: 32x17 17x34

Level 7: 16x512
prev level B: 32x17
bblock (top): 16x34
ID: 16x16 16x34

Level 8: 8x1024
prev level B: 16x16
bblock (top): 8x32
ID: 8x8 8x32

Level 9: 4x2048
prev level B: 8x8
bblock (top): 4x16
ID: 4x4 4x16

levels = []
ranks = []

for ilev in range(1, nlevels + 1):
    levels.append(ilev)
    ranks.append(Bs[ilev][0,0].shape[1])
    
plt.plot(levels, ranks, "o-")
plt.grid()
plt.xlabel("Level")
plt.ylabel("Rank")

Text(0, 0.5, 'Rank')

Only the last-level $B$ actually needs to be retained:

LLB = Bs[-1]
del Bs

Matvec¶

First, generate a random input:

x = np.random.randn(n)

Setup¶

# keys: [ilevel][i, j]
betas = [{} for i in range(nlevels+1)]

Level 0¶

lev = Level(0, nlevels, n)

assert lev.nblock_rows == 1
for i in range(lev.nblock_rows):
    for j in range(lev.nblock_cols):
        betas[0][i, j] = Ps[0][i, j] @ lev.colview(j, x)

Level 1, ..., L¶

for ilev in range(1, nlevels + 1):
    lev = Level(ilev, nlevels, n)

    for j in range(lev.nblock_rows):
        for k in range(lev.nblock_cols):
            beta_glued = np.hstack((
                betas[ilev-1][j//2, 2*k],
                betas[ilev-1][j//2, 2*k+1]
                ))
            
            betas[ilev][j, k] = Ps[ilev][j, k] @ beta_glued

Postprocess¶

Ax = np.zeros(n, dtype=np.complex128)

lev = Level(nlevels, nlevels, n)

assert lev.nblock_cols == 1
for j in range(lev.nblock_rows):
    for k in range(lev.nblock_cols):
        lev.rowview(j, Ax)[:] = LLB[j, k] @ betas[nlevels][j, k]

la.norm(Ax - A@x)/la.norm(A@x)

7.199035602677768e-11