# coding: utf-8
# # Interpolative Decomposition
# In[1]:
import numpy as np
import scipy.linalg as la
import matplotlib.pyplot as pt
# ## Obtain a low-rank matrix
# In[5]:
n = 100
A0 = np.random.randn(n, n)
U0, sigma0, VT0 = la.svd(A0)
print(la.norm((U0*sigma0).dot(VT0) - A0))
sigma = np.exp(-np.arange(n))
A = (U0 * sigma).dot(VT0)
pt.semilogy(sigma)
# ## Run the factorization
# In[4]:
import scipy.linalg.interpolative as sli
# Compute a fixed-rank factorization:
#
# (There's also an adaptive, fixed-precision mode.)
# In[17]:
k = 20
idx, proj = sli.interp_decomp(A, k)
# What does `numpy.argsort` do?
# In[25]:
idx
# In[24]:
sort_idx = np.argsort(idx)
# In[23]:
idx[sort_idx]
# Reconstruct the matrix:
# In[30]:
B = A[:,idx[:k]]
P = np.hstack([np.eye(k), proj])[:,np.argsort(idx)]
Aapprox = np.dot(B, P)
# In[31]:
la.norm(A - Aapprox, 2)
# What's the structure of $P$?
#
# (ignoring the column permuation)
# In[29]:
pt.imshow(np.hstack([np.eye(k), proj]))
# * Why don't we use *just* the ID then?
# * Reasonable question, answer is: we should.
# * BUT: The ID as called above is based on the same randomized machinery that we've built up, so it's not like we'd *actually* reduce complexity that way.