In addition to numpy, we need a module to handle sparse matrices.
scipy.sparse has everything we need.
import numpy as np
import scipy.sparse
from random import randrange
# Build a small dense n-by-n test matrix that is mostly zeros.
n = 9
A = np.zeros((n, n))
# Perform int(0.3 * n**2) random assignments. Row and column are drawn
# independently each time, so the same cell may be overwritten and the final
# number of non-zero entries can be smaller than the number of assignments.
for i in range(int(n**2 * 0.3)):
    A[randrange(n), randrange(n)] = np.random.randn()
# Bare expression: shows A rounded to two decimals when run as a notebook cell
# (A itself is not modified).
A.round(2)
Now convert this to a CSR matrix:
# Build a compressed-sparse-row (CSR) matrix from the dense array A.
Acsr = scipy.sparse.csr_matrix(A)
# The bare expressions below only display values when run as notebook cells.
Acsr
# Row pointers: the stored entries of row i occupy positions
# indptr[i]:indptr[i+1] of `indices` and `data` (per the scipy CSR docs).
Acsr.indptr
# Column index of each stored entry.
Acsr.indices
# Stored values, rounded to two decimals for display.
Acsr.data.round(2)
# Dense storage of a 10**6 x 10**6 matrix at 8 bytes per entry,
# expressed in gigabytes:
(10**6) ** 2 * 8 / 1e9
# = 8000 GB, i.e. 8 terabytes

# Sparse storage estimate in bytes, counting per row of a 10**6-row matrix:
occupied = 10**6 * (
    4  # one 32-bit integer for the row index
    + 4  # one 32-bit integer for the column index
    + 8  # one 64-bit double-precision value
)
# Same quantity expressed in gigabytes:
occupied / 1e9
# = 0.016 GB, i.e. 16 megabytes