import pyopencl as cl
import numpy as np
import numpy.linalg as la
# Shorthand for the OpenCL memory-flag constants (READ_WRITE, COPY_HOST_PTR, ...).
mf = cl.mem_flags
This notebook demonstrates a simple PyOpenCL GPU workflow that touches all the essential pieces: context and queue creation, buffer allocation, host-device data transfer, kernel compilation and launch, and result verification.
# Host-side input: 50,000 uniform random floats in [0, 1), stored as
# single-precision to match the kernel's `float` argument type.
a = np.asarray(np.random.rand(50000), dtype=np.float32)
Now create a context `ctx` and a command queue `queue` on that context:
# Create an OpenCL context (may interactively prompt for a platform/device)
# and a command queue through which all operations on that context are issued.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
Now allocate a device buffer. The constructor signature is `Buffer(context, flags, size=None, hostbuf=None)`:
# Allocate an uninitialized read/write device buffer sized to hold `a`.
a_buf = cl.Buffer(ctx, mf.READ_WRITE, size=a.nbytes)
Then transfer the host array into the device buffer:
# Host -> device copy of `a` into a_buf.
# NOTE(review): enqueue_copy with a numpy host array blocks by default
# (is_blocking=True) -- confirm against the installed pyopencl version.
cl.enqueue_copy(queue, a_buf, a)
Here is the kernel source code; `twice` doubles each array element in place, one work-item per element:
# Compile an OpenCL program from source for this context. Each work-item
# reads its global index and doubles the corresponding array element.
prg = cl.Program(ctx, """
__kernel void twice(__global float *a)
{
int gid = get_global_id(0);
a[gid] = 2*a[gid];
}
""").build()
Run the kernel, launching one work-item per array element.
# Launch: global work size = a.shape (one work-item per element);
# None lets the implementation pick the local work-group size.
prg.twice(queue, a.shape, None, a_buf)
Copy the data back from the device to the host.
# Allocate a host destination with the same shape and dtype as `a`,
# then read the device buffer back into it.
result = np.empty(a.shape, dtype=a.dtype)
cl.enqueue_copy(queue, result, a_buf)
Check the result: the first norm (the error) should be near zero; the second shows the magnitude of `a` for scale.
# Sanity check: result should equal 2*a, so `error` is ~0; `scale`
# gives the norm of the input for comparison.
error = la.norm(result - 2 * a)
scale = la.norm(a)
print(error, scale)