#!/usr/bin/env python
# coding: utf-8

# # NUMA and Bandwidths
#
# Notebook export: each cell writes a small C/OpenMP/libnuma test program
# into tmp/, compiles it with gcc, and runs it.  The script relies on
# get_ipython() and therefore must be executed inside IPython/Jupyter.
#
# NOTE(review): this file was recovered from a copy in which every "<...>"
# span had been stripped (header names in #include lines, loop conditions,
# and everything between a "<" and the next ">").  The stripped spans were
# rebuilt from the surrounding text; each rebuilt span is flagged below and
# should be confirmed against the original notebook.

# In[1]:


get_ipython().system('rm -Rf tmp')
get_ipython().system('mkdir -p tmp')


# ## Gathering Information

# In[2]:


# NOTE(review): the six system header names were lost; the list below is the
# set required by the calls made in these programs (libnuma, stdio, assert,
# pthread affinity, OpenMP) -- confirm names/order against the original.
# The print_bitmask loop condition was restored from the remnant "isize".
get_ipython().run_cell_magic(
    'writefile', 'tmp/numa-info.c',
    '\n'
    '#define _GNU_SOURCE\n'
    '#include <numa.h>\n'
    '#include <stdio.h>\n'
    '#include <stdlib.h>\n'
    '#include <assert.h>\n'
    '#include <pthread.h>\n'
    '#include <omp.h>\n'
    '#include "timing.h"\n'
    '\n'
    '\n'
    'void print_bitmask(const struct bitmask *bm)\n'
    '{\n'
    '  for(size_t i=0; i<bm->size; ++i)\n'
    '    printf("%d", numa_bitmask_isbitset(bm, i));\n'
    '}\n'
    '\n'
    '\n'
    'int main(int argc, const char **argv)\n'
    '{\n'
    '  int num_cpus = numa_num_task_cpus();\n'
    '  printf("num cpus: %d\\n", num_cpus);\n'
    '\n'
    '  printf("numa available: %d\\n", numa_available());\n'
    '  numa_set_localalloc();\n'
    '\n'
    '  struct bitmask *bm = numa_bitmask_alloc(num_cpus);\n'
    '  for (int i=0; i<=numa_max_node(); ++i)\n'
    '  {\n'
    '    numa_node_to_cpus(i, bm);\n'
    '    printf("numa node %d ", i);\n'
    '    print_bitmask(bm);\n'
    '    printf(" - %g GiB\\n", numa_node_size(i, 0) / (1024.*1024*1024.));\n'
    '  }\n'
    '  numa_bitmask_free(bm);\n'
    '\n'
    '  return 0;\n'
    '}\n'
)


# In[3]:


get_ipython().system(' cd tmp; gcc -O3 -std=gnu99 -fopenmp -I.. numa-info.c -onuma-info -lrt -lnuma')
get_ipython().system(' ./tmp/numa-info')


# ## A Shared Header

# In[4]:


# NOTE(review): header names restored as in the cell above.  The body of
# measure_access after "for (size_t i = 0; i" was lost.  The restored loop
# touches one byte per iteration with a stride of 1009 bytes (larger than a
# cache line), which is consistent with the callers computing bandwidth as
# array_size*ntrips*cache_line_size / t.  The stride value and the name
# timestamp_diff_in_seconds (expected to come from timing.h) are
# reconstructions -- confirm against the original notebook and timing.h.
get_ipython().run_cell_magic(
    'writefile', 'tmp/numatest.h',
    '\n'
    '#define _GNU_SOURCE\n'
    '#include <numa.h>\n'
    '#include <stdio.h>\n'
    '#include <stdlib.h>\n'
    '#include <assert.h>\n'
    '#include <pthread.h>\n'
    '#include <omp.h>\n'
    '#include "timing.h"\n'
    '\n'
    'void pin_to_core(size_t core)\n'
    '{\n'
    '  cpu_set_t cpuset;\n'
    '  CPU_ZERO(&cpuset);\n'
    '  CPU_SET(core, &cpuset);\n'
    '  pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);\n'
    '}\n'
    '\n'
    'double measure_access(void *x, size_t array_size, size_t ntrips)\n'
    '{\n'
    '  timestamp_type t1;\n'
    '  get_timestamp(&t1);\n'
    '\n'
    '  for (size_t i = 0; i < ntrips; ++i)\n'
    '    for (size_t j = 0; j < array_size; ++j)\n'
    '    {\n'
    '      *(((char*)x) + ((j * 1009) % array_size)) += 1;\n'
    '    }\n'
    '\n'
    '  timestamp_type t2;\n'
    '  get_timestamp(&t2);\n'
    '\n'
    '  return timestamp_diff_in_seconds(t1, t2);\n'
    '}\n'
)


# ## Sequential Access

# In[5]:


# NOTE(review): the cell header, the section title above, and everything in
# this program up to '-> core 0 : BW %g MB/s' were lost and have been
# rebuilt to mirror the two contention programs below (which survived
# mostly intact).  The "sequential" label is a reconstruction.  The loop
# index is an int so that it matches the surviving "%d" format and the
# comparison with the int thread id.
get_ipython().run_cell_magic(
    'writefile', 'tmp/numa-bw-seq.c',
    '\n'
    '#include "numatest.h"\n'
    '\n'
    'int main(int argc, const char **argv)\n'
    '{\n'
    '  int num_cpus = numa_num_task_cpus();\n'
    '  numa_set_localalloc();\n'
    '\n'
    '  char *x;\n'
    '  const size_t cache_line_size = 64;\n'
    '  const size_t array_size = 100*1000*1000;\n'
    '  size_t ntrips = 2;\n'
    '\n'
    '#pragma omp parallel\n'
    '  {\n'
    '    assert(omp_get_num_threads() == num_cpus);\n'
    '    int tid = omp_get_thread_num();\n'
    '\n'
    '    pin_to_core(tid);\n'
    '    if(tid == 0)\n'
    '      x = (char *) numa_alloc_local(array_size);\n'
    '\n'
    '    // {{{ single access\n'
    '#pragma omp barrier\n'
    '    for (int i = 0; i < num_cpus; ++i)\n'
    '    {\n'
    '      if (tid == i)\n'
    '      {\n'
    '        double t = measure_access(x, array_size, ntrips);\n'
    '        printf("sequential core %d -> core 0 : BW %g MB/s\\n",\n'
    '            i, array_size*ntrips*cache_line_size / t / 1e6);\n'
    '      }\n'
    '#pragma omp barrier\n'
    '    }\n'
    '    // }}}\n'
    '  }\n'
    '  numa_free(x, array_size);\n'
    '\n'
    '  return 0;\n'
    '}\n'
)


# In[6]:


get_ipython().system(' cd tmp; gcc -O3 -std=gnu99 -fopenmp -I.. numa-bw-seq.c -onuma-bw-seq -lrt -lnuma')
get_ipython().system(' ./tmp/numa-bw-seq')


# ## Contention: Everybody

# In[7]:


# NOTE(review): the span between "for (size_t i = 0; i" and
# "> core 0 : BW" was lost; the loop header, the per-thread guard and the
# "all-contention" label are reconstructions modelled on the surviving
# "two-contention" message in the next program.
get_ipython().run_cell_magic(
    'writefile', 'tmp/numa-bw-all.c',
    '\n'
    '#include "numatest.h"\n'
    '\n'
    'int main(int argc, const char **argv)\n'
    '{\n'
    '  int num_cpus = numa_num_task_cpus();\n'
    '  numa_set_localalloc();\n'
    '\n'
    '  char *x;\n'
    '  const size_t cache_line_size = 64;\n'
    '  const size_t array_size = 100*1000*1000;\n'
    '  size_t ntrips = 2;\n'
    '\n'
    '#pragma omp parallel\n'
    '  {\n'
    '    assert(omp_get_num_threads() == num_cpus);\n'
    '    int tid = omp_get_thread_num();\n'
    '\n'
    '    pin_to_core(tid);\n'
    '    if(tid == 0)\n'
    '      x = (char *) numa_alloc_local(array_size);\n'
    '\n'
    '    // {{{ everybody contends for one\n'
    '\n'
    '    {\n'
    '      if (tid == 0) puts("");\n'
    '\n'
    '#pragma omp barrier\n'
    '      double t = measure_access(x, array_size, ntrips);\n'
    '#pragma omp barrier\n'
    '      for (size_t i = 0; i < num_cpus; ++i)\n'
    '      {\n'
    '        if (tid == i)\n'
    '          printf("all-contention core %d -> core 0 : BW %g MB/s\\n",\n'
    '              tid, array_size*ntrips*cache_line_size / t / 1e6);\n'
    '#pragma omp barrier\n'
    '      }\n'
    '    }\n'
    '\n'
    '    // }}}\n'
    '\n'
    '  }\n'
    '  numa_free(x, array_size);\n'
    '\n'
    '  return 0;\n'
    '}\n'
)


# In[9]:


get_ipython().system(' cd tmp; gcc -O3 -std=gnu99 -fopenmp -I.. numa-bw-all.c -onuma-bw-all -lrt -lnuma')
get_ipython().system(' ./tmp/numa-bw-all')


# ## Contention: Pairs

# In[10]:


# NOTE(review): the span between "for (size_t i = 1; i" and the first
# "> core 0 : BW" was lost.  The restored code has core 0 and core i measure
# at the same time and core 0 report first, which is what the surviving
# second half of the loop (core i reporting after a barrier) implies.
# t is zero-initialised because only threads 0 and i assign it.
get_ipython().run_cell_magic(
    'writefile', 'tmp/numa-bw-two.c',
    '\n'
    '#include "numatest.h"\n'
    '\n'
    'int main(int argc, const char **argv)\n'
    '{\n'
    '  int num_cpus = numa_num_task_cpus();\n'
    '  numa_set_localalloc();\n'
    '\n'
    '  char *x;\n'
    '  const size_t cache_line_size = 64;\n'
    '  const size_t array_size = 100*1000*1000;\n'
    '  size_t ntrips = 2;\n'
    '\n'
    '#pragma omp parallel\n'
    '  {\n'
    '    assert(omp_get_num_threads() == num_cpus);\n'
    '    int tid = omp_get_thread_num();\n'
    '\n'
    '    pin_to_core(tid);\n'
    '    if(tid == 0)\n'
    '      x = (char *) numa_alloc_local(array_size);\n'
    '\n'
    '    // {{{ zero and someone else contending\n'
    '\n'
    '    if (tid == 0) puts("");\n'
    '\n'
    '#pragma omp barrier\n'
    '    for (size_t i = 1; i < num_cpus; ++i)\n'
    '    {\n'
    '      double t = 0;\n'
    '      if (tid == 0 || tid == i)\n'
    '        t = measure_access(x, array_size, ntrips);\n'
    '\n'
    '#pragma omp barrier\n'
    '      if (tid == 0)\n'
    '      {\n'
    '        printf("two-contention core %d -> core 0 : BW %g MB/s\\n",\n'
    '            tid, array_size*ntrips*cache_line_size / t / 1e6);\n'
    '      }\n'
    '#pragma omp barrier\n'
    '      if (tid == i)\n'
    '      {\n'
    '        printf("two-contention core %d -> core 0 : BW %g MB/s\\n\\n",\n'
    '            tid, array_size*ntrips*cache_line_size / t / 1e6);\n'
    '      }\n'
    '#pragma omp barrier\n'
    '    }\n'
    '  }\n'
    '  numa_free(x, array_size);\n'
    '\n'
    '  return 0;\n'
    '}\n'
)


# In[11]:


get_ipython().system(' cd tmp; gcc -O3 -std=gnu99 -fopenmp -I.. numa-bw-two.c -onuma-bw-two -lrt -lnuma')
get_ipython().system(' ./tmp/numa-bw-two')


# Tests based on `numatest.cpp` by James Brock
# http://stackoverflow.com/questions/7259363/measuring-numa-non-uniform-memory-access-no-observable-asymmetry-why
#
# Changes by Andreas Kloeckner, 10/2012:
# - Rewritten in C + OpenMP
# - Added contention tests