#!/usr/bin/env python
# coding: utf-8

# # Threads vs Cache

# In[1]:


# Start from a clean scratch directory: remove any previous `tmp` tree, then
# recreate it empty. The commands run in order, relative to the notebook's
# current working directory.
for setup_cmd in ('rm -Rf tmp', 'mkdir -p tmp'):
    get_ipython().system(setup_cmd)


# In[4]:


# Write the C/OpenMP benchmark source to tmp/threads-vs-cache.c.
#
# The program fills an array with small random integers, then has every OpenMP
# thread repeatedly add its share of the array into its own slot of `sums`,
# and prints the elapsed wall time. Slots are `stride` elements apart; with
# stride = 1 the per-thread accumulators sit next to each other in memory
# (presumably to expose cache-line contention between threads -- change
# `stride` in the C source to experiment).
#
# The body is a raw string so that the C escape in the printf format ("\n")
# reaches the file unchanged.
get_ipython().run_cell_magic('writefile', 'tmp/threads-vs-cache.c', r'''
// modified from Lin & Snyder
// http://books.google.com/books?id=iYghAQAAIAA

#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "timing.h"


int main()
{
  const int array_size = 256*1000;

  int *array = malloc(array_size * sizeof(int));
  if (!array)
  {
    // Without this check a failed allocation would be dereferenced below.
    perror("malloc");
    return EXIT_FAILURE;
  }
  for (int i = 0; i < array_size; ++i)
    array[i] = rand() % 10;

  int num_threads = omp_get_max_threads();
  const int stride = 1;

  // One accumulator per thread, `stride` elements apart.
  // unsigned (same size as int, so the memory layout is unchanged): with few
  // threads the running totals exceed INT_MAX, and signed overflow is
  // undefined behaviour, whereas unsigned arithmetic wraps.
  volatile unsigned sums[num_threads*stride];
  // A variable-length array cannot take an initializer, so zero it by hand;
  // otherwise the += below starts from indeterminate values.
  for (int i = 0; i < num_threads*stride; ++i)
    sums[i] = 0;

  timestamp_type t1;
  get_timestamp(&t1);

#pragma omp parallel default(none) shared(sums,num_threads,array_size) firstprivate(array,stride)
  {
    // The sums array was sized from omp_get_max_threads(); make sure the
    // team really has that many threads.
    assert(omp_get_num_threads() == num_threads);

    int tid = omp_get_thread_num();

    // Repeat the reduction many times so the run is long enough to time.
    for (int trp = 0; trp < 5000; ++trp)
    {
#pragma omp for
      for (int i = 0; i < array_size; ++i)
        sums[tid*stride] += array[i];
    }
  }

  timestamp_type t2;
  get_timestamp(&t2);

  double elapsed = timestamp_diff_in_seconds(t1, t2);
  printf("elapsed wall time: %g s (%d threads)\n", elapsed, num_threads);

  free(array);
  return 0;
}
''')


# In[5]:


# Compile the benchmark inside tmp/ (timing.h is looked up one directory up,
# via -I..).
#  * `&&` instead of `;`: do not run gcc in the wrong directory if `cd` fails.
#  * -lrt goes AFTER the source file: the linker resolves libraries in
#    command-line order, so a library named before the object that needs it
#    can be dropped and the link then fails with undefined references.
get_ipython().system('cd tmp && gcc -std=gnu99 -fopenmp -I.. threads-vs-cache.c -o threads-vs-cache -lrt')


# In[11]:


# Baseline: run the compiled benchmark with OMP_NUM_THREADS=1, i.e. a single
# OpenMP thread; the program prints its elapsed wall time and thread count.
get_ipython().system('OMP_NUM_THREADS=1 ./tmp/threads-vs-cache')


# In[12]:


# Same benchmark with two OpenMP threads. OMP_PLACES=cores asks the OpenMP
# runtime to use one place per core (NOTE(review): whether threads are then
# actually pinned depends on the runtime's binding policy -- confirm if the
# timing comparison relies on it). Compare the printed wall time with the
# single-thread run.
get_ipython().system('OMP_PLACES=cores OMP_NUM_THREADS=2 ./tmp/threads-vs-cache')


# In[ ]: