Threads vs Cache

In [1]:
# Recreate the scratch directory tmp/ (anything left in it from a previous run is deleted).
!rm -Rf tmp
!mkdir -p tmp
In [9]:
%%writefile tmp/threads-vs-cache.c

// Modified from Lin & Snyder, "Principles of Parallel Programming"
// http://books.google.com/books?id=iYghAQAAIAAJ

#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "timing.h"


/*
 * Micro-benchmark: each OpenMP thread repeatedly adds its share of `array`
 * into its own slot of `sums`; the wall time of the whole parallel region
 * is printed at the end.
 *
 * Slots of neighbouring threads are `stride` elements apart.  With
 * stride == 1 they are adjacent in memory, so they presumably end up in the
 * same cache line on typical hardware (NOTE(review): hardware-dependent --
 * confirm on the target machine).  Increase `stride` to spread them out.
 *
 * Returns 0 on success, EXIT_FAILURE if the input array cannot be allocated.
 */
int main()
{
  const int array_size = 256*1000;

  int *array = malloc(array_size * sizeof(int));
  if (array == NULL)
  {
    fprintf(stderr, "failed to allocate %d ints\n", array_size);
    return EXIT_FAILURE;
  }
  for (int i = 0; i < array_size; ++i)
    array[i] = rand() % 10;

  int num_threads = omp_get_max_threads();
  const int stride = 1;

  // One accumulator per thread.
  //  - volatile: keeps the compiler from collapsing the repeated += into a
  //    register, so every update is a real memory access.
  //  - unsigned: the running totals exceed INT_MAX (5000 passes over 256000
  //    values in 0..9); unsigned arithmetic wraps in a well-defined way,
  //    while signed overflow is undefined behaviour.  Element size is the
  //    same as int, so the memory layout is unchanged.
  volatile unsigned sums[num_threads*stride];

  // A variable-length array cannot have an initializer, so clear it by hand;
  // otherwise the += below would read indeterminate values.
  for (int i = 0; i < num_threads*stride; ++i)
    sums[i] = 0;

  timestamp_type t1;
  get_timestamp(&t1);

  // array_size must be listed explicitly: with default(none), newer
  // compilers (e.g. GCC >= 9) no longer treat const-qualified variables as
  // implicitly shared.  firstprivate is accepted by older compilers as well.
#pragma omp parallel default(none) shared(sums,num_threads) firstprivate(array,stride,array_size)
  {
    // sums was sized from omp_get_max_threads(); make sure the team that
    // actually started has exactly that many threads.
    assert(omp_get_num_threads() == num_threads);

    int tid = omp_get_thread_num();

    // Repeat the reduction many times so the run is long enough to time.
    for (int trp = 0; trp < 5000; ++trp)
    {
      // Work-shared loop: the iterations are divided among the threads, and
      // each thread accumulates only into its own slot.
#pragma omp for
      for (int i = 0; i < array_size; ++i)
        sums[tid*stride] += array[i];
    }
  }

  timestamp_type t2;
  get_timestamp(&t2);

  double elapsed = timestamp_diff_in_seconds(t1, t2);
  printf("elapsed wall time: %g s (%d threads)\n", elapsed, num_threads);

  free(array);
  return 0;
}
Writing tmp/threads-vs-cache.c
In [12]:
# Build the benchmark. Libraries are resolved in command-line order, so -lrt
# goes after the source file that needs it; && stops the build if cd fails.
! cd tmp && gcc -std=gnu99 -fopenmp -I.. threads-vs-cache.c -othreads-vs-cache -lrt
In [17]:
# Baseline: run the benchmark with a single OpenMP thread.
!OMP_NUM_THREADS=1 ./tmp/threads-vs-cache
elapsed wall time: 3.36545 s (1 threads)
In [18]:
# Same benchmark with two OpenMP threads. In the recorded output this run is
# about 3x slower than the single-thread run, even though each thread does
# half the additions. NOTE(review): presumably because with stride = 1 the two
# per-thread accumulators are adjacent ints sharing a cache line (false
# sharing) -- confirm by rebuilding with a larger stride.
!OMP_NUM_THREADS=2 ./tmp/threads-vs-cache
elapsed wall time: 10.0353 s (2 threads)
In [ ]: