Register Pressure

In [1]:
# Start from an empty scratch directory; the generated C source and object file go here.
!rm -Rf tmp
!mkdir -p tmp
In [21]:
%%writefile tmp/pressure.c

/*
 * Register-pressure demo: keeps a set of double accumulators live across a
 * loop so that the compiler's register allocation can be inspected in the
 * disassembly of the resulting object file.
 *
 * Thirty doubles (x00..x29) are declared, but as written only x00..x09 take
 * part in the computation; the terms and updates for x10..x29 are disabled by
 * the comment markers in the ALL macro and in the loop body.
 *
 * Returns the sum of the active variables after the loop, converted to int.
 */
int main()
{
  int result = 0;

  {
    /* Candidate working set: 30 independent accumulators with distinct
       initial values. */
    double 
      x00 = 0,
      x01 = 3,
      x02 = 1,
      x03 = 5,
      x04 = 2,
      x05 = 8,
      x06 = 9,
      x07 = 11,
      x08 = 99,
      x09 = 111,
      x10 = 33+0,
      x11 = 33+3,
      x12 = 33+1,
      x13 = 33+5,
      x14 = 33+2,
      x15 = 33+8,
      x16 = 33+9,
      x17 = 33+11,
      x18 = 33+99,
      x19 = 33+111,
      x20 = 17+0,
      x21 = 17+3,
      x22 = 17+1,
      x23 = 17+5,
      x24 = 17+2,
      x25 = 17+8,
      x26 = 17+9,
      x27 = 17+11,
      x28 = 17+99,
      x29 = 17+111;

      /* NOTE(review): 'a' is not read anywhere in the visible code. */
      double a = 0;
      for (int i = 0; i< 1000*1000; ++i)
      {
        /* ALL expands to the sum of the currently active variables. It is
           re-expanded textually at every use, so each update below sees the
           values already modified by the updates before it. The x10..x29
           terms sit inside the macro's own comment and are inactive. Being a
           preprocessor macro, ALL stays defined after the loop's closing
           brace and is used once more there. */
#define ALL (\
    x00 + x01 + x02 + x03 + x04 + x05 + x06 + x07 + x08 + x09 /*+ \
    x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + \
    x20 + x21 + x22 + x23 + x24 + x25 + x26 + x27 + x28 + x29 */\
    )

        /* Each active variable is bumped by i times the current sum; i is
           converted to double for the multiplication. */
        x00 += i*ALL;
        x01 += i*ALL;
        x02 += i*ALL;
        x03 += i*ALL;
        x04 += i*ALL;
        x05 += i*ALL;
        x06 += i*ALL;
        x07 += i*ALL;
        x08 += i*ALL;
        x09 += i*ALL;
        /* Disabled updates for x10..x29 follow; when enabling any of them,
           enable the matching terms in ALL as well. */
        /*
        x10 += i*ALL;
        x11 += i*ALL;
        x12 += i*ALL;
        x13 += i*ALL;
        x14 += i*ALL;
        x15 += i*ALL;
        x16 += i*ALL;
        x17 += i*ALL;
        x18 += i*ALL;
        x19 += i*ALL;
        x20 += i*ALL;
        x21 += i*ALL;
        x22 += i*ALL;
        x23 += i*ALL;
        x24 += i*ALL;
        x25 += i*ALL;
        x26 += i*ALL;
        x27 += i*ALL;
        x28 += i*ALL;
        x29 += i*ALL;
        */
      }
      /* Fold the final sum into the return value so the loop's results are
         used. NOTE(review): the accumulators grow on every iteration, so
         this sum is presumably far outside the range of int when it is
         converted -- confirm the return value is not meant to be meaningful. */
      result += ALL;
    }

  return result;
}
Overwriting tmp/pressure.c
In [22]:
# Compile to an object file only (-c) with optimization enabled (-O), then print its disassembly.
!cd tmp; gcc -O -c pressure.c
!objdump --disassemble tmp/pressure.o
tmp/pressure.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <main>:
   0:	b8 00 00 00 00       	mov    $0x0,%eax
   5:	f2 0f 10 0d 00 00 00 	movsd  0x0(%rip),%xmm1        # d <main+0xd>
   c:	00 
   d:	f2 0f 10 15 00 00 00 	movsd  0x0(%rip),%xmm2        # 15 <main+0x15>
  14:	00 
  15:	f2 0f 10 1d 00 00 00 	movsd  0x0(%rip),%xmm3        # 1d <main+0x1d>
  1c:	00 
  1d:	f2 0f 10 2d 00 00 00 	movsd  0x0(%rip),%xmm5        # 25 <main+0x25>
  24:	00 
  25:	f2 0f 10 35 00 00 00 	movsd  0x0(%rip),%xmm6        # 2d <main+0x2d>
  2c:	00 
  2d:	f2 0f 10 3d 00 00 00 	movsd  0x0(%rip),%xmm7        # 35 <main+0x35>
  34:	00 
  35:	f2 44 0f 10 05 00 00 	movsd  0x0(%rip),%xmm8        # 3e <main+0x3e>
  3c:	00 00 
  3e:	f2 44 0f 10 0d 00 00 	movsd  0x0(%rip),%xmm9        # 47 <main+0x47>
  45:	00 00 
  47:	f2 44 0f 10 1d 00 00 	movsd  0x0(%rip),%xmm11        # 50 <main+0x50>
  4e:	00 00 
  50:	66 45 0f ef d2       	pxor   %xmm10,%xmm10
  55:	66 0f ef e4          	pxor   %xmm4,%xmm4
  59:	f2 0f 2a e0          	cvtsi2sd %eax,%xmm4
  5d:	66 41 0f 28 c2       	movapd %xmm10,%xmm0
  62:	f2 41 0f 58 c3       	addsd  %xmm11,%xmm0
  67:	f2 41 0f 58 c1       	addsd  %xmm9,%xmm0
  6c:	f2 41 0f 58 c0       	addsd  %xmm8,%xmm0
  71:	f2 0f 58 c7          	addsd  %xmm7,%xmm0
  75:	f2 0f 58 c6          	addsd  %xmm6,%xmm0
  79:	f2 0f 58 c5          	addsd  %xmm5,%xmm0
  7d:	f2 0f 58 c3          	addsd  %xmm3,%xmm0
  81:	f2 0f 58 c2          	addsd  %xmm2,%xmm0
  85:	f2 0f 58 c1          	addsd  %xmm1,%xmm0
  89:	f2 0f 59 c4          	mulsd  %xmm4,%xmm0
  8d:	f2 44 0f 58 d0       	addsd  %xmm0,%xmm10
  92:	66 41 0f 28 c2       	movapd %xmm10,%xmm0
  97:	f2 41 0f 58 c3       	addsd  %xmm11,%xmm0
  9c:	f2 41 0f 58 c1       	addsd  %xmm9,%xmm0
  a1:	f2 41 0f 58 c0       	addsd  %xmm8,%xmm0
  a6:	f2 0f 58 c7          	addsd  %xmm7,%xmm0
  aa:	f2 0f 58 c6          	addsd  %xmm6,%xmm0
  ae:	f2 0f 58 c5          	addsd  %xmm5,%xmm0
  b2:	f2 0f 58 c3          	addsd  %xmm3,%xmm0
  b6:	f2 0f 58 c2          	addsd  %xmm2,%xmm0
  ba:	f2 0f 58 c1          	addsd  %xmm1,%xmm0
  be:	f2 0f 59 c4          	mulsd  %xmm4,%xmm0
  c2:	f2 44 0f 58 d8       	addsd  %xmm0,%xmm11
  c7:	66 41 0f 28 c2       	movapd %xmm10,%xmm0
  cc:	f2 41 0f 58 c3       	addsd  %xmm11,%xmm0
  d1:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
  d6:	f2 45 0f 58 e1       	addsd  %xmm9,%xmm12
  db:	f2 45 0f 58 e0       	addsd  %xmm8,%xmm12
  e0:	f2 44 0f 58 e7       	addsd  %xmm7,%xmm12
  e5:	f2 44 0f 58 e6       	addsd  %xmm6,%xmm12
  ea:	f2 44 0f 58 e5       	addsd  %xmm5,%xmm12
  ef:	f2 44 0f 58 e3       	addsd  %xmm3,%xmm12
  f4:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
  f9:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
  fe:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 103:	f2 45 0f 58 cc       	addsd  %xmm12,%xmm9
 108:	f2 41 0f 58 c1       	addsd  %xmm9,%xmm0
 10d:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 112:	f2 45 0f 58 e0       	addsd  %xmm8,%xmm12
 117:	f2 44 0f 58 e7       	addsd  %xmm7,%xmm12
 11c:	f2 44 0f 58 e6       	addsd  %xmm6,%xmm12
 121:	f2 44 0f 58 e5       	addsd  %xmm5,%xmm12
 126:	f2 44 0f 58 e3       	addsd  %xmm3,%xmm12
 12b:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
 130:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 135:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 13a:	f2 45 0f 58 c4       	addsd  %xmm12,%xmm8
 13f:	f2 41 0f 58 c0       	addsd  %xmm8,%xmm0
 144:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 149:	f2 44 0f 58 e7       	addsd  %xmm7,%xmm12
 14e:	f2 44 0f 58 e6       	addsd  %xmm6,%xmm12
 153:	f2 44 0f 58 e5       	addsd  %xmm5,%xmm12
 158:	f2 44 0f 58 e3       	addsd  %xmm3,%xmm12
 15d:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
 162:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 167:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 16c:	f2 41 0f 58 fc       	addsd  %xmm12,%xmm7
 171:	f2 0f 58 c7          	addsd  %xmm7,%xmm0
 175:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 17a:	f2 44 0f 58 e6       	addsd  %xmm6,%xmm12
 17f:	f2 44 0f 58 e5       	addsd  %xmm5,%xmm12
 184:	f2 44 0f 58 e3       	addsd  %xmm3,%xmm12
 189:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
 18e:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 193:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 198:	f2 41 0f 58 f4       	addsd  %xmm12,%xmm6
 19d:	f2 0f 58 c6          	addsd  %xmm6,%xmm0
 1a1:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 1a6:	f2 44 0f 58 e5       	addsd  %xmm5,%xmm12
 1ab:	f2 44 0f 58 e3       	addsd  %xmm3,%xmm12
 1b0:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
 1b5:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 1ba:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 1bf:	f2 41 0f 58 ec       	addsd  %xmm12,%xmm5
 1c4:	f2 0f 58 c5          	addsd  %xmm5,%xmm0
 1c8:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 1cd:	f2 44 0f 58 e3       	addsd  %xmm3,%xmm12
 1d2:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
 1d7:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 1dc:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 1e1:	f2 41 0f 58 dc       	addsd  %xmm12,%xmm3
 1e6:	f2 0f 58 c3          	addsd  %xmm3,%xmm0
 1ea:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 1ef:	f2 44 0f 58 e2       	addsd  %xmm2,%xmm12
 1f4:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 1f9:	f2 44 0f 59 e4       	mulsd  %xmm4,%xmm12
 1fe:	f2 41 0f 58 d4       	addsd  %xmm12,%xmm2
 203:	f2 0f 58 c2          	addsd  %xmm2,%xmm0
 207:	66 44 0f 28 e0       	movapd %xmm0,%xmm12
 20c:	f2 44 0f 58 e1       	addsd  %xmm1,%xmm12
 211:	f2 41 0f 59 e4       	mulsd  %xmm12,%xmm4
 216:	f2 0f 58 cc          	addsd  %xmm4,%xmm1
 21a:	83 c0 01             	add    $0x1,%eax
 21d:	3d 40 42 0f 00       	cmp    $0xf4240,%eax
 222:	0f 85 2d fe ff ff    	jne    55 <main+0x55>
 228:	f2 0f 58 c1          	addsd  %xmm1,%xmm0
 22c:	f2 0f 58 05 00 00 00 	addsd  0x0(%rip),%xmm0        # 234 <main+0x234>
 233:	00 
 234:	f2 0f 2c c0          	cvttsd2si %xmm0,%eax
 238:	c3                   	retq   
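  • Grow the working set by moving the comment markers (in both the ALL macro and the loop body), then recompile and look for spills (values being stored to and reloaded from the stack) in the assembly listing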
  • At what size of the working set do you expect to see no spills?