Why does this memory intensive C++ program get poor memory access speed?
 
I ran MemTest86 and it showed:
Intel Core i5-750 2.67 GHz (quad core)
  32K L1 88,893 MB/Sec
 256K L2 37,560 MB/Sec
 8 MB L3 26,145 MB/Sec
 8.0 GB RAM 11,852 MB/Sec
The memory access speed my program achieves is substantially slower
than even a worst-case cache hit ratio should provide.
For example, I often get only 117 MB/Sec. The program was compiled
with the optimize-for-speed flags under VS C++ 9.0:
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include <time.h>
// Unsigned integer type used for table entries, indices and iteration counts.
typedef unsigned int uint32;
// Number of chained table lookups performed inside the timed loop of
// Process() (0x2fffffff == 805,306,367).
uint32 Max = 0x2fffffff;
// Returns a pseudo-random value in [0, size) built from two rand() draws:
// the first draw forms the high part, the second the low part.
//
// size - exclusive upper bound of the result. A size of 0 has no valid
//        result, so 0 is returned instead of evaluating `% 0`.
//
// Note: the combined value has only as many random bits as two rand() draws
// provide (30 bits when RAND_MAX == 32767), so bounds above that range are
// not covered uniformly.
uint32 GetRandom(uint32 size) {
  if (size == 0)
    return 0;

  // Draw in separate statements: the order in which two rand() calls inside
  // a single expression are evaluated is unspecified.
  const uint32 high = static_cast<uint32>(rand());
  const uint32 low = static_cast<uint32>(rand());

  // Do the arithmetic in unsigned: where RAND_MAX equals INT_MAX, the signed
  // expression `RAND_MAX + 1` overflows, which is undefined behaviour.
  // Unsigned arithmetic wraps modulo 2^N instead.
  const uint32 span = static_cast<uint32>(RAND_MAX) + 1u;
  return (high * span + low) % size;
}
void Initialize(std::vector<uint32>& Data, uint32 size) {
  for (uint32 N = 0; N < size; N++)
    Data[N] = GetRandom(size);
}
// Builds a table of `size` random indices, follows a chain of Max lookups
// through it, prints the table size, elapsed time and rate, and returns the
// rate.
//
// Each lookup uses the value returned by the previous lookup as its index,
// so the loads form a dependency chain and cannot be issued in parallel.
//
// size       - number of uint32 entries in the table.
// RandomSeed - unused; kept so existing callers still compile.
//
// Returns the measured rate in megabytes per second, counting
// sizeof(uint32) bytes per lookup. Returns 0.0 without printing when size
// is 0, because an empty table has no element to start the chain from.
double Process(uint32 size, uint32 RandomSeed = 0) {
  (void)RandomSeed;
  if (size == 0)
    return 0.0;

  std::vector<uint32> Data(size);
  Initialize(Data, size);

  const clock_t start = clock();
  uint32 num = 0;
  for (uint32 N = 0; N < Max; N++)
    num = Data[num];
  const clock_t finish = clock();

  // Store the final index through a volatile object. Without an observable
  // use of `num`, an optimizer is free to delete the whole timed loop.
  volatile uint32 sink = num;
  (void)sink;

  const double duration = static_cast<double>(finish - start) / CLOCKS_PER_SEC;

  // Compute byte counts in double so they cannot wrap around in 32-bit
  // arithmetic for large Max or size.
  const double bytesRead = static_cast<double>(Max) * sizeof(uint32);
  const double MBperSec = bytesRead / (duration * 1024 * 1024);
  const unsigned tableMB = static_cast<unsigned>(
      (static_cast<double>(size) * sizeof(uint32)) / 1048576.0);

  printf("%4u MegaBytes   %7.2f Seconds   %7.2f Megbytes per Second\n",
         tableMB, duration, MBperSec);
  return MBperSec;
}
int main() {
  uint32 Seed = (unsigned)time( NULL );
  Seed = 0x4bae27d4;
  srand(Seed);
  printf("Random Number Seed--->%x\n", Seed);
  for (uint32 size = 13107200; size <= 268435456; size += 
13107200) {
    double AverageMBperSec = 0;
    for (int N = 1; N <= 10; N++)
      AverageMBperSec += Process(size);
    printf("Average Megabytes per Second--->%7.2f\n\n", 
AverageMBperSec / 10.0 );
  }
  return 0;
}