mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2026-01-06 12:20:19 +01:00
various improvements
This commit is contained in:
@@ -29,23 +29,30 @@ size_t code_length = 0;
|
||||
char* code_init = NULL;
|
||||
size_t code_init_length = 0;
|
||||
|
||||
char* code_one_time_init = NULL;
|
||||
size_t code_one_time_init_length = 0;
|
||||
|
||||
struct pfc_config pfc_configs[1000] = {{0}};
|
||||
size_t n_pfc_configs = 0;
|
||||
|
||||
char* pfc_config_file_content = NULL;
|
||||
|
||||
struct msr_config msr_configs[1000] = {{0}};
|
||||
size_t n_msr_configs = 0;
|
||||
char* msr_config_file_content = NULL;
|
||||
unsigned long cur_rdmsr = 0;
|
||||
|
||||
int is_Intel_CPU = 0;
|
||||
int is_AMD_CPU = 0;
|
||||
|
||||
int n_programmable_counters;
|
||||
|
||||
char* runtime_code;
|
||||
char* runtime_one_time_init_code;
|
||||
void* runtime_r14;
|
||||
void* runtime_rbp;
|
||||
void* runtime_rdi;
|
||||
void* runtime_rsi;
|
||||
void* runtime_rsp;
|
||||
void* huge_pages = NULL;
|
||||
int64_t pfc_mem[MAX_PROGRAMMABLE_COUNTERS];
|
||||
void* RSP_mem;
|
||||
|
||||
@@ -199,6 +206,53 @@ void parse_counter_configs() {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __KERNEL__
|
||||
void parse_msr_configs() {
|
||||
n_msr_configs = 0;
|
||||
|
||||
char* line;
|
||||
char* next_line = msr_config_file_content;
|
||||
while ((line = strsep(&next_line, "\n")) != NULL) {
|
||||
if (strlen(line) == 0 || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
char* wrmsr_str = strsep(&line, " \t");
|
||||
|
||||
char* rdmsr_str = strsep(&line, " \t");
|
||||
strreplace(rdmsr_str, 'h', '\0'); strreplace(rdmsr_str, 'H', '\0');
|
||||
|
||||
if (line && strlen(line) > 0) {
|
||||
msr_configs[n_msr_configs].description = line;
|
||||
} else {
|
||||
msr_configs[n_msr_configs].description = rdmsr_str;
|
||||
}
|
||||
|
||||
nb_strtoul(rdmsr_str+4, 16, &(msr_configs[n_msr_configs].rdmsr));
|
||||
|
||||
size_t n_wrmsr = 0;
|
||||
char* tok = wrmsr_str;
|
||||
char* ce;
|
||||
while ((ce = strsep(&tok, ".")) != NULL) {
|
||||
if (n_wrmsr >= 10) {
|
||||
print_error("Error: n_wrmsr >= 10");
|
||||
break;
|
||||
}
|
||||
|
||||
char* msr_str = strsep(&ce, "=")+4;
|
||||
pr_debug("msr_str: %s", msr_str);
|
||||
strreplace(msr_str, 'h', '\0'); strreplace(msr_str, 'H', '\0');
|
||||
nb_strtoul(msr_str, 16, &(msr_configs[n_msr_configs].wrmsr[n_wrmsr]));
|
||||
strreplace(ce, 'h', '\0'); strreplace(ce, 'H', '\0');
|
||||
nb_strtoul(ce, 0, &(msr_configs[n_msr_configs].wrmsr_val[n_wrmsr]));
|
||||
n_wrmsr++;
|
||||
}
|
||||
msr_configs[n_msr_configs].n_wrmsr = n_wrmsr;
|
||||
n_msr_configs++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL__
|
||||
uint64_t read_value_from_cmd(char* cmd) {
|
||||
FILE* fp;
|
||||
@@ -340,6 +394,13 @@ void configure_perf_ctrs_programmable(int start, int end, unsigned int usr, unsi
|
||||
}
|
||||
}
|
||||
|
||||
void configure_MSRs(struct msr_config config) {
|
||||
for (size_t i=0; i<config.n_wrmsr; i++) {
|
||||
write_msr(config.wrmsr[i], config.wrmsr_val[i]);
|
||||
}
|
||||
cur_rdmsr = config.rdmsr;
|
||||
}
|
||||
|
||||
void create_runtime_code(char* measurement_template, long local_unroll_count, long local_loop_count) {
|
||||
int templateI = 0;
|
||||
int rci = 0;
|
||||
@@ -377,39 +438,30 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
|
||||
}
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_PFC)) {
|
||||
*(void**)(&runtime_code[rci]) = pfc_mem;
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_MSR)) {
|
||||
*(void**)(&runtime_code[rci]) = (void*)cur_rdmsr;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_RSP_ADDRESS)) {
|
||||
*(void**)(&runtime_code[rci]) = &RSP_mem;
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_RUNTIME_R14)) {
|
||||
if (huge_pages) {
|
||||
*(void**)(&runtime_code[rci]) = huge_pages;
|
||||
} else {
|
||||
*(void**)(&runtime_code[rci]) = runtime_r14 + RUNTIME_R_SIZE/2;
|
||||
}
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
*(void**)(&runtime_code[rci]) = runtime_r14;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_RUNTIME_RBP)) {
|
||||
*(void**)(&runtime_code[rci]) = runtime_rbp + RUNTIME_R_SIZE/2;
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
*(void**)(&runtime_code[rci]) = runtime_rbp;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_RUNTIME_RDI)) {
|
||||
*(void**)(&runtime_code[rci]) = runtime_rdi + RUNTIME_R_SIZE/2;
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
*(void**)(&runtime_code[rci]) = runtime_rdi;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_RUNTIME_RSI)) {
|
||||
*(void**)(&runtime_code[rci]) = runtime_rsi + RUNTIME_R_SIZE/2;
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
*(void**)(&runtime_code[rci]) = runtime_rsi;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_RUNTIME_RSP)) {
|
||||
*(void**)(&runtime_code[rci]) = runtime_rsp + RUNTIME_R_SIZE/2;
|
||||
templateI += 8;
|
||||
rci += 8;
|
||||
*(void**)(&runtime_code[rci]) = runtime_rsp;
|
||||
templateI += 8; rci += 8;
|
||||
} else {
|
||||
runtime_code[rci++] = measurement_template[templateI];
|
||||
templateI++;
|
||||
runtime_code[rci++] = measurement_template[templateI++];
|
||||
}
|
||||
}
|
||||
templateI += 8;
|
||||
@@ -418,6 +470,48 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
|
||||
} while (measurement_template[templateI-1] != '\xC3'); // 0xC3 = ret
|
||||
}
|
||||
|
||||
void create_and_run_one_time_init_code() {
|
||||
if (code_one_time_init_length == 0) return;
|
||||
|
||||
char* template = (char*)&one_time_init_template;
|
||||
int templateI = 0;
|
||||
int rci = 0;
|
||||
|
||||
while (!starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_TEMPLATE_END)) {
|
||||
if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_INIT)) {
|
||||
templateI += 8;
|
||||
memcpy(&runtime_one_time_init_code[rci], code_one_time_init, code_one_time_init_length);
|
||||
rci += code_one_time_init_length;
|
||||
} else if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_RSP_ADDRESS)) {
|
||||
*(void**)(&runtime_one_time_init_code[rci]) = &RSP_mem;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_RUNTIME_R14)) {
|
||||
*(void**)(&runtime_one_time_init_code[rci]) = runtime_r14;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_RUNTIME_RBP)) {
|
||||
*(void**)(&runtime_one_time_init_code[rci]) = runtime_rbp;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_RUNTIME_RDI)) {
|
||||
*(void**)(&runtime_one_time_init_code[rci]) = runtime_rdi;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_RUNTIME_RSI)) {
|
||||
*(void**)(&runtime_one_time_init_code[rci]) = runtime_rsi;
|
||||
templateI += 8; rci += 8;
|
||||
} else if (starts_with_magic_bytes(&template[templateI], MAGIC_BYTES_RUNTIME_RSP)) {
|
||||
*(void**)(&runtime_one_time_init_code[rci]) = runtime_rsp;
|
||||
templateI += 8; rci += 8;
|
||||
} else {
|
||||
runtime_one_time_init_code[rci++] = template[templateI++];
|
||||
}
|
||||
}
|
||||
templateI += 8;
|
||||
do {
|
||||
runtime_one_time_init_code[rci++] = template[templateI++];
|
||||
} while (template[templateI-1] != '\xC3'); // 0xC3 = ret
|
||||
|
||||
((void(*)(void))runtime_one_time_init_code)();
|
||||
}
|
||||
|
||||
void run_warmup_experiment(char* measurement_template) {
|
||||
if (!initial_warm_up_count) return;
|
||||
|
||||
@@ -1112,3 +1206,79 @@ void measurement_RDTSC_template_noMem() {
|
||||
RESTORE_REGS_FLAGS();
|
||||
asm(".quad "STRINGIFY(MAGIC_BYTES_TEMPLATE_END));
|
||||
}
|
||||
|
||||
/*
 * Measurement template that reads one MSR before and after the code at the
 * MAGIC_BYTES_CODE marker and leaves the difference (after - before) in the
 * 8 bytes that the MAGIC_BYTES_PFC placeholder points to.
 *
 * This function serves as a byte template: create_runtime_code() replaces
 * MAGIC_BYTES_PFC with the address of pfc_mem and MAGIC_BYTES_MSR with
 * cur_rdmsr. NOTE(review): the handling of the INIT and CODE markers is not
 * part of the visible code; presumably they are replaced by the init code and
 * the code to be benchmarked.
 *
 * The first sequence preserves RAX, RCX, RDX, R15 and the flags, so that the
 * state that the code at the CODE marker sees is not altered by the MSR read.
 */
void measurement_RDMSR_template() {
    SAVE_REGS_FLAGS();
    asm(".quad "STRINGIFY(MAGIC_BYTES_INIT));
    asm volatile(
        ".intel_syntax noprefix \n"
        // Save RAX, then the flags (LAHF covers SF/ZF/AF/PF/CF, SETO covers OF),
        // then the other registers that are clobbered below.
        "push rax \n"
        "lahf \n"
        "seto al \n"
        "push rax \n"
        "push rcx \n"
        "push rdx \n"
        "push r15 \n"
        // Zero the result slot.
        "mov r15, "STRINGIFY(MAGIC_BYTES_PFC)" \n"
        "mov qword ptr [r15], 0 \n"
        // Read the MSR selected by RCX; RDMSR returns the value in EDX:EAX.
        "mov rcx, "STRINGIFY(MAGIC_BYTES_MSR)" \n"
        "lfence; rdmsr; lfence \n"
        "shl rdx, 32; or rdx, rax \n"
        // Result slot = -(value before).
        "sub [r15], rdx \n"
        "lfence \n"
        "pop r15; lfence \n"
        "pop rdx; lfence \n"
        "pop rcx; lfence \n"
        "pop rax; lfence \n"
        // Restore OF: AL is 1 if OF was set, and 1 - (-127) overflows.
        "cmp al, -127; lfence \n"
        // Restore the remaining flags from AH, then the original RAX.
        "sahf; lfence \n"
        "pop rax; \n"
        "lfence \n"
        ".att_syntax noprefix ");
    asm(".quad "STRINGIFY(MAGIC_BYTES_CODE));
    asm volatile(
        ".intel_syntax noprefix \n"
        "lfence \n"
        // Read the MSR again and add the value to the result slot.
        "mov rcx, "STRINGIFY(MAGIC_BYTES_MSR)" \n"
        "lfence; rdmsr; lfence \n"
        "shl rdx, 32; or rdx, rax \n"
        "mov r15, "STRINGIFY(MAGIC_BYTES_PFC)" \n"
        "add [r15], rdx \n"
        ".att_syntax noprefix ");
    RESTORE_REGS_FLAGS();
    asm(".quad "STRINGIFY(MAGIC_BYTES_TEMPLATE_END));
}
|
||||
|
||||
/*
 * Variant of the RDMSR measurement template that keeps the intermediate value
 * in R8 instead of memory; the result (after - before) is written to the
 * address given by the MAGIC_BYTES_PFC placeholder only after the second read.
 *
 * Unlike measurement_RDMSR_template(), the first sequence does not preserve
 * RAX, RCX, RDX, R8 or the flags.
 * NOTE(review): the result is only meaningful if the code at the
 * MAGIC_BYTES_CODE marker leaves R8 unchanged -- confirm that this is a
 * documented requirement of the noMem mode.
 */
void measurement_RDMSR_template_noMem() {
    SAVE_REGS_FLAGS();
    asm(".quad "STRINGIFY(MAGIC_BYTES_INIT));
    asm volatile(
        ".intel_syntax noprefix \n"
        "mov r8, 0 \n"
        // Read the MSR selected by RCX; RDMSR returns the value in EDX:EAX.
        "mov rcx, "STRINGIFY(MAGIC_BYTES_MSR)" \n"
        "lfence; rdmsr; lfence \n"
        "shl rdx, 32; or rdx, rax \n"
        // R8 = -(value before).
        "sub r8, rdx \n"
        "lfence \n"
        ".att_syntax noprefix ");
    asm(".quad "STRINGIFY(MAGIC_BYTES_CODE));
    asm volatile(
        ".intel_syntax noprefix \n"
        "lfence \n"
        // Read the MSR again; R8 = (value after) - (value before).
        "mov rcx, "STRINGIFY(MAGIC_BYTES_MSR)" \n"
        "lfence; rdmsr; lfence \n"
        "shl rdx, 32; or rdx, rax \n"
        "add r8, rdx \n"
        // Store the result.
        "mov r15, "STRINGIFY(MAGIC_BYTES_PFC)" \n"
        "mov [r15], r8 \n"
        ".att_syntax noprefix ");
    RESTORE_REGS_FLAGS();
    asm(".quad "STRINGIFY(MAGIC_BYTES_TEMPLATE_END));
}
|
||||
|
||||
/*
 * Template for the one-time initialization code. It is not meant to be called
 * directly: create_and_run_one_time_init_code() copies its bytes, replaces the
 * MAGIC_BYTES_INIT marker with the contents of code_one_time_init, and stops
 * at the first ret after MAGIC_BYTES_TEMPLATE_END. The inserted code is
 * bracketed by SAVE_REGS_FLAGS() and RESTORE_REGS_FLAGS().
 */
void one_time_init_template() {
    SAVE_REGS_FLAGS();
    asm(".quad "STRINGIFY(MAGIC_BYTES_INIT));
    RESTORE_REGS_FLAGS();
    asm(".quad "STRINGIFY(MAGIC_BYTES_TEMPLATE_END));
}
|
||||
@@ -120,6 +120,9 @@ extern size_t code_length;
|
||||
extern char* code_init;
|
||||
extern size_t code_init_length;
|
||||
|
||||
extern char* code_one_time_init;
|
||||
extern size_t code_one_time_init_length;
|
||||
|
||||
struct pfc_config {
|
||||
unsigned long evt_num;
|
||||
unsigned long umask;
|
||||
@@ -134,33 +137,41 @@ struct pfc_config {
|
||||
unsigned int invalid;
|
||||
char* description;
|
||||
};
|
||||
|
||||
extern struct pfc_config pfc_configs[];
|
||||
extern size_t n_pfc_configs;
|
||||
|
||||
extern char* pfc_config_file_content;
|
||||
|
||||
// One MSR measurement configuration; filled in by parse_msr_configs() and
// applied by configure_MSRs().
struct msr_config {
    // Number of the MSR to read; configure_MSRs() copies it to cur_rdmsr.
    unsigned long rdmsr;
    // MSR numbers that configure_MSRs() writes, in order.
    unsigned long wrmsr[10];
    // wrmsr_val[i] is the value that is written to wrmsr[i].
    unsigned long wrmsr_val[10];
    // Number of valid entries in wrmsr and wrmsr_val.
    size_t n_wrmsr;
    // Description of the configuration; parse_msr_configs() makes it point
    // into the configuration file buffer.
    char* description;
};
|
||||
extern struct msr_config msr_configs[];
|
||||
extern size_t n_msr_configs;
|
||||
extern char* msr_config_file_content;
|
||||
|
||||
extern int is_Intel_CPU;
|
||||
extern int is_AMD_CPU;
|
||||
|
||||
#define MAX_PROGRAMMABLE_COUNTERS 6
|
||||
extern int n_programmable_counters;
|
||||
|
||||
// Pointer to a memory region that is writable and executable.
|
||||
// Pointers to memory regions that are writable and executable.
|
||||
extern char* runtime_code;
|
||||
extern char* runtime_one_time_init_code;
|
||||
|
||||
#define RUNTIME_R_SIZE (1024*1024)
|
||||
|
||||
// During measurements, R14, RBP, RDI, RSI, and RSP will contain these addresses plus RUNTIME_R_SIZE/2.
|
||||
// If r14_size is set in the kernel module, R14 will not have this offset.
|
||||
extern void* runtime_r14;
|
||||
extern void* runtime_rbp;
|
||||
extern void* runtime_rdi;
|
||||
extern void* runtime_rsi;
|
||||
extern void* runtime_rsp;
|
||||
|
||||
// If non-null, R14 will contain this address instead of runtime_r14.
|
||||
extern void* huge_pages;
|
||||
|
||||
// Stores performance counter values during measurements.
|
||||
extern int64_t pfc_mem[MAX_PROGRAMMABLE_COUNTERS];
|
||||
|
||||
@@ -178,6 +189,7 @@ extern int cpu;
|
||||
int check_cpuid(void);
|
||||
|
||||
void parse_counter_configs(void);
|
||||
void parse_msr_configs(void);
|
||||
|
||||
uint64_t read_value_from_cmd(char* cmd);
|
||||
|
||||
@@ -191,10 +203,12 @@ void configure_perf_ctrs_FF(unsigned int usr, unsigned int os);
|
||||
// start and end are indices into the pfc_configs array.
|
||||
void configure_perf_ctrs_programmable(int start, int end, unsigned int usr, unsigned int os);
|
||||
|
||||
void configure_MSRs(struct msr_config config);
|
||||
|
||||
void create_runtime_code(char* measurement_template, long local_unroll_count, long local_loop_count);
|
||||
void run_warmup_experiment(char* measurement_template);
|
||||
void run_experiment(char* measurement_template, int64_t* results[], int n_counters, long local_unroll_count, long local_loop_count);
|
||||
void create_and_run_one_time_init_code(void);
|
||||
|
||||
char* compute_result_str(char* buf, size_t buf_len, char* desc, int counter);
|
||||
int64_t get_aggregate_value_100(int64_t* values, size_t length);
|
||||
@@ -213,7 +227,8 @@ void print_all_measurement_results(int64_t* results[], int n_counters);
|
||||
#define MAGIC_BYTES_RUNTIME_RSI 0x70b513b1C2813F04
|
||||
#define MAGIC_BYTES_RUNTIME_RSP 0x80b513b1C2813F04
|
||||
#define MAGIC_BYTES_PFC 0x90b513b1C2813F04
|
||||
#define MAGIC_BYTES_TEMPLATE_END 0xA0b513b1C2813F04
|
||||
#define MAGIC_BYTES_MSR 0xA0b513b1C2813F04
|
||||
#define MAGIC_BYTES_TEMPLATE_END 0xB0b513b1C2813F04
|
||||
|
||||
#define STRINGIFY2(X) #X
|
||||
#define STRINGIFY(X) STRINGIFY2(X)
|
||||
@@ -231,6 +246,9 @@ void measurement_FF_template_AMD(void);
|
||||
void measurement_FF_template_AMD_noMem(void);
|
||||
void measurement_RDTSC_template(void);
|
||||
void measurement_RDTSC_template_noMem(void);
|
||||
void measurement_RDMSR_template(void);
|
||||
void measurement_RDMSR_template_noMem(void);
|
||||
void one_time_init_template(void);
|
||||
|
||||
// RBX, RBP, and R12–R15 are callee saved registers according to the "System V AMD64 ABI" (https://en.wikipedia.org/wiki/X86_calling_conventions)
|
||||
#define SAVE_REGS_FLAGS() \
|
||||
|
||||
Reference in New Issue
Block a user