// nanoBench // // Copyright (C) 2019 Andreas Abel // // This program is free software: you can redistribute it and/or modify it under the terms of version 3 of the GNU Affero General Public License. // // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License along with this program. If not, see . #define _GNU_SOURCE #include #include #include #include #include #include #include #include "../common/nanoBench.h" void print_usage() { printf("\n"); printf("nanoBench usage:\n"); printf("\n"); printf(" -code : Binary file containing the code to be benchmarked.\n"); printf(" -code_init : Binary file containing code to be executed once before each measurement.\n"); printf(" -code_late_init : Binary file containing code to be executed once immediately before the code to be benchmarked.\n"); printf(" -code_one_time_init : Binary file containing code to be executed once before the first measurement\n"); printf(" -config : File with performance counter event specifications.\n"); printf(" -fixed_counters: Reads the fixed-function performance counters.\n"); printf(" -n_measurements : Number of times the measurements are repeated.\n"); printf(" -unroll_count : Number of copies of the benchmark code inside the inner loop.\n"); printf(" -loop_count : Number of iterations of the inner loop.\n"); printf(" -warm_up_count : Number of runs before the first measurement gets recorded.\n"); printf(" -initial_warm_up_count : Number of runs before any measurement is performed.\n"); printf(" -alignment_offset : Alignment offset.\n"); printf(" -df: Drains front-end buffers between executing code_late_init and code.\n"); printf(" -avg: Selects the arithmetic mean as the aggregate function.\n"); printf(" -median: Selects the median as the aggregate function.\n"); printf(" -min: Selects the minimum as the aggregate function.\n"); printf(" -basic_mode: Enables basic mode.\n"); printf(" -no_mem: The code for reading the perf. ctrs. does not make memory accesses.\n"); printf(" -no_normalization: The measurement results are not divided by the number of repetitions.\n"); printf(" -verbose: Outputs the results of all performance counter readings.\n"); printf(" -cpu : Pins the measurement thread to CPU n. \n"); printf(" -usr : If 1, counts events at a privilege level greater than 0.\n"); printf(" -os : If 1, counts events at a privilege level 0.\n"); printf(" -debug: Generate a breakpoint trap after running the code to be benchmarked.\n"); } size_t mmap_file(char* filename, char** content) { int fd = open(filename, O_RDONLY); size_t len = lseek(fd, 0, SEEK_END); *content = mmap(0, len, PROT_READ, MAP_PRIVATE, fd, 0); if (*content == MAP_FAILED) { fprintf(stderr, "Error reading %s\n", filename); exit(1); } close(fd); return len; } int main(int argc, char **argv) { /************************************* * Parse command-line options ************************************/ char* config_file_name = NULL; bool usr = 1; bool os = 0; struct option long_opts[] = { {"code", required_argument, 0, 'c'}, {"code_init", required_argument, 0, 'i'}, {"code_late_init", required_argument, 0, 't'}, {"code_one_time_init", required_argument, 0, 'o'}, {"config", required_argument, 0, 'f'}, {"fixed_counters", no_argument, &use_fixed_counters, true}, {"n_measurements", required_argument, 0, 'n'}, {"unroll_count", required_argument, 0, 'u'}, {"loop_count", required_argument, 0, 'l'}, {"warm_up_count", required_argument, 0, 'w'}, {"initial_warm_up_count", required_argument, 0, 'a'}, {"alignment_offset", required_argument, 0, 'm'}, {"df", no_argument, &drain_frontend, true}, {"avg", no_argument, &aggregate_function, AVG_20_80}, {"median", no_argument, &aggregate_function, MED}, {"min", no_argument, &aggregate_function, MIN}, {"max", no_argument, &aggregate_function, MAX}, {"basic_mode", no_argument, &basic_mode, true}, {"no_mem", no_argument, &no_mem, true}, {"no_normalization", no_argument, &no_normalization, true}, {"verbose", no_argument, &verbose, true}, {"cpu", required_argument, 0, 'p'}, {"usr", required_argument, 0, 'r'}, {"os", required_argument, 0, 's'}, {"debug", no_argument, &debug, true}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; int option = 0; while ((option = getopt_long_only(argc, argv, "", long_opts, NULL)) != -1) { switch (option) { case 0: break; case 'c': code_length = mmap_file(optarg, &code); break; case 'i': code_init_length = mmap_file(optarg, &code_init); break; case 't': code_late_init_length = mmap_file(optarg, &code_late_init); break; case 'o': code_one_time_init_length = mmap_file(optarg, &code_one_time_init); break; case 'f': ; config_file_name = optarg; break; case 'n': n_measurements = atol(optarg); break; case 'u': unroll_count = atol(optarg); if (unroll_count <= 0) { fprintf(stderr, "Error: unroll_count must be > 0\n"); return 1; } break; case 'l': loop_count = atol(optarg); break; case 'w': warm_up_count = atol(optarg); break; case 'a': initial_warm_up_count = atol(optarg); break; case 'm': alignment_offset = (size_t)atol(optarg); break; case 'p': cpu = atol(optarg); break; case 'r': usr = atoi(optarg); break; case 's': os = atoi(optarg); break; default: print_usage(); return 1; } } /************************************* * Check CPUID and parse config file ************************************/ if (check_cpuid()) { return 1; } if (config_file_name) { char* config_mmap; size_t len = mmap_file(config_file_name, &config_mmap); pfc_config_file_content = calloc(len+1, sizeof(char)); memcpy(pfc_config_file_content, config_mmap, len); parse_counter_configs(); } /************************************* * Pin thread to CPU ************************************/ if (cpu == -1) { cpu = sched_getcpu(); } cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(cpu, &mask); if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) == -1) { fprintf(stderr, "Error: Could not pin thread to core %d\n", cpu); return 1; } /************************************* * Allocate memory ************************************/ size_t runtime_code_length = get_required_runtime_code_length(); posix_memalign((void**)&runtime_code, sysconf(_SC_PAGESIZE), runtime_code_length); if (!runtime_code) { fprintf(stderr, "Error: Failed to allocate memory for runtime_code\n"); return 1; } if (mprotect(runtime_code, runtime_code_length, (PROT_READ | PROT_WRITE |PROT_EXEC))) { fprintf(stderr, "Error: mprotect failed\n"); return 1; } size_t runtime_one_time_init_code_length = code_one_time_init_length + 10000; posix_memalign((void**)&runtime_one_time_init_code, sysconf(_SC_PAGESIZE), runtime_one_time_init_code_length); if (!runtime_one_time_init_code) { fprintf(stderr, "Error: Failed to allocate memory for runtime_one_time_init_code\n"); return 1; } if (mprotect(runtime_one_time_init_code, runtime_one_time_init_code_length, (PROT_READ | PROT_WRITE |PROT_EXEC))) { fprintf(stderr, "Error: mprotect failed\n"); return 1; } posix_memalign((void**)&runtime_r14, sysconf(_SC_PAGESIZE), RUNTIME_R_SIZE); posix_memalign((void**)&runtime_rbp, sysconf(_SC_PAGESIZE), RUNTIME_R_SIZE); posix_memalign((void**)&runtime_rdi, sysconf(_SC_PAGESIZE), RUNTIME_R_SIZE); posix_memalign((void**)&runtime_rsi, sysconf(_SC_PAGESIZE), RUNTIME_R_SIZE); posix_memalign((void**)&runtime_rsp, sysconf(_SC_PAGESIZE), RUNTIME_R_SIZE); if (!runtime_r14 || !runtime_rbp || !runtime_rdi || !runtime_rsi || !runtime_rsp) { fprintf(stderr, "Error: Could not allocate memory for runtime_r*\n"); return 1; } runtime_r14 += RUNTIME_R_SIZE/2; runtime_rbp += RUNTIME_R_SIZE/2; runtime_rdi += RUNTIME_R_SIZE/2; runtime_rsi += RUNTIME_R_SIZE/2; runtime_rsp += RUNTIME_R_SIZE/2; for (int i=0; i= 4) { n_used_counters = 4; if (no_mem) { measurement_template = (char*)&measurement_template_Intel_noMem_4; } else { measurement_template = (char*)&measurement_template_Intel_4; } } else { n_used_counters = 2; if (no_mem) { measurement_template = (char*)&measurement_template_Intel_noMem_2; } else { measurement_template = (char*)&measurement_template_Intel_2; } } } size_t next_pfc_config = 0; while (next_pfc_config < n_pfc_configs) { char* pfc_descriptions[MAX_PROGRAMMABLE_COUNTERS] = {0}; next_pfc_config = configure_perf_ctrs_programmable(next_pfc_config, usr, os, n_used_counters, 0, pfc_descriptions); run_experiment(measurement_template, measurement_results_base, n_used_counters, base_unroll_count, base_loop_count); run_experiment(measurement_template, measurement_results, n_used_counters, main_unroll_count, main_loop_count); if (verbose) { printf("\nProgrammable counter results (unroll_count=%ld, loop_count=%ld):\n\n", base_unroll_count, base_loop_count); print_all_measurement_results(measurement_results_base, n_used_counters); printf("Programmable counter results (unroll_count=%ld, loop_count=%ld):\n\n", main_unroll_count, main_loop_count); print_all_measurement_results(measurement_results, n_used_counters); } for (size_t c=0; c < n_used_counters; c++) { if (pfc_descriptions[c]) printf("%s", compute_result_str(buf, sizeof(buf), pfc_descriptions[c], c)); } } /************************************* * Cleanup ************************************/ free(runtime_code); free(runtime_one_time_init_code); free(runtime_r14 - RUNTIME_R_SIZE/2); free(runtime_rbp - RUNTIME_R_SIZE/2); free(runtime_rdi - RUNTIME_R_SIZE/2); free(runtime_rsi - RUNTIME_R_SIZE/2); free(runtime_rsp - RUNTIME_R_SIZE/2); for (int i=0; i