// nanoBench // // Copyright (C) 2019 Andreas Abel // // This program is free software: you can redistribute it and/or modify it under the terms of version 3 of the GNU Affero General Public License. // // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License along with this program. If not, see https://www.gnu.org/licenses/. /* NOTE(review): the header names of the bare #include directives below appear to have been lost in extraction; restore them from the upstream file before building. */ #include #include #include #include #include #include #include #include #include #include <../arch/x86/include/asm/fpu/api.h> #if LINUX_VERSION_CODE <= KERNEL_VERSION(4,12,0) #include #elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) #include int (*set_memory_x)(unsigned long, int) = 0; int (*set_memory_nx)(unsigned long, int) = 0; #else #include #endif #include "../common/nanoBench.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andreas Abel"); // 4 MB is the maximum that kmalloc supports on my machines #define KMALLOC_MAX (4*1024*1024) char* runtime_code_base = NULL; size_t code_offset = 0; size_t code_memory_size = 0; size_t code_init_memory_size = 0; size_t code_one_time_init_memory_size = 0; size_t pfc_config_memory_size = 0; size_t msr_config_memory_size = 0; size_t runtime_code_base_memory_size = 0; size_t runtime_one_time_init_code_memory_size = 0; void** r14_segments = NULL; size_t n_r14_segments = 0; /* read_file_into_buffer: opens file_name read-only, obtains its size through kern_path and vfs_getattr, and stores that size through buf_len. When file_size + 1 exceeds the capacity recorded through buf_memory_size, the old buffer is released with kfree and a new one of max(2 times (file_size + 1), PAGE_SIZE) bytes is allocated with kmalloc. The contents are then read with kernel_read and terminated with a NUL byte. Returns 0 on success and -1 when the file cannot be opened, its attributes cannot be read, or the allocation fails. NOTE(review): the vfs_getattr failure path returns without filp_close or path_put, the kern_path result is not checked, the allocation-failure path skips path_put, and the kernel_read return value is ignored. */ static int read_file_into_buffer(const char *file_name, char **buf, size_t *buf_len, size_t *buf_memory_size) { struct file *filp = NULL; filp = filp_open(file_name, O_RDONLY, 0); if (IS_ERR(filp)) { pr_debug("Error opening file %s\n", file_name); return -1; } struct path p; struct kstat ks; kern_path(file_name, 0, &p); #if LINUX_VERSION_CODE <= KERNEL_VERSION(4,11,0) if (vfs_getattr(&p, &ks)) { #else if (vfs_getattr(&p, &ks, 0, 0)) { #endif pr_debug("Error getting file attributes\n"); return -1; } size_t 
file_size = ks.size; *buf_len = file_size; if (file_size + 1 > *buf_memory_size) { kfree(*buf); *buf_memory_size = max(2*(file_size + 1), PAGE_SIZE); *buf = kmalloc(*buf_memory_size, GFP_KERNEL); if (!*buf) { printk(KERN_ERR "Could not allocate memory for %s\n", file_name); *buf_memory_size = 0; filp_close(filp, NULL); return -1; } } loff_t pos = 0; kernel_read(filp, *buf, file_size, &pos); (*buf)[file_size] = '\0'; path_put(&p); filp_close(filp, NULL); return 0; } /* sysfs attribute "code": the show handler produces no data (returns 0). The store handler passes the written string unchanged to read_file_into_buffer as a file name and loads that file into code, updating code_length and code_memory_size. It ignores the load result and always returns count. NOTE(review): a trailing newline written by the caller would become part of the file name; confirm that writers do not append one. */ static ssize_t code_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return 0; } static ssize_t code_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { read_file_into_buffer(buf, &code, &code_length, &code_memory_size); return count; } static struct kobj_attribute code_attribute =__ATTR(code, 0660, code_show, code_store); /* sysfs attribute "init": same pattern as "code", but the file is loaded into code_init (with code_init_length and code_init_memory_size). */ static ssize_t init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return 0; } static ssize_t init_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { read_file_into_buffer(buf, &code_init, &code_init_length, &code_init_memory_size); return count; } static struct kobj_attribute code_init_attribute =__ATTR(init, 0660, init_show, init_store); /* sysfs attribute "one_time_init": the show handler returns 0. The store handler loads the named file into code_one_time_init and then makes sure runtime_one_time_init_code has room for 10000 + code_one_time_init_memory_size bytes: when that exceeds the recorded size, the old area is released with vfree and a new one is requested from __vmalloc with PAGE_KERNEL_EXEC. If the allocation fails, the recorded size is reset to 0 and a debug message is logged. The handler always returns count. */ static ssize_t one_time_init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return 0; } static ssize_t one_time_init_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { read_file_into_buffer(buf, &code_one_time_init, &code_one_time_init_length, &code_one_time_init_memory_size); size_t new_runtime_one_time_init_code_memory_size = 10000 + code_one_time_init_memory_size; if (new_runtime_one_time_init_code_memory_size > runtime_one_time_init_code_memory_size) { runtime_one_time_init_code_memory_size = new_runtime_one_time_init_code_memory_size; vfree(runtime_one_time_init_code); runtime_one_time_init_code = __vmalloc(runtime_one_time_init_code_memory_size, GFP_KERNEL, 
PAGE_KERNEL_EXEC); if (!runtime_one_time_init_code) { runtime_one_time_init_code_memory_size = 0; pr_debug("failed to allocate executable memory\n"); } } return count; } static struct kobj_attribute code_one_time_init_attribute =__ATTR(one_time_init, 0660, one_time_init_show, one_time_init_store); /* sysfs attribute "config", show handler. NOTE(review): the loop in this function is damaged (the text between a less-than sign and a greater-than sign appears to be missing), so its output format cannot be documented from this view. The visible remainder returns PAGE_SIZE-1 from inside the loop under a condition involving PAGE_SIZE, and count otherwise. */ static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t count = 0; for (int i=0; i PAGE_SIZE) { return PAGE_SIZE-1; } } return count; } /* config_store: loads the file named by buf into pfc_config_file_content (capacity tracked in pfc_config_memory_size), then calls parse_counter_configs(). The loaded length is kept only in a local variable and the load result is ignored; the handler always returns count. */ static ssize_t config_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { size_t pfc_config_length; read_file_into_buffer(buf, &pfc_config_file_content, &pfc_config_length, &pfc_config_memory_size); parse_counter_configs(); return count; } static struct kobj_attribute config_attribute =__ATTR(config, 0660, config_show, config_store); /* NOTE(review): from msr_config_show up to the code-size check below the source is damaged; the rest of msr_config_show, an unknown number of further definitions, and the header of the routine that produces the /proc output appear to be missing. What remains of that routine rejects a request with -1 when a size (presumably req_code_length, as printed in the message) exceeds runtime_code_base_memory_size. */ static ssize_t msr_config_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t count = 0; for (int i=0; i 0) { for (int i=0; i runtime_code_base_memory_size) { printk(KERN_ERR "Maximum supported code size %zu kB; requested %zu kB\n", runtime_code_base_memory_size/1024, req_code_length/1024); return -1; } runtime_code = runtime_code_base + code_offset; kernel_fpu_begin(); /* In basic mode the baseline run uses unroll and loop counts of 0 and the main run uses unroll_count. Otherwise the baseline run uses unroll_count and loop_count and the main run uses twice unroll_count. The main loop count is loop_count in both modes. */ long base_unroll_count = (basic_mode?0:unroll_count); long main_unroll_count = (basic_mode?unroll_count:2*unroll_count); long base_loop_count = (basic_mode?0:loop_count); long main_loop_count = loop_count; char buf[100]; char* measurement_template; /********************************* * Fixed-function counters. 
********************************/ /* Choose the fixed-function measurement template from is_AMD_CPU and no_mem. */ if (is_AMD_CPU) { if (no_mem) { measurement_template = (char*)&measurement_FF_template_AMD_noMem; } else { measurement_template = (char*)&measurement_FF_template_AMD; } } else { if (no_mem) { measurement_template = (char*)&measurement_FF_template_Intel_noMem; } else { measurement_template = (char*)&measurement_FF_template_Intel; } } configure_perf_ctrs_FF(0, 1); create_and_run_one_time_init_code(); run_warmup_experiment(measurement_template); /* Each branch runs the template twice, once into measurement_results_base with the baseline counts and once into measurement_results with the main counts, then appends one result string per value to the seq_file. The AMD branch passes 3 to run_experiment (presumably the number of recorded values) and labels them RDTSC, MPERF and APERF; the other branch passes 4 and labels them RDTSC, Instructions retired, Core cycles and Reference cycles. NOTE(review): the second pr_debug message in the AMD branch contains a duplicated "and". */ if (is_AMD_CPU) { run_experiment(measurement_template, measurement_results_base, 3, base_unroll_count, base_loop_count); run_experiment(measurement_template, measurement_results, 3, main_unroll_count, main_loop_count); if (verbose) { pr_debug("\nRDTSC, MPERF, and APERF results (unroll_count=%ld, loop_count=%ld):\n\n", base_unroll_count, base_loop_count); print_all_measurement_results(measurement_results_base, 3); pr_debug("RDTSC, MPERF, and and APERF results (unroll_count=%ld, loop_count=%ld):\n\n", main_unroll_count, main_loop_count); print_all_measurement_results(measurement_results, 3); } seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "RDTSC", 0)); seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "MPERF", 1)); seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "APERF", 2)); } else { run_experiment(measurement_template, measurement_results_base, 4, base_unroll_count, base_loop_count); run_experiment(measurement_template, measurement_results, 4, main_unroll_count, main_loop_count); if (verbose) { pr_debug("\nRDTSC and fixed-function counter results (unroll_count=%ld, loop_count=%ld):\n\n", base_unroll_count, base_loop_count); print_all_measurement_results(measurement_results_base, 4); pr_debug("RDTSC and fixed-function counter results (unroll_count=%ld, loop_count=%ld):\n\n", main_unroll_count, main_loop_count); print_all_measurement_results(measurement_results, 4); } seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "RDTSC", 0)); seq_printf(m, "%s", 
compute_result_str(buf, sizeof(buf), "Instructions retired", 1)); seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "Core cycles", 2)); seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "Reference cycles", 3)); } /********************************* * Programmable counters. ********************************/ /* Choose the programmable-counter template from is_AMD_CPU and no_mem; on the non-AMD path the variant with suffix 4 is used when n_programmable_counters is at least 4 and the variant with suffix 2 otherwise. */ if (is_AMD_CPU) { if (no_mem) { measurement_template = (char*)&measurement_template_AMD_noMem; } else { measurement_template = (char*)&measurement_template_AMD; } } else { if (no_mem) { if (n_programmable_counters >= 4) { measurement_template = (char*)&measurement_template_Intel_noMem_4; } else { measurement_template = (char*)&measurement_template_Intel_noMem_2; } } else { if (n_programmable_counters >= 4) { measurement_template = (char*)&measurement_template_Intel_4; } else { measurement_template = (char*)&measurement_template_Intel_2; } } } /* NOTE(review): the source is damaged from the loop header below through the start of what is presumably the module init routine. The visible init code resolves set_memory_x and set_memory_nx with kallsyms_lookup_name (inside a kernel-version conditional whose opening is lost), returns -1 when check_cpuid() is nonzero, and returns -1 when nb_kobject is NULL. */ for (size_t i=0; i= KERNEL_VERSION(5, 4, 0) set_memory_x = (void*)kallsyms_lookup_name("set_memory_x"); set_memory_nx = (void*)kallsyms_lookup_name("set_memory_nx"); #endif if (check_cpuid()) { return -1; } for (int i=0; iparent); if (!nb_kobject) { pr_debug("failed to create and add nb\n"); return -1; } /* Register every sysfs attribute file on nb_kobject. The return codes are OR-ed together, so a nonzero value only says that at least one call failed and is not necessarily a single valid errno. NOTE(review): no cleanup of nb_kobject or of already created files is visible on the error paths that follow. */ int error = sysfs_create_file(nb_kobject, &clear_attribute.attr); error |= sysfs_create_file(nb_kobject, &reset_attribute.attr); error |= sysfs_create_file(nb_kobject, &code_attribute.attr); error |= sysfs_create_file(nb_kobject, &code_init_attribute.attr); error |= sysfs_create_file(nb_kobject, &code_one_time_init_attribute.attr); error |= sysfs_create_file(nb_kobject, &config_attribute.attr); error |= sysfs_create_file(nb_kobject, &msr_config_attribute.attr); error |= sysfs_create_file(nb_kobject, &loop_count_attribute.attr); error |= sysfs_create_file(nb_kobject, &unroll_count_attribute.attr); error |= sysfs_create_file(nb_kobject, &n_measurements_attribute.attr); error |= sysfs_create_file(nb_kobject, &warm_up_attribute.attr); error |= sysfs_create_file(nb_kobject, &initial_warm_up_attribute.attr); error |= 
sysfs_create_file(nb_kobject, &alignment_offset_attribute.attr); error |= sysfs_create_file(nb_kobject, &agg_attribute.attr); error |= sysfs_create_file(nb_kobject, &basic_mode_attribute.attr); error |= sysfs_create_file(nb_kobject, &no_mem_attribute.attr); error |= sysfs_create_file(nb_kobject, &no_normalization_attribute.attr); error |= sysfs_create_file(nb_kobject, &r14_size_attribute.attr); error |= sysfs_create_file(nb_kobject, &print_r14_attribute.attr); error |= sysfs_create_file(nb_kobject, &code_offset_attribute.attr); error |= sysfs_create_file(nb_kobject, &verbose_attribute.attr); if (error) { pr_debug("failed to create file in /sys/nb/\n"); return error; } /* Create the proc entry named "nanoBench" with a NULL parent, using proc_file_fops; return -1 if that fails and 0 once everything is registered. */ struct proc_dir_entry* proc_file_entry = proc_create("nanoBench", 0, NULL, &proc_file_fops); if(proc_file_entry == NULL) { pr_debug("failed to create file in /proc/\n"); return -1; } return 0; } /* nb_exit: module exit handler. Releases the input buffers with kfree and the runtime areas with vfree; the runtime_rbp, runtime_rdi, runtime_rsi and runtime_rsp pointers are moved back by RUNTIME_R_SIZE/2 before being passed to vfree. If runtime_code_base is set, set_memory_nx is applied to runtime_code_base_memory_size/PAGE_SIZE pages before the kfree. NOTE(review): the remainder of this function, starting inside the r14_segments loop, is cut off in this view. */ static void __exit nb_exit(void) { kfree(code); kfree(code_init); kfree(code_one_time_init); kfree(pfc_config_file_content); kfree(msr_config_file_content); vfree(runtime_one_time_init_code); vfree(runtime_rbp - RUNTIME_R_SIZE/2); vfree(runtime_rdi - RUNTIME_R_SIZE/2); vfree(runtime_rsi - RUNTIME_R_SIZE/2); vfree(runtime_rsp - RUNTIME_R_SIZE/2); if (runtime_code_base) { set_memory_nx((unsigned long)runtime_code_base, runtime_code_base_memory_size/PAGE_SIZE); kfree(runtime_code_base); } if (n_r14_segments > 0) { for (int i=0; i