// nanoBench
//
// Copyright (C) 2019 Andreas Abel
//
// This program is free software: you can redistribute it and/or modify it under the terms of version 3 of the GNU Affero General Public License.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include <../arch/x86/include/asm/fpu/api.h>
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,12,0)
#include
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)
#include
int (*set_memory_x)(unsigned long, int) = 0;
int (*set_memory_nx)(unsigned long, int) = 0;
#else
#include
#endif
#include "../common/nanoBench.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Andreas Abel");
// 4 MB is the maximum that kmalloc supports on my machines
#define KMALLOC_MAX (4*1024*1024)
// Base address of the runtime code area; runtime_code is derived from it as runtime_code_base + code_offset.
char* runtime_code_base = NULL;
// Byte offset that is added to runtime_code_base to obtain runtime_code.
size_t code_offset = 0;
// Capacities (in bytes) of the buffers that read_file_into_buffer() fills and grows on demand:
// code, code_init, code_one_time_init and pfc_config_file_content, respectively.
size_t code_memory_size = 0;
size_t code_init_memory_size = 0;
size_t code_one_time_init_memory_size = 0;
size_t pfc_config_memory_size = 0;
// NOTE(review): presumably the capacity of msr_config_file_content; its use is not visible here - confirm.
size_t msr_config_memory_size = 0;
// Size (in bytes) of the area behind runtime_code_base; used as the upper bound for the requested code length.
size_t runtime_code_base_memory_size = 0;
// Size (in bytes) of the current runtime_one_time_init_code allocation; 0 if that allocation failed.
size_t runtime_one_time_init_code_memory_size = 0;
// NOTE(review): looks like an array of n_r14_segments segment pointers - confirm against the code that allocates them.
void** r14_segments = NULL;
size_t n_r14_segments = 0;
/*
 * Reads the complete file file_name into *buf and NUL-terminates the data.
 *
 * file_name:       path of the file to read.
 * buf:             in/out; kmalloc'ed buffer that is reused between calls. If it is too small for the
 *                  file, it is freed and replaced by a larger one (the old contents are discarded).
 * buf_len:         out; number of bytes that were read (not counting the terminating '\0').
 * buf_memory_size: in/out; capacity of *buf in bytes.
 *
 * Returns 0 on success and -1 on failure. If the file cannot be opened or its attributes cannot be
 * obtained, *buf, *buf_len and *buf_memory_size are left untouched. If the allocation fails, *buf is
 * NULL and *buf_len and *buf_memory_size are 0. If reading fails, *buf holds an empty string and
 * *buf_len is 0.
 */
static int read_file_into_buffer(const char *file_name, char **buf, size_t *buf_len, size_t *buf_memory_size) {
    struct file *filp;
    struct kstat ks;
    size_t file_size;
    loff_t pos = 0;
    ssize_t n_read;
    int getattr_err;
    int ret = -1;

    filp = filp_open(file_name, O_RDONLY, 0);
    if (IS_ERR(filp)) {
        pr_debug("Error opening file %s\n", file_name);
        return -1;
    }

    // Use the path of the file that was just opened instead of resolving file_name a second time.
    // This way, no extra path reference has to be acquired (and released on every exit path), and
    // the attributes are guaranteed to belong to the file that is read below.
    #if LINUX_VERSION_CODE <= KERNEL_VERSION(4,11,0)
    getattr_err = vfs_getattr(&filp->f_path, &ks);
    #else
    getattr_err = vfs_getattr(&filp->f_path, &ks, 0, 0);
    #endif
    if (getattr_err) {
        pr_debug("Error getting file attributes\n");
        goto out_close;
    }

    file_size = ks.size;

    // One extra byte is needed for the terminating '\0'.
    if (file_size + 1 > *buf_memory_size) {
        kfree(*buf);
        // Allocate twice the required size so that slightly larger files do not trigger a reallocation.
        *buf_memory_size = max(2*(file_size + 1), PAGE_SIZE);
        *buf = kmalloc(*buf_memory_size, GFP_KERNEL);
        if (!*buf) {
            printk(KERN_ERR "Could not allocate memory for %s\n", file_name);
            *buf_memory_size = 0;
            // The buffer is gone; do not report a length for data that does not exist.
            *buf_len = 0;
            goto out_close;
        }
    }

    n_read = kernel_read(filp, *buf, file_size, &pos);
    if (n_read < 0) {
        pr_debug("Error reading file %s\n", file_name);
        (*buf)[0] = '\0';
        *buf_len = 0;
        goto out_close;
    }

    // Terminate after the bytes that were actually read, so that uninitialized memory is never
    // treated as file content if fewer than file_size bytes were returned.
    (*buf)[n_read] = '\0';
    *buf_len = n_read;
    ret = 0;

out_close:
    filp_close(filp, NULL);
    return ret;
}
/*
 * Read handler of the "code" attribute.
 * Nothing is copied into buf; the reported length is always 0.
 */
static ssize_t code_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
    const ssize_t n_bytes_reported = 0; /* buf is left untouched */

    return n_bytes_reported;
}
/*
 * Write handler of the "code" attribute.
 * buf holds the name of a file; its contents are loaded into code (length in code_length).
 *
 * Returns count if the file was loaded, and -EIO otherwise, so that the writer notices that the
 * previously loaded contents were not replaced by the requested file.
 */
static ssize_t code_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
    if (read_file_into_buffer(buf, &code, &code_length, &code_memory_size)) {
        return -EIO;
    }
    return count;
}
// Attribute "code" (mode 0660), served by code_show()/code_store().
static struct kobj_attribute code_attribute =__ATTR(code, 0660, code_show, code_store);
/*
 * Read handler of the "init" attribute.
 * Nothing is copied into buf; the reported length is always 0.
 */
static ssize_t init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
    const ssize_t n_bytes_reported = 0; /* buf is left untouched */

    return n_bytes_reported;
}
/*
 * Write handler of the "init" attribute.
 * buf holds the name of a file; its contents are loaded into code_init (length in code_init_length).
 *
 * Returns count if the file was loaded, and -EIO otherwise, so that the writer notices that the
 * previously loaded contents were not replaced by the requested file.
 */
static ssize_t init_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
    if (read_file_into_buffer(buf, &code_init, &code_init_length, &code_init_memory_size)) {
        return -EIO;
    }
    return count;
}
// Attribute "init" (mode 0660), served by init_show()/init_store().
static struct kobj_attribute code_init_attribute =__ATTR(init, 0660, init_show, init_store);
/*
 * Read handler of the "one_time_init" attribute.
 * Nothing is copied into buf; the reported length is always 0.
 */
static ssize_t one_time_init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
    const ssize_t n_bytes_reported = 0; /* buf is left untouched */

    return n_bytes_reported;
}
/*
 * Write handler of the "one_time_init" attribute.
 * buf holds the name of a file; its contents are loaded into code_one_time_init (length in
 * code_one_time_init_length). Afterwards, runtime_one_time_init_code is grown if it is smaller than
 * the capacity of code_one_time_init plus 10000 bytes.
 *
 * Returns count on success, -ENOMEM if the executable memory could not be allocated, and -EIO if the
 * file could not be read.
 */
static ssize_t one_time_init_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
    int read_err = read_file_into_buffer(buf, &code_one_time_init, &code_one_time_init_length, &code_one_time_init_memory_size);

    // The size check is performed even if reading failed, so that the executable area is sized
    // exactly as before; the read error is reported at the end.
    // NOTE(review): the 10000 extra bytes are presumably headroom for code emitted around the user code - confirm.
    size_t required_memory_size = 10000 + code_one_time_init_memory_size;
    if (required_memory_size > runtime_one_time_init_code_memory_size) {
        vfree(runtime_one_time_init_code);
        runtime_one_time_init_code = __vmalloc(required_memory_size, GFP_KERNEL, PAGE_KERNEL_EXEC);
        if (!runtime_one_time_init_code) {
            runtime_one_time_init_code_memory_size = 0;
            pr_debug("failed to allocate executable memory\n");
            return -ENOMEM;
        }
        // Only record the new size once the allocation has succeeded.
        runtime_one_time_init_code_memory_size = required_memory_size;
    }

    if (read_err) {
        return -EIO;
    }
    return count;
}
// Attribute "one_time_init" (mode 0660), served by one_time_init_show()/one_time_init_store().
static struct kobj_attribute code_one_time_init_attribute =__ATTR(one_time_init, 0660, one_time_init_show, one_time_init_store);
// Read handler of the "config" attribute; returns the number of bytes reported in buf.
// NOTE(review): the loop header below is syntactically incomplete (the loop bound, the loop body and
// the start of the size check appear to be missing); restore it before building.
static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
ssize_t count = 0;
for (int i=0; i PAGE_SIZE) {
// NOTE(review): presumably caps the reported length because the output buffer holds PAGE_SIZE bytes - confirm.
return PAGE_SIZE-1;
}
}
return count;
}
/*
 * Write handler of the "config" attribute.
 * buf holds the name of a file; its contents are loaded into pfc_config_file_content and then
 * processed by parse_counter_configs().
 *
 * Returns count on success. If the file could not be read, parse_counter_configs() is not called
 * (pfc_config_file_content would not hold the requested file and may be NULL), and -EIO is returned.
 */
static ssize_t config_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
    // Only needed as an out-parameter; the length is not used here.
    size_t pfc_config_length;

    if (read_file_into_buffer(buf, &pfc_config_file_content, &pfc_config_length, &pfc_config_memory_size)) {
        return -EIO;
    }
    parse_counter_configs();
    return count;
}
// Attribute "config" (mode 0660), served by config_show()/config_store().
static struct kobj_attribute config_attribute =__ATTR(config, 0660, config_show, config_store);
static ssize_t msr_config_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
ssize_t count = 0;
for (int i=0; i 0) {
for (int i=0; i runtime_code_base_memory_size) {
printk(KERN_ERR "Maximum supported code size %zu kB; requested %zu kB\n", runtime_code_base_memory_size/1024, req_code_length/1024);
return -1;
}
runtime_code = runtime_code_base + code_offset;
kernel_fpu_begin();
long base_unroll_count = (basic_mode?0:unroll_count);
long main_unroll_count = (basic_mode?unroll_count:2*unroll_count);
long base_loop_count = (basic_mode?0:loop_count);
long main_loop_count = loop_count;
char buf[100];
char* measurement_template;
/*********************************
* Fixed-function counters.
********************************/
if (is_AMD_CPU) {
if (no_mem) {
measurement_template = (char*)&measurement_FF_template_AMD_noMem;
} else {
measurement_template = (char*)&measurement_FF_template_AMD;
}
} else {
if (no_mem) {
measurement_template = (char*)&measurement_FF_template_Intel_noMem;
} else {
measurement_template = (char*)&measurement_FF_template_Intel;
}
}
configure_perf_ctrs_FF(0, 1);
create_and_run_one_time_init_code();
run_warmup_experiment(measurement_template);
if (is_AMD_CPU) {
run_experiment(measurement_template, measurement_results_base, 3, base_unroll_count, base_loop_count);
run_experiment(measurement_template, measurement_results, 3, main_unroll_count, main_loop_count);
if (verbose) {
pr_debug("\nRDTSC, MPERF, and APERF results (unroll_count=%ld, loop_count=%ld):\n\n", base_unroll_count, base_loop_count);
print_all_measurement_results(measurement_results_base, 3);
pr_debug("RDTSC, MPERF, and and APERF results (unroll_count=%ld, loop_count=%ld):\n\n", main_unroll_count, main_loop_count);
print_all_measurement_results(measurement_results, 3);
}
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "RDTSC", 0));
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "MPERF", 1));
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "APERF", 2));
} else {
run_experiment(measurement_template, measurement_results_base, 4, base_unroll_count, base_loop_count);
run_experiment(measurement_template, measurement_results, 4, main_unroll_count, main_loop_count);
if (verbose) {
pr_debug("\nRDTSC and fixed-function counter results (unroll_count=%ld, loop_count=%ld):\n\n", base_unroll_count, base_loop_count);
print_all_measurement_results(measurement_results_base, 4);
pr_debug("RDTSC and fixed-function counter results (unroll_count=%ld, loop_count=%ld):\n\n", main_unroll_count, main_loop_count);
print_all_measurement_results(measurement_results, 4);
}
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "RDTSC", 0));
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "Instructions retired", 1));
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "Core cycles", 2));
seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), "Reference cycles", 3));
}
/*********************************
* Programmable counters.
********************************/
if (is_AMD_CPU) {
if (no_mem) {
measurement_template = (char*)&measurement_template_AMD_noMem;
} else {
measurement_template = (char*)&measurement_template_AMD;
}
} else {
if (no_mem) {
if (n_programmable_counters >= 4) {
measurement_template = (char*)&measurement_template_Intel_noMem_4;
} else {
measurement_template = (char*)&measurement_template_Intel_noMem_2;
}
} else {
if (n_programmable_counters >= 4) {
measurement_template = (char*)&measurement_template_Intel_4;
} else {
measurement_template = (char*)&measurement_template_Intel_2;
}
}
}
for (size_t i=0; i= KERNEL_VERSION(5, 4, 0)
set_memory_x = (void*)kallsyms_lookup_name("set_memory_x");
set_memory_nx = (void*)kallsyms_lookup_name("set_memory_nx");
#endif
if (check_cpuid()) {
return -1;
}
for (int i=0; iparent);
if (!nb_kobject) {
pr_debug("failed to create and add nb\n");
return -1;
}
int error = sysfs_create_file(nb_kobject, &clear_attribute.attr);
error |= sysfs_create_file(nb_kobject, &reset_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_init_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_one_time_init_attribute.attr);
error |= sysfs_create_file(nb_kobject, &config_attribute.attr);
error |= sysfs_create_file(nb_kobject, &msr_config_attribute.attr);
error |= sysfs_create_file(nb_kobject, &loop_count_attribute.attr);
error |= sysfs_create_file(nb_kobject, &unroll_count_attribute.attr);
error |= sysfs_create_file(nb_kobject, &n_measurements_attribute.attr);
error |= sysfs_create_file(nb_kobject, &warm_up_attribute.attr);
error |= sysfs_create_file(nb_kobject, &initial_warm_up_attribute.attr);
error |= sysfs_create_file(nb_kobject, &alignment_offset_attribute.attr);
error |= sysfs_create_file(nb_kobject, &agg_attribute.attr);
error |= sysfs_create_file(nb_kobject, &basic_mode_attribute.attr);
error |= sysfs_create_file(nb_kobject, &no_mem_attribute.attr);
error |= sysfs_create_file(nb_kobject, &no_normalization_attribute.attr);
error |= sysfs_create_file(nb_kobject, &r14_size_attribute.attr);
error |= sysfs_create_file(nb_kobject, &print_r14_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_offset_attribute.attr);
error |= sysfs_create_file(nb_kobject, &verbose_attribute.attr);
if (error) {
pr_debug("failed to create file in /sys/nb/\n");
return error;
}
struct proc_dir_entry* proc_file_entry = proc_create("nanoBench", 0, NULL, &proc_file_fops);
if(proc_file_entry == NULL) {
pr_debug("failed to create file in /proc/\n");
return -1;
}
return 0;
}
static void __exit nb_exit(void) {
kfree(code);
kfree(code_init);
kfree(code_one_time_init);
kfree(pfc_config_file_content);
kfree(msr_config_file_content);
vfree(runtime_one_time_init_code);
vfree(runtime_rbp - RUNTIME_R_SIZE/2);
vfree(runtime_rdi - RUNTIME_R_SIZE/2);
vfree(runtime_rsi - RUNTIME_R_SIZE/2);
vfree(runtime_rsp - RUNTIME_R_SIZE/2);
if (runtime_code_base) {
set_memory_nx((unsigned long)runtime_code_base, runtime_code_base_memory_size/PAGE_SIZE);
kfree(runtime_code_base);
}
if (n_r14_segments > 0) {
for (int i=0; i