diff --git a/README.md b/README.md index fd382d6..b95f729 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,7 @@ Both `nanoBench.sh` and `kernel-nanoBench.sh` support the following command-line | `-max` | Selects the maximum as the aggregate function. | | `-basic_mode` | The effect of this option is described in the [Generated Code](#generated-code) section. | | `-no_mem` | If this option is enabled, the code for `read_perf_ctrs` does not make any memory accesses and stores all performance counter values in registers. This can, for example, be useful for benchmarks that require that the state of the data caches does not change after the execution of `code_init`. *If this option is used, the code to be benchmarked must not modify registers* ***R8-R11 (Intel)*** *and* ***R8-R13 (AMD).*** *Furthermore, `read_perf_ctrs` will modify* ***RAX, RCX, and RDX***. | +| `-no_normalization` | If this option is enabled, the measurement results are not divided by the number of repetitions. | | `-cpu <n>` | Pins the measurement thread to CPU n. `[Default: Pin the thread to the CPU it is currently running on.]` | | `-verbose` | Outputs the results of all performance counter readings. In the user-space version, the results are printed to stdout. The output of the kernel module can be accessed using `dmesg`. 
| diff --git a/common/nanoBench.c b/common/nanoBench.c index e40da8d..e31b5e4 100644 --- a/common/nanoBench.c +++ b/common/nanoBench.c @@ -19,6 +19,7 @@ long initial_warm_up_count = INITIAL_WARM_UP_COUNT_DEFAULT; size_t alignment_offset = ALIGNMENT_OFFSET_DEFAULT; int no_mem = NO_MEM_DEFAULT; +int no_normalization = NO_NORMALIZATION_DEFAULT; int basic_mode = BASIC_MODE_DEFAULT; int aggregate_function = AGGREGATE_FUNCTION_DEFAULT; int verbose = VERBOSE_DEFAULT; @@ -637,21 +638,23 @@ void run_experiment(char* measurement_template, int64_t* results[], int n_counte } char* compute_result_str(char* buf, size_t buf_len, char* desc, int counter) { - int64_t agg = get_aggregate_value_100(measurement_results[counter], n_measurements); - int64_t agg_base = get_aggregate_value_100(measurement_results_base[counter], n_measurements); + int64_t agg = get_aggregate_value(measurement_results[counter], n_measurements, no_normalization?1:100); + int64_t agg_base = get_aggregate_value(measurement_results_base[counter], n_measurements, no_normalization?1:100); - int64_t n_rep = loop_count * unroll_count; - if (loop_count == 0) { - n_rep = unroll_count; + if (no_normalization) { + snprintf(buf, buf_len, "%s: %lld\n", desc, (long long)(agg-agg_base)); + } else { + int64_t n_rep = loop_count * unroll_count; + if (loop_count == 0) { + n_rep = unroll_count; + } + int64_t result = ((agg-agg_base) + n_rep/2)/n_rep; + snprintf(buf, buf_len, "%s: %s%lld.%.2lld\n", desc, (result<0?"-":""), ll_abs(result/100), ll_abs(result)%100); } - - int64_t result = ((agg-agg_base) + n_rep/2)/n_rep; - - snprintf(buf, buf_len, "%s: %s%lld.%.2lld\n", desc, (result<0?"-":""), ll_abs(result/100), ll_abs(result)%100); return buf; } -int64_t get_aggregate_value_100(int64_t* values, size_t length) { +int64_t get_aggregate_value(int64_t* values, size_t length, size_t scale) { if (aggregate_function == MIN) { int64_t min = values[0]; for (int i=0; i /sys/nb/config shift 2 @@ -65,9 +65,12 @@ while [ "$1" ]; do elif [[ 
"$1" == -l* ]]; then echo "$2" > /sys/nb/loop_count shift 2 - elif [[ "$1" == -no_mem ]]; then + elif [[ "$1" == -no_m* ]]; then echo "1" > /sys/nb/no_mem shift + elif [[ "$1" == -no_n* ]]; then + echo "1" > /sys/nb/no_normalization + shift elif [[ "$1" == -n* ]]; then echo "$2" > /sys/nb/n_measurements shift 2 @@ -118,8 +121,9 @@ while [ "$1" ]; do echo " -max: Selects the maximum as the aggregate function." echo " -basic_mode: Enables basic mode." echo " -no_mem: The code for reading the perf. ctrs. does not make memory accesses." + echo " -no_normalization: The measurement results are not divided by the number of repetitions." echo " -cpu : Pins the measurement thread to CPU n." - echo " -verbose: Outputs the results of all performance counter readings." + echo " -verbose: Outputs the results of all performance counter readings." exit 0 else echo "Invalid option: $1" diff --git a/kernel/nb_km.c b/kernel/nb_km.c index 68def87..88f3fbd 100644 --- a/kernel/nb_km.c +++ b/kernel/nb_km.c @@ -264,6 +264,15 @@ static ssize_t no_mem_store(struct kobject *kobj, struct kobj_attribute *attr, c } static struct kobj_attribute no_mem_attribute =__ATTR(no_mem, 0660, no_mem_show, no_mem_store); +static ssize_t no_normalization_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { + return sprintf(buf, "%u\n", no_normalization); +} +static ssize_t no_normalization_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { + sscanf(buf, "%u", &no_normalization); + return count; +} +static struct kobj_attribute no_normalization_attribute =__ATTR(no_normalization, 0660, no_normalization_show, no_normalization_store); + static ssize_t agg_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", aggregate_function); } @@ -390,6 +399,7 @@ static ssize_t reset_show(struct kobject *kobj, struct kobj_attribute *attr, cha initial_warm_up_count = INITIAL_WARM_UP_COUNT_DEFAULT; no_mem = NO_MEM_DEFAULT; + 
no_normalization = NO_NORMALIZATION_DEFAULT; basic_mode = BASIC_MODE_DEFAULT; aggregate_function = AGGREGATE_FUNCTION_DEFAULT; verbose = VERBOSE_DEFAULT; @@ -640,6 +650,7 @@ static int __init nb_init(void) { error |= sysfs_create_file(nb_kobject, &agg_attribute.attr); error |= sysfs_create_file(nb_kobject, &basic_mode_attribute.attr); error |= sysfs_create_file(nb_kobject, &no_mem_attribute.attr); + error |= sysfs_create_file(nb_kobject, &no_normalization_attribute.attr); error |= sysfs_create_file(nb_kobject, &r14_size_attribute.attr); error |= sysfs_create_file(nb_kobject, &print_r14_attribute.attr); error |= sysfs_create_file(nb_kobject, &code_offset_attribute.attr); diff --git a/user/nanoBench_main.c b/user/nanoBench_main.c index e09db9f..7cd3263 100644 --- a/user/nanoBench_main.c +++ b/user/nanoBench_main.c @@ -39,6 +39,7 @@ void print_usage() { printf(" -min: Selects the minimum as the aggregate function.\n"); printf(" -basic_mode: Enables basic mode.\n"); printf(" -no_mem: The code for reading the perf. ctrs. does not make memory accesses.\n"); + printf(" -no_normalization: The measurement results are not divided by the number of repetitions.\n"); printf(" -verbose: Outputs the results of all performance counter readings.\n"); printf(" -cpu : Pins the measurement thread to CPU n. \n"); printf(" -usr : If 1, counts events at a privilege level greater than 0.\n"); @@ -83,6 +84,7 @@ int main(int argc, char **argv) { {"max", no_argument, &aggregate_function, MAX}, {"basic_mode", no_argument, &basic_mode, 1}, {"no_mem", no_argument, &no_mem, 1}, + {"no_normalization", no_argument, &no_normalization, 1}, {"verbose", no_argument, &verbose, 1}, {"cpu", required_argument, 0, 'p'}, {"usr", required_argument, 0, 'r'},