added no_normalization option

2025-12-15 19:10:08 +01:00 · 2020-12-08 17:17:29 +01:00
parent 4a6f3d2d45
commit ef556c41a3
6 changed files with 43 additions and 18 deletions
--- a/README.md
+++ b/README.md
@@ -147,6 +147,7 @@ Both `nanoBench.sh` and `kernel-nanoBench.sh` support the following command-line
 | `-max`                       | Selects the maximum as the aggregate function. |
 | `-basic_mode`                | The effect of this option is described in the [Generated Code](#generated-code) section. |
 | `-no_mem`                    | If this option is enabled, the code for `read_perf_ctrs` does not make any memory accesses and stores all performance counter values in registers. This can, for example, be useful for benchmarks that require that the state of the data caches does not change after the execution of `code_init`. *If this option is used, the code to be benchmarked must not modify registers* ***R8-R11 (Intel)*** *and* ***R8-R13 (AMD).*** *Furthermore, `read_perf_ctrs` will modify* ***RAX, RCX, and RDX***. |
+| `-no_normalization`          | If this option is enabled, the measurement results are not divided by the number of repetitions. |
 | `-cpu <n>`                   | Pins the measurement thread to CPU n. `[Default: Pin the thread to the CPU it is currently running on.]` |
 | `-verbose`                   | Outputs the results of all performance counter readings. In the user-space version, the results are printed to stdout. The output of the kernel module can be accessed using `dmesg`. |

--- a/common/nanoBench.c
+++ b/common/nanoBench.c
@@ -19,6 +19,7 @@ long initial_warm_up_count = INITIAL_WARM_UP_COUNT_DEFAULT;
 size_t alignment_offset = ALIGNMENT_OFFSET_DEFAULT;

 int no_mem = NO_MEM_DEFAULT;
+int no_normalization = NO_NORMALIZATION_DEFAULT;
 int basic_mode = BASIC_MODE_DEFAULT;
 int aggregate_function = AGGREGATE_FUNCTION_DEFAULT;
 int verbose = VERBOSE_DEFAULT;
@@ -637,21 +638,23 @@ void run_experiment(char* measurement_template, int64_t* results[], int n_counte
 }

 char* compute_result_str(char* buf, size_t buf_len, char* desc, int counter) {
-    int64_t agg = get_aggregate_value_100(measurement_results[counter], n_measurements);
-    int64_t agg_base = get_aggregate_value_100(measurement_results_base[counter], n_measurements);
+    int64_t agg = get_aggregate_value(measurement_results[counter], n_measurements, no_normalization?1:100); // scale 100 keeps two decimal places for the normalized output below; scale 1 yields plain integers
+    int64_t agg_base = get_aggregate_value(measurement_results_base[counter], n_measurements, no_normalization?1:100); // same aggregate over measurement_results_base; it is subtracted from agg below

-    int64_t n_rep = loop_count * unroll_count;
-    if (loop_count == 0) {
-        n_rep = unroll_count;
+    if (no_normalization) { // unnormalized: report agg - agg_base without dividing by the number of repetitions
+        snprintf(buf, buf_len, "%s: %lld\n", desc, (long long)(agg-agg_base)); // plain integer, no decimal places
+    } else {
+        int64_t n_rep = loop_count * unroll_count; // divisor used for normalization
+        if (loop_count == 0) { // with loop_count == 0 the product would be 0, so only unroll_count is used
+            n_rep = unroll_count;
+        }
+        int64_t result = ((agg-agg_base) + n_rep/2)/n_rep; // adds half the divisor before the integer division (round-to-nearest for non-negative differences); result is scaled by 100
+        snprintf(buf, buf_len, "%s: %s%lld.%.2lld\n", desc, (result<0?"-":""), ll_abs(result/100), ll_abs(result)%100); // sign, integer part (result/100) and two-digit fraction (result%100) are printed separately
    }
-
-    int64_t result = ((agg-agg_base) + n_rep/2)/n_rep;
-
-    snprintf(buf, buf_len, "%s: %s%lld.%.2lld\n", desc, (result<0?"-":""), ll_abs(result/100), ll_abs(result)%100);
    return buf;
 }

-int64_t get_aggregate_value_100(int64_t* values, size_t length) {
+int64_t get_aggregate_value(int64_t* values, size_t length, size_t scale) {
    if (aggregate_function == MIN) {
        int64_t min = values[0];
        for (int i=0; i<length; i++) {
@@ -659,7 +662,7 @@ int64_t get_aggregate_value_100(int64_t* values, size_t length) {
                min = values[i];
            }
        }
-        return min * 100;
+        return min * scale;
    } else if (aggregate_function == MAX)  {
        int64_t max = values[0];
        for (int i=0; i<length; i++) {
@@ -667,7 +670,7 @@ int64_t get_aggregate_value_100(int64_t* values, size_t length) {
                max = values[i];
            }
        }
-        return max * 100;
+        return max * scale;
    } else {
        qsort(values, length, sizeof(int64_t), cmpInt64);

@@ -676,11 +679,11 @@ int64_t get_aggregate_value_100(int64_t* values, size_t length) {
            int64_t sum = 0;
            int count = 0;
            for (int i=length/5; i<length-(length/5); i++, count++) {
-                sum += (values[i] * 100);
+                sum += (values[i] * scale);
            }
            return sum/count;
        } else {
-            return values[length/2] * 100;
+            return values[length/2] * scale;
        }
    }
 }
--- a/common/nanoBench.h
+++ b/common/nanoBench.h
@@ -100,6 +100,10 @@ extern size_t alignment_offset;
 extern int no_mem;
 #define NO_MEM_DEFAULT 0;

+// If enabled, the measurement results are not divided by the number of repetitions (loop_count * unroll_count, or unroll_count if loop_count is 0).
+extern int no_normalization;
+#define NO_NORMALIZATION_DEFAULT 0 // no trailing ';': every use site supplies its own, and the macro stays usable inside expressions
+
 // If disabled, the first measurement is performed with 2*unroll_count and the second with unroll_count; the reported result is the difference between the two
 // measurements.
 // If enabled, the first measurement is performed with unroll_count and the second with an empty measurement body; the reported result is the difference
@@ -217,7 +221,7 @@ void run_experiment(char* measurement_template, int64_t* results[], int n_counte
 void create_and_run_one_time_init_code(void);

 char* compute_result_str(char* buf, size_t buf_len, char* desc, int counter);
-int64_t get_aggregate_value_100(int64_t* values, size_t length);
+int64_t get_aggregate_value(int64_t* values, size_t length, size_t scale);
 int cmpInt64(const void *a, const void *b);
 long long ll_abs(long long val);

--- a/kernel-nanoBench.sh
+++ b/kernel-nanoBench.sh
@@ -52,7 +52,7 @@ while [ "$1" ]; do
        shift 2
    elif [[ "$1" == -cpu ]]; then
        taskset="taskset -c $2"
-        shift 2    
+        shift 2
    elif [[ "$1" == -con* ]]; then
        echo -n "$2" > /sys/nb/config
        shift 2
@@ -65,9 +65,12 @@ while [ "$1" ]; do
    elif [[ "$1" == -l* ]]; then
        echo "$2" > /sys/nb/loop_count
        shift 2
-    elif [[ "$1" == -no_mem ]]; then
+    elif [[ "$1" == -no_m* ]]; then
        echo "1" > /sys/nb/no_mem
        shift
+    elif [[ "$1" == -no_n* ]]; then
+        echo "1" > /sys/nb/no_normalization
+        shift
    elif [[ "$1" == -n* ]]; then
        echo "$2" > /sys/nb/n_measurements
        shift 2
@@ -118,8 +121,9 @@ while [ "$1" ]; do
        echo "  -max:                       Selects the maximum as the aggregate function."
        echo "  -basic_mode:                Enables basic mode."
        echo "  -no_mem:                    The code for reading the perf. ctrs. does not make memory accesses."
+        echo "  -no_normalization:          The measurement results are not divided by the number of repetitions."
        echo "  -cpu <n>:                   Pins the measurement thread to CPU n."
-        echo "  -verbose:                   Outputs the results of all performance counter readings."        
+        echo "  -verbose:                   Outputs the results of all performance counter readings."
        exit 0
    else
        echo "Invalid option: $1"
--- a/kernel/nb_km.c
+++ b/kernel/nb_km.c
@@ -264,6 +264,15 @@ static ssize_t no_mem_store(struct kobject *kobj, struct kobj_attribute *attr, c
 }
 static struct kobj_attribute no_mem_attribute =__ATTR(no_mem, 0660, no_mem_show, no_mem_store);

+static ssize_t no_normalization_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { // sysfs read handler: formats the current no_normalization flag, followed by a newline, into buf
+    return sprintf(buf, "%d\n", no_normalization); // %d (not %u): no_normalization is declared as int; returns the number of characters written
+}
+static ssize_t no_normalization_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { // sysfs write handler: parses a decimal integer from buf into no_normalization
+    sscanf(buf, "%d", &no_normalization); // %d matches the int* argument; %u would require an unsigned int*
+    return count; // the whole write is reported as consumed, even if no number could be parsed
+}
+static struct kobj_attribute no_normalization_attribute =__ATTR(no_normalization, 0660, no_normalization_show, no_normalization_store); // attribute "no_normalization" (mode 0660) wired to the show/store handlers above; registered on nb_kobject in nb_init
+
 static ssize_t agg_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
    return sprintf(buf, "%d\n", aggregate_function);
 }
@@ -390,6 +399,7 @@ static ssize_t reset_show(struct kobject *kobj, struct kobj_attribute *attr, cha
    initial_warm_up_count = INITIAL_WARM_UP_COUNT_DEFAULT;

    no_mem = NO_MEM_DEFAULT;
+    no_normalization = NO_NORMALIZATION_DEFAULT;
    basic_mode = BASIC_MODE_DEFAULT;
    aggregate_function = AGGREGATE_FUNCTION_DEFAULT;
    verbose = VERBOSE_DEFAULT;
@@ -640,6 +650,7 @@ static int __init nb_init(void) {
    error |= sysfs_create_file(nb_kobject, &agg_attribute.attr);
    error |= sysfs_create_file(nb_kobject, &basic_mode_attribute.attr);
    error |= sysfs_create_file(nb_kobject, &no_mem_attribute.attr);
+    error |= sysfs_create_file(nb_kobject, &no_normalization_attribute.attr);
    error |= sysfs_create_file(nb_kobject, &r14_size_attribute.attr);
    error |= sysfs_create_file(nb_kobject, &print_r14_attribute.attr);
    error |= sysfs_create_file(nb_kobject, &code_offset_attribute.attr);
--- a/user/nanoBench_main.c
+++ b/user/nanoBench_main.c
@@ -39,6 +39,7 @@ void print_usage() {
    printf("  -min:                           Selects the minimum as the aggregate function.\n");
    printf("  -basic_mode:                    Enables basic mode.\n");
    printf("  -no_mem:                        The code for reading the perf. ctrs. does not make memory accesses.\n");
+    printf("  -no_normalization:              The measurement results are not divided by the number of repetitions.\n");
    printf("  -verbose:                       Outputs the results of all performance counter readings.\n");
    printf("  -cpu <n>:                       Pins the measurement thread to CPU n. \n");
    printf("  -usr <n>:                       If 1, counts events at a privilege level greater than 0.\n");
@@ -83,6 +84,7 @@ int main(int argc, char **argv) {
        {"max", no_argument, &aggregate_function, MAX},
        {"basic_mode", no_argument, &basic_mode, 1},
        {"no_mem", no_argument, &no_mem, 1},
+        {"no_normalization", no_argument, &no_normalization, 1},
        {"verbose", no_argument, &verbose, 1},
        {"cpu", required_argument, 0, 'p'},
        {"usr", required_argument, 0, 'r'},