late_init option

This commit is contained in:
Andreas Abel
2020-12-08 20:34:21 +01:00
parent a3def89d67
commit 6d9e1f9e85
8 changed files with 72 additions and 7 deletions

View File

@@ -101,6 +101,7 @@ We will now take a look behind the scenes at the code that *nanoBench* generates
save_regs
code_init
m1 = read_perf_ctrs // stores results in memory, does not modify registers
code_late_init
for j=0 to loop_count // this line is omitted if loop_count=0
code // (copy #1)
code // (copy #2)
@@ -130,9 +131,11 @@ Both `nanoBench.sh` and `kernel-nanoBench.sh` support the following command-line
|------------------------------|-------------|
| `-asm <code>` | Assembler code sequence (in Intel syntax) containing the code to be benchmarked. |
| `-asm_init <code>` | Assembler code sequence (in Intel syntax) that is executed once in the beginning of every benchmark run. |
| `-asm_late_init <code>` | Assembler code sequence (in Intel syntax) that is executed once immediately before the code to be benchmarked. |
| `-asm_one_time_init <code>` | Assembler code sequence (in Intel syntax) that is executed once before the first benchmark run. |
| `-code <filename>` | A binary file containing the code to be benchmarked as raw x86 machine code. *This option cannot be used together with `-asm`.* |
| `-code_init <filename>` | A binary file containing code to be executed once in the beginning of every benchmark run. *This option cannot be used together with `-asm_init`.* |
| `-code_late_init <filename>` | A binary file containing code to be executed once immediately before the code to be benchmarked. *This option cannot be used together with `-asm_late_init`.* |
| `-code_one_time_init <filename>` | A binary file containing code to be executed once before the first benchmark run. *This option cannot be used together with `-asm_one_time_init`.* |
| `-config <file>` | File with performance counter event specifications. Details are described [below](#performance-counter-config-files). |
| `-n_measurements <n>` | Number of times the measurements are repeated. `[Default: n=10]` |

View File

@@ -31,6 +31,9 @@ size_t code_length = 0;
// Machine code that is executed once at the beginning of every benchmark run,
// and its length in bytes.
char* code_init = NULL;
size_t code_init_length = 0;
// Machine code that is executed once immediately before the code to be
// benchmarked, and its length in bytes. A length of 0 means that no late-init
// code is copied into the generated runtime code.
char* code_late_init = NULL;
size_t code_late_init_length = 0;
// Machine code that is executed once before the first benchmark run, and its
// length in bytes.
char* code_one_time_init = NULL;
size_t code_one_time_init_length = 0;
@@ -417,7 +420,7 @@ size_t get_required_runtime_code_length() {
req_code_length += 100;
}
}
return code_init_length + 2*unroll_count*req_code_length + 10000;
return code_init_length + code_late_init_length + 2*unroll_count*req_code_length + 10000;
}
size_t get_distance_to_code(char* measurement_template, size_t templateI) {
@@ -465,7 +468,7 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
*(int32_t*)(&runtime_code[rcI]) = (int32_t)local_loop_count; rcI += 4; // mov R15, local_loop_count
}
size_t dist = get_distance_to_code(measurement_template, templateI);
size_t dist = get_distance_to_code(measurement_template, templateI) + code_late_init_length;
size_t nFill = (64 - ((uintptr_t)&runtime_code[rcI+dist] % 64)) % 64;
nFill += alignment_offset;
for (size_t i=0; i<nFill; i++) {
@@ -478,6 +481,11 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
magic_bytes_code_I = templateI;
templateI += 8;
if (code_late_init_length > 0) {
memcpy(&runtime_code[rcI], code_late_init, code_late_init_length);
rcI += code_late_init_length;
}
if (unrollI == 0 && codeI == 0) {
rcI_code_start = rcI;
}

View File

@@ -128,6 +128,9 @@ extern size_t code_length;
extern char* code_init;
extern size_t code_init_length;
extern char* code_late_init;
extern size_t code_late_init_length;
extern char* code_one_time_init;
extern size_t code_one_time_init_length;

View File

@@ -25,6 +25,14 @@ while [ "$1" ]; do
echo -n "asm-init.o" > /sys/nb/init
rm -f asm-init.s asm-init.o
shift 2
elif [[ "$1" == -asm_l* ]]; then
echo ".intel_syntax noprefix" > asm-late-init.s
echo "$2" >> asm-late-init.s
as asm-late-init.s -o asm-late-init.o
objcopy asm-late-init.o -O binary asm-late-init.o
echo -n "asm-late-init.o" > /sys/nb/late_init
rm -f asm-late-init.s asm-late-init.o
shift 2
elif [[ "$1" == -asm_o* ]]; then
echo ".intel_syntax noprefix" > asm-one-time-init.s
echo "$2" >> asm-one-time-init.s
@@ -105,9 +113,11 @@ while [ "$1" ]; do
echo "kernel-nanoBench.sh usage:"
echo
echo " -asm <code>: Assembler code string (in Intel syntax) to be benchmarked."
echo " -asm_init <code>: Assembler code string (in Intel syntax) to be executed once in the beginning"
echo " -asm_init <code>: Assembler code string (in Intel syntax) to be executed once in the beginning."
echo " -asm_late_init <code>: Assembler code string (in Intel syntax) to be executed once immediately before the code to be benchmarked."
echo " -code <filename>: Binary file containing the code to be benchmarked."
echo " -code_init <filename>: Binary file containing code to be executed once in the beginning"
echo " -code_init <filename>: Binary file containing code to be executed once in the beginning."
echo " -code_late_init <filename>: Binary file containing code to be executed once immediately before the code to be benchmarked."
echo " -config <filename>: File with performance counter event specifications."
echo " -n_measurements <n>: Number of times the measurements are repeated."
echo " -unroll_count <n>: Number of copies of the benchmark code inside the inner loop."

View File

@@ -43,6 +43,7 @@ char* runtime_code_base = NULL;
size_t code_offset = 0;
size_t code_memory_size = 0;
size_t code_init_memory_size = 0;
size_t code_late_init_memory_size = 0;
size_t code_one_time_init_memory_size = 0;
size_t pfc_config_memory_size = 0;
size_t msr_config_memory_size = 0;
@@ -115,6 +116,15 @@ static ssize_t init_store(struct kobject *kobj, struct kobj_attribute *attr, con
}
static struct kobj_attribute code_init_attribute =__ATTR(init, 0660, init_show, init_store);
// Show handler of the "late_init" attribute: reading it produces no data
// (nothing is written to buf, and a length of 0 is returned).
static ssize_t late_init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
return 0;
}
// Store handler of the "late_init" attribute: passes the written string to
// read_file_into_buffer() together with the late-init code buffer, its length,
// and its memory size. The shell and Python front ends write the name of a
// binary file (e.g., "asm-late-init.o") to /sys/nb/late_init.
// Always reports all count bytes as consumed.
// NOTE(review): the outcome of read_file_into_buffer() is not checked here, so
// a failure to load the file would not be reported to the writer -- confirm
// against the definition of read_file_into_buffer().
static ssize_t late_init_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
read_file_into_buffer(buf, &code_late_init, &code_late_init_length, &code_late_init_memory_size);
return count;
}
// Attribute named "late_init" with mode 0660, bound to the two handlers above.
static struct kobj_attribute code_late_init_attribute =__ATTR(late_init, 0660, late_init_show, late_init_store);
static ssize_t one_time_init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
return 0;
}
@@ -382,8 +392,10 @@ static ssize_t verbose_store(struct kobject *kobj, struct kobj_attribute *attr,
static struct kobj_attribute verbose_attribute =__ATTR(verbose, 0660, verbose_show, verbose_store);
// Show handler with a side effect: a read resets the lengths of the main,
// init, late-init, and one-time-init code to 0, so that no code from a previous
// configuration is used. Only the lengths are reset here; the buffers
// themselves are left untouched. No data is written to buf.
// Presumably bound to /sys/nb/clear, which the Python wrapper reads at the
// start of runNanoBench() -- TODO confirm against the attribute definition.
static ssize_t clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
// NOTE(review): code_init_length is set to 0 twice in this listing (here and
// two lines below); the second assignment is redundant. This looks like the
// old and the new position of the same line shown together -- confirm.
code_init_length = 0;
code_length = 0;
code_init_length = 0;
code_late_init_length = 0;
code_one_time_init_length = 0;
return 0;
}
static ssize_t clear_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
@@ -403,8 +415,11 @@ static ssize_t reset_show(struct kobject *kobj, struct kobj_attribute *attr, cha
basic_mode = BASIC_MODE_DEFAULT;
aggregate_function = AGGREGATE_FUNCTION_DEFAULT;
verbose = VERBOSE_DEFAULT;
alignment_offset = ALIGNMENT_OFFSET_DEFAULT;
code_init_length = 0;
code_late_init_length = 0;
code_one_time_init_length = 0;
code_length = 0;
code_offset = 0;
n_pfc_configs = 0;
@@ -638,6 +653,7 @@ static int __init nb_init(void) {
error |= sysfs_create_file(nb_kobject, &reset_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_init_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_late_init_attribute.attr);
error |= sysfs_create_file(nb_kobject, &code_one_time_init_attribute.attr);
error |= sysfs_create_file(nb_kobject, &config_attribute.attr);
error |= sysfs_create_file(nb_kobject, &msr_config_attribute.attr);
@@ -673,6 +689,7 @@ static int __init nb_init(void) {
static void __exit nb_exit(void) {
kfree(code);
kfree(code_init);
kfree(code_late_init);
kfree(code_one_time_init);
kfree(pfc_config_file_content);
kfree(msr_config_file_content);

View File

@@ -142,6 +142,7 @@ def resetNanoBench():
# code, codeObjFile, codeBinFile cannot be specified at the same time (same for init, initObjFile and initBinFile, and for lateInit, lateInitObjFile and lateInitBinFile)
def runNanoBench(code='', codeObjFile=None, codeBinFile=None,
init='', initObjFile=None, initBinFile=None,
lateInit='', lateInitObjFile=None, lateInitBinFile=None,
oneTimeInit='', oneTimeInitObjFile=None, oneTimeInitBinFile=None):
if not ramdiskCreated: createRamdisk()
with open('/sys/nb/clear') as clearFile: clearFile.read()
@@ -164,6 +165,15 @@ def runNanoBench(code='', codeObjFile=None, codeBinFile=None,
elif initBinFile is not None:
writeFile('/sys/nb/init', initBinFile)
if lateInit:
lateInitObjFile = '/tmp/ramdisk/late_init.o'
assemble(lateInit, lateInitObjFile)
if lateInitObjFile is not None:
objcopy(lateInitObjFile, '/tmp/ramdisk/late_init.bin')
writeFile('/sys/nb/late_init', '/tmp/ramdisk/late_init.bin')
elif lateInitBinFile is not None:
writeFile('/sys/nb/late_init', lateInitBinFile)
if oneTimeInit:
oneTimeInitObjFile = '/tmp/ramdisk/one_time_init.o'
assemble(oneTimeInit, oneTimeInitObjFile)

View File

@@ -28,6 +28,13 @@ while [ "$2" ]; do
objcopy asm-init.o -O binary asm-init.bin
args="$args -code_init asm-init.bin"
shift 2
elif [[ "$1" == -asm_l* ]]; then
echo ".intel_syntax noprefix" > asm-late-init.s
echo "$2" >> asm-late-init.s
as asm-late-init.s -o asm-late-init.o || exit
objcopy asm-late-init.o -O binary asm-late-init.bin
args="$args -code_late_init asm-late-init.bin"
shift 2
elif [[ "$1" == -asm_o* ]]; then
echo ".intel_syntax noprefix" > asm-one-time-init.s
echo "$2" >> asm-one-time-init.s
@@ -74,6 +81,8 @@ fi
rm -f asm-code.*
rm -f asm-init.*
rm -f asm-late-init.*
rm -f asm-one-time-init.*
echo $prev_rdpmc > /sys/bus/event_source/devices/cpu/rdpmc
echo $prev_nmi_watchdog > /proc/sys/kernel/nmi_watchdog

View File

@@ -25,7 +25,8 @@ void print_usage() {
printf("nanoBench usage:\n");
printf("\n");
printf(" -code <filename>: Binary file containing the code to be benchmarked.\n");
printf(" -code_init <filename>: Binary file containing code to be executed once before each measurement\n");
printf(" -code_init <filename>: Binary file containing code to be executed once before each measurement.\n");
printf(" -code_late_init <filename>: Binary file containing code to be executed once immediately before the code to be benchmarked.\n");
printf(" -code_one_time_init <filename>: Binary file containing code to be executed once before the first measurement\n");
printf(" -config <filename>: File with performance counter event specifications.\n");
printf(" -n_measurements <n>: Number of times the measurements are repeated.\n");
@@ -70,6 +71,7 @@ int main(int argc, char **argv) {
struct option long_opts[] = {
{"code", required_argument, 0, 'c'},
{"code_init", required_argument, 0, 'i'},
{"code_late_init", required_argument, 0, 't'},
{"code_one_time_init", required_argument, 0, 'o'},
{"config", required_argument, 0, 'f'},
{"n_measurements", required_argument, 0, 'n'},
@@ -105,6 +107,9 @@ int main(int argc, char **argv) {
case 'i':
code_init_length = mmap_file(optarg, &code_init);
break;
case 't':
code_late_init_length = mmap_file(optarg, &code_late_init);
break;
case 'o':
code_one_time_init_length = mmap_file(optarg, &code_one_time_init);
break;