mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2026-01-04 19:40:08 +01:00
late_init option
This commit is contained in:
@@ -101,6 +101,7 @@ We will now take a look behind the scenes at the code that *nanoBench* generates
|
||||
save_regs
|
||||
code_init
|
||||
m1 = read_perf_ctrs // stores results in memory, does not modify registers
|
||||
code_late_init
|
||||
for j=0 to loop_count // this line is omitted if loop_count=0
|
||||
code // (copy #1)
|
||||
code // (copy #2)
|
||||
@@ -130,9 +131,11 @@ Both `nanoBench.sh` and `kernel-nanoBench.sh` support the following command-line
|
||||
|------------------------------|-------------|
|
||||
| `-asm <code>` | Assembler code sequence (in Intel syntax) containing the code to be benchmarked. |
|
||||
| `-asm_init <code>` | Assembler code sequence (in Intel syntax) that is executed once in the beginning of every benchmark run. |
|
||||
| `-asm_late_init <code>` | Assembler code sequence (in Intel syntax) that is executed once immediately before the code to be benchmarked. |
|
||||
| `-asm_one_time_init <code>` | Assembler code sequence (in Intel syntax) that is executed once before the first benchmark run. |
|
||||
| `-code <filename>` | A binary file containing the code to be benchmarked as raw x86 machine code. *This option cannot be used together with `-asm`.* |
|
||||
| `-code_init <filename>` | A binary file containing code to be executed once in the beginning of every benchmark run. *This option cannot be used together with `-asm_init`.* |
|
||||
| `-code_late_init <filename>` | A binary file containing code to be executed once immediately before the code to be benchmarked. *This option cannot be used together with `-asm_late_init`.* |
|
||||
| `-code_one_time_init <filename>` | A binary file containing code to be executed once before the first benchmark run. *This option cannot be used together with `-asm_one_time_init`.*|
|
||||
| `-config <file>` | File with performance counter event specifications. Details are described [below](#performance-counter-config-files). |
|
||||
| `-n_measurements <n>` | Number of times the measurements are repeated. `[Default: n=10]` |
|
||||
|
||||
@@ -31,6 +31,9 @@ size_t code_length = 0;
|
||||
char* code_init = NULL;
|
||||
size_t code_init_length = 0;
|
||||
|
||||
char* code_late_init = NULL;
|
||||
size_t code_late_init_length = 0;
|
||||
|
||||
char* code_one_time_init = NULL;
|
||||
size_t code_one_time_init_length = 0;
|
||||
|
||||
@@ -417,7 +420,7 @@ size_t get_required_runtime_code_length() {
|
||||
req_code_length += 100;
|
||||
}
|
||||
}
|
||||
return code_init_length + 2*unroll_count*req_code_length + 10000;
|
||||
return code_init_length + code_late_init_length + 2*unroll_count*req_code_length + 10000;
|
||||
}
|
||||
|
||||
size_t get_distance_to_code(char* measurement_template, size_t templateI) {
|
||||
@@ -465,7 +468,7 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
|
||||
*(int32_t*)(&runtime_code[rcI]) = (int32_t)local_loop_count; rcI += 4; // mov R15, local_loop_count
|
||||
}
|
||||
|
||||
size_t dist = get_distance_to_code(measurement_template, templateI);
|
||||
size_t dist = get_distance_to_code(measurement_template, templateI) + code_late_init_length;
|
||||
size_t nFill = (64 - ((uintptr_t)&runtime_code[rcI+dist] % 64)) % 64;
|
||||
nFill += alignment_offset;
|
||||
for (size_t i=0; i<nFill; i++) {
|
||||
@@ -478,6 +481,11 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
|
||||
magic_bytes_code_I = templateI;
|
||||
templateI += 8;
|
||||
|
||||
if (code_late_init_length > 0) {
|
||||
memcpy(&runtime_code[rcI], code_late_init, code_late_init_length);
|
||||
rcI += code_late_init_length;
|
||||
}
|
||||
|
||||
if (unrollI == 0 && codeI == 0) {
|
||||
rcI_code_start = rcI;
|
||||
}
|
||||
|
||||
@@ -128,6 +128,9 @@ extern size_t code_length;
|
||||
extern char* code_init;
|
||||
extern size_t code_init_length;
|
||||
|
||||
extern char* code_late_init;
|
||||
extern size_t code_late_init_length;
|
||||
|
||||
extern char* code_one_time_init;
|
||||
extern size_t code_one_time_init_length;
|
||||
|
||||
|
||||
@@ -25,6 +25,14 @@ while [ "$1" ]; do
|
||||
echo -n "asm-init.o" > /sys/nb/init
|
||||
rm -f asm-init.s asm-init.o
|
||||
shift 2
|
||||
elif [[ "$1" == -asm_l* ]]; then
|
||||
echo ".intel_syntax noprefix" > asm-late-init.s
|
||||
echo "$2" >> asm-late-init.s
|
||||
as asm-late-init.s -o asm-late-init.o
|
||||
objcopy asm-late-init.o -O binary asm-late-init.o
|
||||
echo -n "asm-late-init.o" > /sys/nb/late_init
|
||||
rm -f asm-late-init.s asm-late-init.o
|
||||
shift 2
|
||||
elif [[ "$1" == -asm_o* ]]; then
|
||||
echo ".intel_syntax noprefix" > asm-one-time-init.s
|
||||
echo "$2" >> asm-one-time-init.s
|
||||
@@ -105,9 +113,11 @@ while [ "$1" ]; do
|
||||
echo "kernel-nanoBench.sh usage:"
|
||||
echo
|
||||
echo " -asm <code>: Assembler code string (in Intel syntax) to be benchmarked."
|
||||
echo " -asm_init <code>: Assembler code string (in Intel syntax) to be executed once in the beginning"
|
||||
echo " -asm_init <code>: Assembler code string (in Intel syntax) to be executed once in the beginning."
|
||||
echo " -asm_late_init <code>: Assembler code string (in Intel syntax) to be executed once immediately before the code to be benchmarked."
|
||||
echo " -code <filename>: Binary file containing the code to be benchmarked."
|
||||
echo " -code_init <filename>: Binary file containing code to be executed once in the beginning"
|
||||
echo " -code_init <filename>: Binary file containing code to be executed once in the beginning."
|
||||
echo " -code_late_init <filename>: Binary file containing code to be executed once immediately before the code to be benchmarked."
|
||||
echo " -config <filename>: File with performance counter event specifications."
|
||||
echo " -n_measurements <n>: Number of times the measurements are repeated."
|
||||
echo " -unroll_count <n>: Number of copies of the benchmark code inside the inner loop."
|
||||
|
||||
@@ -43,6 +43,7 @@ char* runtime_code_base = NULL;
|
||||
size_t code_offset = 0;
|
||||
size_t code_memory_size = 0;
|
||||
size_t code_init_memory_size = 0;
|
||||
size_t code_late_init_memory_size = 0;
|
||||
size_t code_one_time_init_memory_size = 0;
|
||||
size_t pfc_config_memory_size = 0;
|
||||
size_t msr_config_memory_size = 0;
|
||||
@@ -115,6 +116,15 @@ static ssize_t init_store(struct kobject *kobj, struct kobj_attribute *attr, con
|
||||
}
|
||||
static struct kobj_attribute code_init_attribute =__ATTR(init, 0660, init_show, init_store);
|
||||
|
||||
/*
 * "show" callback of the late_init attribute.
 * Leaves buf untouched and always returns 0.
 */
static ssize_t late_init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
|
||||
return 0;
|
||||
}
|
||||
/*
 * "store" callback of the late_init attribute.
 * Passes buf to read_file_into_buffer() together with the code_late_init
 * buffer pointer, its length and its memory size, then returns count
 * unchanged. Nothing returned by read_file_into_buffer() is inspected here.
 * NOTE(review): buf is presumably a file name (kernel-nanoBench.sh echoes a
 * file name into /sys/nb/late_init) -- confirm against read_file_into_buffer().
 */
static ssize_t late_init_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
|
||||
read_file_into_buffer(buf, &code_late_init, &code_late_init_length, &code_late_init_memory_size);
|
||||
return count;
|
||||
}
|
||||
/* Attribute named "late_init" (mode 0660) wired to late_init_show / late_init_store. */
static struct kobj_attribute code_late_init_attribute =__ATTR(late_init, 0660, late_init_show, late_init_store);
|
||||
|
||||
static ssize_t one_time_init_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
|
||||
return 0;
|
||||
}
|
||||
@@ -382,8 +392,10 @@ static ssize_t verbose_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
static struct kobj_attribute verbose_attribute =__ATTR(verbose, 0660, verbose_show, verbose_store);
|
||||
|
||||
/*
 * "show" callback that resets the recorded lengths of the benchmark code and
 * of the init, late-init and one-time-init code to 0, then returns 0.
 * Only the length variables are zeroed; the buffers are not freed here.
 * NOTE(review): presumably bound to /sys/nb/clear, which the Python wrapper
 * reads before each run -- the attribute definition is not visible here.
 */
static ssize_t clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) {
|
||||
code_init_length = 0;
|
||||
code_length = 0;
|
||||
code_init_length = 0;
|
||||
code_late_init_length = 0;
|
||||
code_one_time_init_length = 0;
|
||||
return 0;
|
||||
}
|
||||
static ssize_t clear_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
|
||||
@@ -403,8 +415,11 @@ static ssize_t reset_show(struct kobject *kobj, struct kobj_attribute *attr, cha
|
||||
basic_mode = BASIC_MODE_DEFAULT;
|
||||
aggregate_function = AGGREGATE_FUNCTION_DEFAULT;
|
||||
verbose = VERBOSE_DEFAULT;
|
||||
alignment_offset = ALIGNMENT_OFFSET_DEFAULT;
|
||||
|
||||
code_init_length = 0;
|
||||
code_late_init_length = 0;
|
||||
code_one_time_init_length = 0;
|
||||
code_length = 0;
|
||||
code_offset = 0;
|
||||
n_pfc_configs = 0;
|
||||
@@ -638,6 +653,7 @@ static int __init nb_init(void) {
|
||||
error |= sysfs_create_file(nb_kobject, &reset_attribute.attr);
|
||||
error |= sysfs_create_file(nb_kobject, &code_attribute.attr);
|
||||
error |= sysfs_create_file(nb_kobject, &code_init_attribute.attr);
|
||||
error |= sysfs_create_file(nb_kobject, &code_late_init_attribute.attr);
|
||||
error |= sysfs_create_file(nb_kobject, &code_one_time_init_attribute.attr);
|
||||
error |= sysfs_create_file(nb_kobject, &config_attribute.attr);
|
||||
error |= sysfs_create_file(nb_kobject, &msr_config_attribute.attr);
|
||||
@@ -673,6 +689,7 @@ static int __init nb_init(void) {
|
||||
static void __exit nb_exit(void) {
|
||||
kfree(code);
|
||||
kfree(code_init);
|
||||
kfree(code_late_init);
|
||||
kfree(code_one_time_init);
|
||||
kfree(pfc_config_file_content);
|
||||
kfree(msr_config_file_content);
|
||||
|
||||
@@ -142,6 +142,7 @@ def resetNanoBench():
|
||||
# code, codeObjFile, codeBinFile cannot be specified at the same time (same for init, initObjFile and initBinFile)
|
||||
def runNanoBench(code='', codeObjFile=None, codeBinFile=None,
|
||||
init='', initObjFile=None, initBinFile=None,
|
||||
lateInit='', lateInitObjFile=None, lateInitBinFile=None,
|
||||
oneTimeInit='', oneTimeInitObjFile=None, oneTimeInitBinFile=None):
|
||||
if not ramdiskCreated: createRamdisk()
|
||||
with open('/sys/nb/clear') as clearFile: clearFile.read()
|
||||
@@ -164,6 +165,15 @@ def runNanoBench(code='', codeObjFile=None, codeBinFile=None,
|
||||
elif initBinFile is not None:
|
||||
writeFile('/sys/nb/init', initBinFile)
|
||||
|
||||
if lateInit:
|
||||
lateInitObjFile = '/tmp/ramdisk/late_init.o'
|
||||
assemble(lateInit, lateInitObjFile)
|
||||
if lateInitObjFile is not None:
|
||||
objcopy(lateInitObjFile, '/tmp/ramdisk/late_init.bin')
|
||||
writeFile('/sys/nb/late_init', '/tmp/ramdisk/late_init.bin')
|
||||
elif lateInitBinFile is not None:
|
||||
writeFile('/sys/nb/late_init', lateInitBinFile)
|
||||
|
||||
if oneTimeInit:
|
||||
oneTimeInitObjFile = '/tmp/ramdisk/one_time_init.o'
|
||||
assemble(oneTimeInit, oneTimeInitObjFile)
|
||||
|
||||
@@ -28,6 +28,13 @@ while [ "$2" ]; do
|
||||
objcopy asm-init.o -O binary asm-init.bin
|
||||
args="$args -code_init asm-init.bin"
|
||||
shift 2
|
||||
elif [[ "$1" == -asm_l* ]]; then
|
||||
echo ".intel_syntax noprefix" > asm-late-init.s
|
||||
echo "$2" >> asm-late-init.s
|
||||
as asm-late-init.s -o asm-late-init.o || exit
|
||||
objcopy asm-late-init.o -O binary asm-late-init.bin
|
||||
args="$args -code_late_init asm-late-init.bin"
|
||||
shift 2
|
||||
elif [[ "$1" == -asm_o* ]]; then
|
||||
echo ".intel_syntax noprefix" > asm-one-time-init.s
|
||||
echo "$2" >> asm-one-time-init.s
|
||||
@@ -74,6 +81,8 @@ fi
|
||||
|
||||
rm -f asm-code.*
|
||||
rm -f asm-init.*
|
||||
rm -f asm-late-init.*
|
||||
rm -f asm-one-time-init.*
|
||||
|
||||
echo $prev_rdpmc > /sys/bus/event_source/devices/cpu/rdpmc
|
||||
echo $prev_nmi_watchdog > /proc/sys/kernel/nmi_watchdog
|
||||
|
||||
@@ -25,7 +25,8 @@ void print_usage() {
|
||||
printf("nanoBench usage:\n");
|
||||
printf("\n");
|
||||
printf(" -code <filename>: Binary file containing the code to be benchmarked.\n");
|
||||
printf(" -code_init <filename>: Binary file containing code to be executed once before each measurement\n");
|
||||
printf(" -code_init <filename>: Binary file containing code to be executed once before each measurement.\n");
|
||||
printf(" -code_late_init <filename>: Binary file containing code to be executed once immediately before the code to be benchmarked.\n");
|
||||
printf(" -code_one_time_init <filename>: Binary file containing code to be executed once before the first measurement\n");
|
||||
printf(" -config <filename>: File with performance counter event specifications.\n");
|
||||
printf(" -n_measurements <n>: Number of times the measurements are repeated.\n");
|
||||
@@ -70,6 +71,7 @@ int main(int argc, char **argv) {
|
||||
struct option long_opts[] = {
|
||||
{"code", required_argument, 0, 'c'},
|
||||
{"code_init", required_argument, 0, 'i'},
|
||||
{"code_late_init", required_argument, 0, 't'},
|
||||
{"code_one_time_init", required_argument, 0, 'o'},
|
||||
{"config", required_argument, 0, 'f'},
|
||||
{"n_measurements", required_argument, 0, 'n'},
|
||||
@@ -105,6 +107,9 @@ int main(int argc, char **argv) {
|
||||
case 'i':
|
||||
code_init_length = mmap_file(optarg, &code_init);
|
||||
break;
|
||||
case 't':
|
||||
code_late_init_length = mmap_file(optarg, &code_late_init);
|
||||
break;
|
||||
case 'o':
|
||||
code_one_time_init_length = mmap_file(optarg, &code_one_time_init);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user