diff --git a/README.md b/README.md index da2cd42..5b7829a 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ Both `nanoBench.sh` and `kernel-nanoBench.sh` support the following command-line | `-cpu ` | Pins the measurement thread to CPU n. `[Default: Pin the thread to the CPU it is currently running on.]` | | `-verbose` | Outputs the results of all performance counter readings. In the user-space version, the results are printed to stdout. The output of the kernel module can be accessed using `dmesg`. | -1 As an extension, the tool also supports statements of the form `|n` (with 1≤n≤15) that are translated to n-byte NOPs. +1 As an extension, the tool also supports statements of the form `|n` (with 1≤n≤15) that are translated to n-byte NOPs, and statements of the form `n*|x|` that unroll x n times (nesting is not supported). The following parameters are only supported by `nanoBench.sh`. diff --git a/kernelNanoBench.py b/kernelNanoBench.py index a08c911..85ed75c 100644 --- a/kernelNanoBench.py +++ b/kernelNanoBench.py @@ -1,5 +1,6 @@ import atexit import os +import re import subprocess import sys @@ -36,7 +37,7 @@ def assemble(code, objFile, asmFile='/tmp/ramdisk/asm.s'): code = code.replace('|3', '.byte 0x0f,0x1f,0x00;') code = code.replace('|2', '.byte 0x66,0x90;') code = code.replace('|1', 'nop;') - code = code.replace('|', '') + code = re.sub(r'(\d*)\*\|(.*?)\|', lambda m: int(m.group(1))*(m.group(2)+';'), code) code = '.intel_syntax noprefix;' + code + ';1:;.att_syntax prefix\n' with open(asmFile, 'w') as f: f.write(code); diff --git a/utils.sh b/utils.sh index 2da727b..6d9dee3 100644 --- a/utils.sh +++ b/utils.sh @@ -1,9 +1,8 @@ assemble() { asm=$1 filename=$2 - echo ".intel_syntax noprefix" > asm-tmp.s - echo "$asm" >> asm-tmp.s - sed -i " + + asm=`sed " s/|15/.byte 0x66,0x66,0x66,0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00;/g s/|14/.byte 0x66,0x66,0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00;/g s/|13/.byte 0x66,0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00;/g @@ -19,8 +18,12 @@ assemble() { s/|3/.byte 0x0f,0x1f,0x00;/g s/|2/.byte 0x66,0x90;/g s/|1/nop;/g - s/|//g - " asm-tmp.s + " <<< "$asm"` + + asm=`python3 -c 'import sys, re; print(re.sub(r"(\d*)\*\|(.*?)\|", lambda m: int(m.group(1))*(m.group(2)+";"), sys.argv[1]))' "$asm"` + + echo ".intel_syntax noprefix" > asm-tmp.s + echo "$asm" >> asm-tmp.s as asm-tmp.s -o asm-tmp.o || exit objcopy -j .text -O binary asm-tmp.o "$filename" rm asm-tmp.*