added debug mode

This commit is contained in:
Andreas Abel
2019-02-26 19:06:32 +01:00
parent 379bf1b88c
commit e75204a3cb
5 changed files with 34 additions and 6 deletions

View File

@@ -138,7 +138,7 @@ Both `nanoBench.sh` and `kernel-nanoBench.sh` support the following command-line
| `-loop_count <n>` | Number of iterations of the inner loop. If n>0, the code to be benchmarked **must not modify R15**, as this register contains the loop counter. If n=0, the instructions for the loop are omitted; the loop body is then executed once. `[Default: n=0]` |
| `-warm_up_count <n>` | Number of runs of the generated benchmark code sequence (in each invocation of `run(...)`) before the first measurement result gets recorded. This can, for example, be useful for excluding outliers due to cold caches. `[Default: n=5]` |
| `-initial_warm_up_count <n>` | Number of runs of the benchmark code sequence before the first invocation of `run(...)`. This can be useful for benchmarking instructions that require a warm-up period before they can execute at full speed, like [AVX2 instructions on some microarchitectures](https://www.agner.org/optimize/blog/read.php?i=415). `[Default: n=0]` |
| `-avg` | Selects the arithmetic mean (excluding the top and bottom 20% of the values) as the aggregate function. `[This is the default]` |
| `-avg` | Selects the arithmetic mean (excluding the top and bottom 20% of the values) as the aggregate function. `[This is the default]` |
| `-median` | Selects the median as the aggregate function. |
| `-min` | Selects the minimum as the aggregate function. |
| `-basic_mode` | The effect of this option is described in the [Generated Code](#generated-code) section. |
@@ -152,6 +152,7 @@ The following parameters are only supported by `nanoBench.sh`.
| `-cpu <n>` | Pins the measurement thread to CPU n. `[Default: Pin the thread to the CPU it is currently running on.]` |
| `-usr <n>` | If n=1, performance events are counted when the processor is operating at a privilege level greater than 0. `[Default: n=1]` |
| `-os <n>` | If n=1, performance events are counted when the processor is operating at privilege level 0. `[Default: n=0]` |
| `-debug` | Enables the debug mode (see [below](#debug-mode)). |
## Performance Counter Config Files
@@ -164,6 +165,10 @@ The format of the entries in the configuration files is
You can find details on the meanings of the different parts of the entries in chapters 18 and 19 of [Intel's System Programming Guide](https://software.intel.com/sites/default/files/managed/a4/60/325384-sdm-vol-3abcd.pdf).
## Debug Mode
If debug mode is enabled, the [generated code](#generated-code) contains a breakpoint right before the line `m2 = read_perf_ctrs`, and *nanoBench* is run under *gdb*. This makes it possible to analyze the effect of the code to be benchmarked on registers and on memory. For example, the *gdb* command `info all-registers` displays the current values of all registers.
## Supported Platforms
*nanoBench* should work with all Intel processors supporting architectural performance monitoring version ≥ 3, as well as with AMD Family 17h processors.

View File

@@ -21,6 +21,7 @@ int no_mem = NO_MEM_DEFAULT;
// Run-time option globals, each initialized to its compile-time *_DEFAULT value.
int basic_mode = BASIC_MODE_DEFAULT;
int aggregate_function = AGGREGATE_FUNCTION_DEFAULT;
int verbose = VERBOSE_DEFAULT;
// Non-zero: an INT3 breakpoint byte is emitted into the generated code right after
// the benchmarked code (see create_runtime_code).
int debug = DEBUG_DEFAULT;
// NOTE(review): presumably the machine code to be benchmarked and its size in bytes — confirm.
char* code = NULL;
size_t code_length = 0;
@@ -361,10 +362,14 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
rci += code_length;
}
runtime_code[rci++] = '\x49'; runtime_code[rci++] = '\xFF'; runtime_code[rci++] = '\xCF'; //dec R15
runtime_code[rci++] = '\x49'; runtime_code[rci++] = '\xFF'; runtime_code[rci++] = '\xCF'; // dec R15
runtime_code[rci++] = '\x0F'; runtime_code[rci++] = '\x85';
*(int32_t*)(&runtime_code[rci]) = (int32_t)(rci_loop_start-rci-4); rci += 4; // jnz loop_start
}
if (debug) {
runtime_code[rci++] = '\xCC'; // INT3
}
} else if (starts_with_magic_bytes(&measurement_template[templateI], MAGIC_BYTES_PFC)) {
*(void**)(&runtime_code[rci]) = pfc_mem;
templateI += 8;
@@ -385,7 +390,7 @@ void create_runtime_code(char* measurement_template, long local_unroll_count, lo
templateI += 8;
do {
runtime_code[rci++] = measurement_template[templateI++];
} while (measurement_template[templateI-1] != '\xc3'); // 0xc3 = ret
} while (measurement_template[templateI-1] != '\xC3'); // 0xC3 = ret
}
void run_warmup_experiment(char* measurement_template) {

View File

@@ -110,6 +110,10 @@ extern int aggregate_function;
extern int verbose;
// Default for `verbose`. Deliberately no trailing semicolon: the macro has to expand to
// a plain expression so that it works in initializers, comparisons and argument lists.
#define VERBOSE_DEFAULT 0
// Whether to generate a breakpoint trap after executing the code to be benchmarked.
extern int debug;
// Default for `debug` (breakpoint generation off); no trailing semicolon, see above.
#define DEBUG_DEFAULT 0
extern char* code;
extern size_t code_length;
@@ -262,6 +266,7 @@ void measurement_RDTSC_template_noMem(void);
"mov r11, 0\n" \
"mov r12, 0\n" \
"mov r13, 0\n" \
"mov r15, 0\n" \
"mov r14, rsp\n" \
"add r14, 0x1000\n" \
"mov rdi, rsp\n" \

View File

@@ -12,6 +12,13 @@ if ! command -v rdmsr &>/dev/null; then
exit 1
fi
# Scan the command line for a debug switch: any argument that begins with "-d"
# turns debug mode on for the rest of the script.
debug=false
for p in "$@"; do
    case "$p" in
        -d*) debug=true ;;
    esac
done
args=''
while [ "$2" ]; do
if [ "$1" == '-asm' ]; then
@@ -27,7 +34,7 @@ while [ "$2" ]; do
as asm-init.s -o asm-init.o || exit
objcopy asm-init.o -O binary asm-init.bin
args="$args -code_init asm-init.bin"
shift 2
shift 2
else
args="$args $1"
shift
@@ -52,7 +59,11 @@ iTCO_vendor_support_prev_loaded=$?
prev_nmi_watchdog=$(cat /proc/sys/kernel/nmi_watchdog)
echo 0 > /proc/sys/kernel/nmi_watchdog
user/nanoBench $@
# Launch the benchmark. In debug mode it is started under gdb, which runs the
# program immediately (-ex=run) and passes the remaining words on as its arguments.
if [ "$debug" != true ]; then
    user/nanoBench $@
else
    gdb -ex=run --args user/nanoBench $@
fi
rm -f asm-code.*
rm -f asm-init.*

View File

@@ -41,6 +41,7 @@ void print_usage() {
printf(" -cpu <n>: Pins the measurement thread to CPU n. \n");
printf(" -usr <n>: If 1, counts events at a privilege level greater than 0.\n");
printf(" -os <n>: If 1, counts events at a privilege level 0.\n");
printf(" -debug: Generate a breakpoint trap after running the code to be benchmarked.\n");
}
size_t mmap_file(char* filename, char** content) {
@@ -77,10 +78,11 @@ int main(int argc, char **argv) {
{"min", no_argument, &aggregate_function, MIN},
{"basic_mode", no_argument, &basic_mode, 1},
{"no_mem", no_argument, &no_mem, 1},
{"verbose", no_argument, &verbose, 1},
{"verbose", no_argument, &verbose, 1},
{"cpu", required_argument, 0, 'p'},
{"usr", required_argument, 0, 'r'},
{"os", required_argument, 0, 's'},
{"debug", no_argument, &debug, 1},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
};