mirror of https://github.com/andreas-abel/nanoBench.git
APIC interrupts
@@ -49,6 +49,12 @@ unsigned long cur_rdmsr = 0;
 
 bool is_Intel_CPU = false;
 bool is_AMD_CPU = false;
+bool supports_tsc_deadline = false;
+int displ_family;
+int displ_model;
+int Intel_perf_mon_ver = -1;
+int Intel_FF_ctr_width = -1;
+int Intel_programmable_ctr_width = -1;
 
 int n_programmable_counters;
 
@@ -111,11 +117,11 @@ bool check_cpuid() {
     print_user_verbose("Brand: %s\n", proc_brand_string);
 
     __cpuid(0x01, eax, ebx, ecx, edx);
-    unsigned int displ_family = ((eax >> 8) & 0xF);
+    displ_family = ((eax >> 8) & 0xF);
     if (displ_family == 0x0F) {
         displ_family += ((eax >> 20) & 0xFF);
     }
-    unsigned int displ_model = ((eax >> 4) & 0xF);
+    displ_model = ((eax >> 4) & 0xF);
     if (displ_family == 0x06 || displ_family == 0x0F) {
         displ_model += ((eax >> 12) & 0xF0);
     }
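
Note: the display family/model computation above implements the standard decoding of CPUID leaf 01H (EAX) from the Intel SDM: the extended family field (bits 27:20) is added to the base family (bits 11:8) only if the base family is 0FH, and the extended model field (bits 19:16) forms the high nibble of the model for families 06H and 0FH. A self-contained user-space sketch of the same decoding (using GCC's <cpuid.h>; not part of the commit):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void) {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(0x01, &eax, &ebx, &ecx, &edx)) return 1;

        unsigned int family = (eax >> 8) & 0xF;   // base family, bits 11:8
        if (family == 0x0F)
            family += (eax >> 20) & 0xFF;         // extended family, bits 27:20
        unsigned int model = (eax >> 4) & 0xF;    // base model, bits 7:4
        if (family == 0x06 || family == 0x0F)
            model += (eax >> 12) & 0xF0;          // extended model, bits 19:16
        printf("DisplayFamily_DisplayModel: %02X_%02XH\n", family, model);
        return 0;
    }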
@@ -125,21 +131,29 @@ bool check_cpuid() {
     if (strcmp(proc_vendor_string, "GenuineIntel") == 0) {
         is_Intel_CPU = true;
+
+        __cpuid(0x01, eax, ebx, ecx, edx);
+        supports_tsc_deadline = (ecx >> 24) & 1;
+
         __cpuid(0x0A, eax, ebx, ecx, edx);
-        unsigned int perf_mon_ver = (eax & 0xFF);
-        print_user_verbose("Performance monitoring version: %u\n", perf_mon_ver);
-        if (perf_mon_ver < 2) {
+        Intel_perf_mon_ver = (eax & 0xFF);
+        print_user_verbose("Performance monitoring version: %d\n", Intel_perf_mon_ver);
+        if (Intel_perf_mon_ver < 2) {
             print_error("Error: performance monitoring version >= 2 required\n");
             return true;
         }
 
         print_user_verbose("Number of fixed-function performance counters: %u\n", edx & 0x1F);
         n_programmable_counters = ((eax >> 8) & 0xFF);
         print_user_verbose("Number of general-purpose performance counters: %u\n", n_programmable_counters);
         if (n_programmable_counters < 2) {
             print_error("Error: only %u programmable counters available; nanoBench requires at least 2\n", n_programmable_counters);
             return true;
         }
-        print_user_verbose("Bit widths of general-purpose performance counters: %u\n", ((eax >> 16) & 0xFF));
+
+        Intel_FF_ctr_width = (edx >> 5) & 0xFF;
+        Intel_programmable_ctr_width = (eax >> 16) & 0xFF;
+        print_user_verbose("Bit widths of fixed-function performance counters: %u\n", Intel_FF_ctr_width);
+        print_user_verbose("Bit widths of general-purpose performance counters: %u\n", Intel_programmable_ctr_width);
     } else if (strcmp(proc_vendor_string, "AuthenticAMD") == 0) {
         is_AMD_CPU = true;
         n_programmable_counters = 6;
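
Note: the fields read from CPUID leaf 0AH above are the architectural performance-monitoring parameters: EAX[7:0] holds the version ID, EAX[15:8] the number of general-purpose counters, EAX[23:16] their bit width, EDX[4:0] the number of fixed-function counters, and EDX[12:5] their bit width. The same decoding as a stand-alone user-space sketch (illustrative, not part of the commit):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void) {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(0x0A, &eax, &ebx, &ecx, &edx)) return 1;

        printf("perf-mon version:  %u\n", eax & 0xFF);         // EAX[7:0]
        printf("GP counters:       %u\n", (eax >> 8) & 0xFF);  // EAX[15:8]
        printf("GP counter width:  %u\n", (eax >> 16) & 0xFF); // EAX[23:16]
        printf("FF counters:       %u\n", edx & 0x1F);         // EDX[4:0]
        printf("FF counter width:  %u\n", (edx >> 5) & 0xFF);  // EDX[12:5]
        return 0;
    }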
@@ -676,26 +690,13 @@ void run_initial_warmup_experiment() {
 void run_experiment(char* measurement_template, int64_t* results[], int n_counters, long local_unroll_count, long local_loop_count) {
     create_runtime_code(measurement_template, local_unroll_count, local_loop_count);
 
-    #ifdef __KERNEL__
-    get_cpu();
-    unsigned long flags;
-    raw_local_irq_save(flags);
-    #endif
-
     for (long ri=-warm_up_count; ri<n_measurements; ri++) {
         ((void(*)(void))runtime_code)();
 
         // ignore "warm-up" runs (ri<0), but don't execute different branches
-        long ri_ = (ri>=0)?ri:0;
         for (int c=0; c<n_counters; c++) {
-            results[c][ri_] = pfc_mem[c];
+            results[c][max(0L, ri)] = pfc_mem[c];
         }
     }
-
-    #ifdef __KERNEL__
-    raw_local_irq_restore(flags);
-    put_cpu();
-    #endif
 }
 
 char* compute_result_str(char* buf, size_t buf_len, char* desc, int counter) {
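
Note: replacing the temporary ri_ with results[c][max(0L, ri)] preserves the warm-up semantics: all warm-up iterations (ri < 0) store to slot 0, which the first real iteration then overwrites, so warm-up and measurement runs execute the identical instruction sequence and take no different branches. A minimal user-space illustration of the pattern (types and sizes are illustrative; max_l() stands in for the kernel's max() macro used above):

    #include <stdint.h>

    #define WARM_UP        5
    #define N_MEASUREMENTS 10

    static long max_l(long a, long b) { return a > b ? a : b; }

    void measure(int64_t results[N_MEASUREMENTS], int64_t (*read_counter)(void)) {
        for (long ri = -WARM_UP; ri < N_MEASUREMENTS; ri++) {
            // warm-up runs (ri < 0) all write to index 0; the run with
            // ri == 0 overwrites that slot with the first real result
            results[max_l(0L, ri)] = read_counter();
        }
    }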
@@ -176,6 +176,12 @@ extern char* msr_config_file_content;
 
 extern bool is_Intel_CPU;
 extern bool is_AMD_CPU;
+extern bool supports_tsc_deadline;
+extern int displ_family;
+extern int displ_model;
+extern int Intel_perf_mon_ver;
+extern int Intel_FF_ctr_width;
+extern int Intel_programmable_ctr_width;
 
 #define MAX_PROGRAMMABLE_COUNTERS 8
 extern int n_programmable_counters;
@@ -9,6 +9,8 @@
 //
 // You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 
+#include <asm/apic.h>
+#include <asm-generic/io.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -19,7 +21,6 @@
 #include <linux/version.h>
 #include <linux/vmalloc.h>
 #include <../arch/x86/include/asm/fpu/api.h>
-#include <asm-generic/io.h>
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(4,12,0)
 #include <asm/cacheflush.h>
@@ -499,7 +500,56 @@ static ssize_t reset_store(struct kobject *kobj, struct kobj_attribute *attr, co
 }
 static struct kobj_attribute reset_attribute =__ATTR(reset, 0660, reset_show, reset_store);
 
-static int show(struct seq_file *m, void *v) {
+uint32_t prev_LVTT = 0;
+uint32_t prev_LVTTHMR = 0;
+uint32_t prev_LVTPC = 0;
+uint32_t prev_LVT0 = 0;
+uint32_t prev_LVT1 = 0;
+uint32_t prev_LVTERR = 0;
+uint64_t prev_deadline = 0;
+
+static void restore_interrupts_preemption(void) {
+    apic_write(APIC_LVTT, prev_LVTT);
+    apic_write(APIC_LVTTHMR, prev_LVTTHMR);
+    apic_write(APIC_LVTPC, prev_LVTPC);
+    apic_write(APIC_LVT0, prev_LVT0);
+    apic_write(APIC_LVT1, prev_LVT1);
+    apic_write(APIC_LVTERR, prev_LVTERR);
+    if (supports_tsc_deadline) write_msr(MSR_IA32_TSC_DEADLINE, prev_deadline);
+    prev_LVTT = prev_LVTTHMR = prev_LVTPC = prev_LVT0 = prev_LVT1 = prev_LVTERR = prev_deadline = 0;
+
+    put_cpu();
+}
+
+static void disable_interrupts_preemption(void) {
+    if (prev_LVTT || prev_LVTTHMR || prev_LVTPC || prev_LVT0 || prev_LVT1 || prev_LVTERR) {
+        // The previous call to disable_interrupts_preemption() was not followed by a call to restore_interrupts_preemption().
+        restore_interrupts_preemption();
+    }
+
+    // disable preemption
+    get_cpu();
+
+    // We mask interrupts in the APIC LVT. We do not mask all maskable interrupts using the cli instruction, as on some
+    // microarchitectures, pending interrupts that are masked via the cli instruction can reduce the retirement rate
+    // (e.g., on ICL to 4 uops/cycle).
+    prev_LVTT = apic_read(APIC_LVTT);
+    prev_LVTTHMR = apic_read(APIC_LVTTHMR);
+    prev_LVTPC = apic_read(APIC_LVTPC);
+    prev_LVT0 = apic_read(APIC_LVT0);
+    prev_LVT1 = apic_read(APIC_LVT1);
+    prev_LVTERR = apic_read(APIC_LVTERR);
+    if (supports_tsc_deadline) prev_deadline = read_msr(MSR_IA32_TSC_DEADLINE);
+
+    apic_write(APIC_LVTT, prev_LVTT | APIC_LVT_MASKED);
+    apic_write(APIC_LVTTHMR, prev_LVTTHMR | APIC_LVT_MASKED);
+    apic_write(APIC_LVTPC, prev_LVTPC | APIC_LVT_MASKED);
+    apic_write(APIC_LVT0, prev_LVT0 | APIC_LVT_MASKED);
+    apic_write(APIC_LVT1, prev_LVT1 | APIC_LVT_MASKED);
+    apic_write(APIC_LVTERR, prev_LVTERR | APIC_LVT_MASKED);
+}
+
+static int run_nanoBench(struct seq_file *m, void *v) {
     for (int i=0; i<MAX_PROGRAMMABLE_COUNTERS; i++) {
         if (!measurement_results[i] || !measurement_results_base[i]) {
             pr_err("Could not allocate memory for measurement_results\n");
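
Note: APIC_LVT_MASKED is bit 16 of a local-vector-table entry; OR-ing it into the saved value masks that interrupt source, and writing the saved value back both restores the entry and clears the mask. The TSC-deadline MSR (read_msr/write_msr are nanoBench's own helpers) is saved and restored in addition, presumably because a deadline that expires while LVTT is masked is discarded, and rewriting the MSR re-arms the timer. The core read-modify-write pattern, reduced to a single LVT entry (a sketch, kernel context assumed; not part of the commit):

    #include <linux/types.h>
    #include <asm/apic.h>

    static u32 saved_lvtt;

    static void mask_apic_timer(void) {
        saved_lvtt = apic_read(APIC_LVTT);                   // save the current entry
        apic_write(APIC_LVTT, saved_lvtt | APIC_LVT_MASKED); // set the mask bit (bit 16)
    }

    static void unmask_apic_timer(void) {
        apic_write(APIC_LVTT, saved_lvtt); // restoring the old value clears the mask
    }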
@@ -515,6 +565,7 @@ static int show(struct seq_file *m, void *v) {
     runtime_code = runtime_code_base + code_offset;
 
     kernel_fpu_begin();
+    disable_interrupts_preemption();
 
     long base_unroll_count = (basic_mode?0:unroll_count);
     long main_unroll_count = (basic_mode?unroll_count:2*unroll_count);
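
Together with the restore call in the hunk below, the measurement region is now bracketed symmetrically, with the save/restore pairs nested in opposite order. Schematically (function bodies elided):

    static int run_nanoBench(struct seq_file *m, void *v) {
        /* ... set up runtime_code and result buffers ... */

        kernel_fpu_begin();              // save FPU state, allow SIMD in kernel code
        disable_interrupts_preemption(); // pin to this CPU, mask APIC LVT entries

        /* ... run the experiments and seq_printf() the results ... */

        restore_interrupts_preemption(); // unmask LVT entries, unpin
        kernel_fpu_end();
        return 0;
    }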
@@ -654,26 +705,27 @@ static int show(struct seq_file *m, void *v) {
         seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), msr_configs[i].description, 0));
     }
 
+    restore_interrupts_preemption();
     kernel_fpu_end();
     return 0;
 }
 
-static int open(struct inode *inode, struct file *file) {
-    return single_open(file, show, NULL);
+static int open_nanoBench(struct inode *inode, struct file *file) {
+    return single_open_size(file, run_nanoBench, NULL, (n_pfc_configs+4*use_fixed_counters)*128);
 }
 
 // since 5.6 the struct for fileops has changed
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
-static const struct proc_ops proc_file_fops = {
+static const struct proc_ops proc_file_fops_nanoBench = {
     .proc_lseek = seq_lseek,
-    .proc_open = open,
+    .proc_open = open_nanoBench,
     .proc_read = seq_read,
     .proc_release = single_release,
 };
 #else
-static const struct file_operations proc_file_fops = {
+static const struct file_operations proc_file_fops_nanoBench = {
     .llseek = seq_lseek,
-    .open = open,
+    .open = open_nanoBench,
     .owner = THIS_MODULE,
     .read = seq_read,
     .release = single_release,
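
Note on the switch from single_open() to single_open_size(): single_open() gives the seq_file a one-page buffer, and when the show function overflows it, seq_read() allocates a larger buffer and invokes the show function again, which here would rerun all measurements. single_open_size() (available since Linux 3.10) preallocates a buffer of the requested size, with (n_pfc_configs+4*use_fixed_counters)*128 serving as an upper bound of 128 bytes per output line. A minimal usage sketch (names are illustrative; registration works as in the hunk above):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v) {
        seq_printf(m, "potentially long output...\n");
        return 0; // called exactly once if the buffer is large enough
    }

    static int demo_open(struct inode *inode, struct file *file) {
        // preallocate 64 KiB so demo_show() does not have to be re-invoked
        return single_open_size(file, demo_show, NULL, 64 * 1024);
    }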
@@ -768,7 +820,7 @@ static int __init nb_init(void) {
         return error;
     }
 
-    struct proc_dir_entry* proc_file_entry = proc_create("nanoBench", 0, NULL, &proc_file_fops);
+    struct proc_dir_entry* proc_file_entry = proc_create("nanoBench", 0, NULL, &proc_file_fops_nanoBench);
     if(proc_file_entry == NULL) {
         pr_err("failed to create file in /proc/\n");
         return -1;