mirror of https://github.com/andreas-abel/nanoBench.git
APIC interrupts
@@ -49,6 +49,12 @@ unsigned long cur_rdmsr = 0;
 
 bool is_Intel_CPU = false;
 bool is_AMD_CPU = false;
+bool supports_tsc_deadline = false;
+int displ_family;
+int displ_model;
+int Intel_perf_mon_ver = -1;
+int Intel_FF_ctr_width = -1;
+int Intel_programmable_ctr_width = -1;
 
 int n_programmable_counters;
 
@@ -111,11 +117,11 @@ bool check_cpuid() {
     print_user_verbose("Brand: %s\n", proc_brand_string);
 
     __cpuid(0x01, eax, ebx, ecx, edx);
-    unsigned int displ_family = ((eax >> 8) & 0xF);
+    displ_family = ((eax >> 8) & 0xF);
     if (displ_family == 0x0F) {
         displ_family += ((eax >> 20) & 0xFF);
     }
-    unsigned int displ_model = ((eax >> 4) & 0xF);
+    displ_model = ((eax >> 4) & 0xF);
     if (displ_family == 0x06 || displ_family == 0x0F) {
         displ_model += ((eax >> 12) & 0xF0);
     }
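
Note: the display family/model computation above implements the standard decoding of CPUID leaf 01H (EAX) from the Intel SDM: the extended family field (bits 27:20) is added to the base family (bits 11:8) only if the base family is 0FH, and the extended model field (bits 19:16) forms the high nibble of the model for families 06H and 0FH. A self-contained user-space sketch of the same decoding (using GCC's <cpuid.h>; not part of the commit):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void) {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(0x01, &eax, &ebx, &ecx, &edx)) return 1;

        unsigned int family = (eax >> 8) & 0xF;   // base family, bits 11:8
        if (family == 0x0F)
            family += (eax >> 20) & 0xFF;         // extended family, bits 27:20
        unsigned int model = (eax >> 4) & 0xF;    // base model, bits 7:4
        if (family == 0x06 || family == 0x0F)
            model += (eax >> 12) & 0xF0;          // extended model, bits 19:16
        printf("DisplayFamily_DisplayModel: %02X_%02XH\n", family, model);
        return 0;
    }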
@@ -125,21 +131,29 @@ bool check_cpuid() {
     if (strcmp(proc_vendor_string, "GenuineIntel") == 0) {
         is_Intel_CPU = true;
+
+        __cpuid(0x01, eax, ebx, ecx, edx);
+        supports_tsc_deadline = (ecx >> 24) & 1;
+
         __cpuid(0x0A, eax, ebx, ecx, edx);
-        unsigned int perf_mon_ver = (eax & 0xFF);
-        print_user_verbose("Performance monitoring version: %u\n", perf_mon_ver);
-        if (perf_mon_ver < 2) {
+        Intel_perf_mon_ver = (eax & 0xFF);
+        print_user_verbose("Performance monitoring version: %d\n", Intel_perf_mon_ver);
+        if (Intel_perf_mon_ver < 2) {
             print_error("Error: performance monitoring version >= 2 required\n");
             return true;
         }
 
         print_user_verbose("Number of fixed-function performance counters: %u\n", edx & 0x1F);
         n_programmable_counters = ((eax >> 8) & 0xFF);
         print_user_verbose("Number of general-purpose performance counters: %u\n", n_programmable_counters);
         if (n_programmable_counters < 2) {
             print_error("Error: only %u programmable counters available; nanoBench requires at least 2\n", n_programmable_counters);
             return true;
         }
-        print_user_verbose("Bit widths of general-purpose performance counters: %u\n", ((eax >> 16) & 0xFF));
+
+        Intel_FF_ctr_width = (edx >> 5) & 0xFF;
+        Intel_programmable_ctr_width = (eax >> 16) & 0xFF;
+        print_user_verbose("Bit widths of fixed-function performance counters: %u\n", Intel_FF_ctr_width);
+        print_user_verbose("Bit widths of general-purpose performance counters: %u\n", Intel_programmable_ctr_width);
     } else if (strcmp(proc_vendor_string, "AuthenticAMD") == 0) {
         is_AMD_CPU = true;
         n_programmable_counters = 6;
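
Note: the fields read from CPUID leaf 0AH above are the architectural performance-monitoring parameters: EAX[7:0] holds the version ID, EAX[15:8] the number of general-purpose counters, EAX[23:16] their bit width, EDX[4:0] the number of fixed-function counters, and EDX[12:5] their bit width. The same decoding as a stand-alone user-space sketch (illustrative, not part of the commit):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void) {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(0x0A, &eax, &ebx, &ecx, &edx)) return 1;

        printf("perf-mon version:  %u\n", eax & 0xFF);         // EAX[7:0]
        printf("GP counters:       %u\n", (eax >> 8) & 0xFF);  // EAX[15:8]
        printf("GP counter width:  %u\n", (eax >> 16) & 0xFF); // EAX[23:16]
        printf("FF counters:       %u\n", edx & 0x1F);         // EDX[4:0]
        printf("FF counter width:  %u\n", (edx >> 5) & 0xFF);  // EDX[12:5]
        return 0;
    }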
@@ -676,26 +690,13 @@ void run_initial_warmup_experiment() {
 void run_experiment(char* measurement_template, int64_t* results[], int n_counters, long local_unroll_count, long local_loop_count) {
     create_runtime_code(measurement_template, local_unroll_count, local_loop_count);
 
-    #ifdef __KERNEL__
-    get_cpu();
-    unsigned long flags;
-    raw_local_irq_save(flags);
-    #endif
-
     for (long ri=-warm_up_count; ri<n_measurements; ri++) {
         ((void(*)(void))runtime_code)();
 
         // ignore "warm-up" runs (ri<0), but don't execute different branches
-        long ri_ = (ri>=0)?ri:0;
         for (int c=0; c<n_counters; c++) {
-            results[c][ri_] = pfc_mem[c];
+            results[c][max(0L, ri)] = pfc_mem[c];
         }
     }
-
-    #ifdef __KERNEL__
-    raw_local_irq_restore(flags);
-    put_cpu();
-    #endif
 }
 
 char* compute_result_str(char* buf, size_t buf_len, char* desc, int counter) {
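
Note: replacing the temporary ri_ with results[c][max(0L, ri)] preserves the warm-up semantics: all warm-up iterations (ri < 0) store to slot 0, which the first real iteration then overwrites, so warm-up and measurement runs execute the identical instruction sequence and take no different branches. A minimal user-space illustration of the pattern (types and sizes are illustrative; max_l() stands in for the kernel's max() macro used above):

    #include <stdint.h>

    #define WARM_UP        5
    #define N_MEASUREMENTS 10

    static long max_l(long a, long b) { return a > b ? a : b; }

    void measure(int64_t results[N_MEASUREMENTS], int64_t (*read_counter)(void)) {
        for (long ri = -WARM_UP; ri < N_MEASUREMENTS; ri++) {
            // warm-up runs (ri < 0) all write to index 0; the run with
            // ri == 0 overwrites that slot with the first real result
            results[max_l(0L, ri)] = read_counter();
        }
    }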
@@ -176,6 +176,12 @@ extern char* msr_config_file_content;
 
 extern bool is_Intel_CPU;
 extern bool is_AMD_CPU;
+extern bool supports_tsc_deadline;
+extern int displ_family;
+extern int displ_model;
+extern int Intel_perf_mon_ver;
+extern int Intel_FF_ctr_width;
+extern int Intel_programmable_ctr_width;
 
 #define MAX_PROGRAMMABLE_COUNTERS 8
 extern int n_programmable_counters;
@@ -9,6 +9,8 @@
 //
 // You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
 
+#include <asm/apic.h>
+#include <asm-generic/io.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -19,7 +21,6 @@
 #include <linux/version.h>
 #include <linux/vmalloc.h>
 #include <../arch/x86/include/asm/fpu/api.h>
-#include <asm-generic/io.h>
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(4,12,0)
 #include <asm/cacheflush.h>
@@ -499,7 +500,56 @@ static ssize_t reset_store(struct kobject *kobj, struct kobj_attribute *attr, co
 }
 static struct kobj_attribute reset_attribute =__ATTR(reset, 0660, reset_show, reset_store);
 
-static int show(struct seq_file *m, void *v) {
+uint32_t prev_LVTT = 0;
+uint32_t prev_LVTTHMR = 0;
+uint32_t prev_LVTPC = 0;
+uint32_t prev_LVT0 = 0;
+uint32_t prev_LVT1 = 0;
+uint32_t prev_LVTERR = 0;
+uint64_t prev_deadline = 0;
+
+static void restore_interrupts_preemption(void) {
+    apic_write(APIC_LVTT, prev_LVTT);
+    apic_write(APIC_LVTTHMR, prev_LVTTHMR);
+    apic_write(APIC_LVTPC, prev_LVTPC);
+    apic_write(APIC_LVT0, prev_LVT0);
+    apic_write(APIC_LVT1, prev_LVT1);
+    apic_write(APIC_LVTERR, prev_LVTERR);
+    if (supports_tsc_deadline) write_msr(MSR_IA32_TSC_DEADLINE, prev_deadline);
+    prev_LVTT = prev_LVTTHMR = prev_LVTPC = prev_LVT0 = prev_LVT1 = prev_LVTERR = prev_deadline = 0;
+
+    put_cpu();
+}
+
+static void disable_interrupts_preemption(void) {
+    if (prev_LVTT || prev_LVTTHMR || prev_LVTPC || prev_LVT0 || prev_LVT1 || prev_LVTERR) {
+        // The previous call to disable_interrupts_preemption() was not followed by a call to restore_interrupts_preemption().
+        restore_interrupts_preemption();
+    }
+
+    // disable preemption
+    get_cpu();
+
+    // We mask interrupts in the APIC LVT. We do not mask all maskable interrupts using the cli instruction, as on some
+    // microarchitectures, pending interrupts that are masked via the cli instruction can reduce the retirement rate
+    // (e.g., on ICL to 4 uops/cycle).
+    prev_LVTT = apic_read(APIC_LVTT);
+    prev_LVTTHMR = apic_read(APIC_LVTTHMR);
+    prev_LVTPC = apic_read(APIC_LVTPC);
+    prev_LVT0 = apic_read(APIC_LVT0);
+    prev_LVT1 = apic_read(APIC_LVT1);
+    prev_LVTERR = apic_read(APIC_LVTERR);
+    if (supports_tsc_deadline) prev_deadline = read_msr(MSR_IA32_TSC_DEADLINE);
+
+    apic_write(APIC_LVTT, prev_LVTT | APIC_LVT_MASKED);
+    apic_write(APIC_LVTTHMR, prev_LVTTHMR | APIC_LVT_MASKED);
+    apic_write(APIC_LVTPC, prev_LVTPC | APIC_LVT_MASKED);
+    apic_write(APIC_LVT0, prev_LVT0 | APIC_LVT_MASKED);
+    apic_write(APIC_LVT1, prev_LVT1 | APIC_LVT_MASKED);
+    apic_write(APIC_LVTERR, prev_LVTERR | APIC_LVT_MASKED);
+}
+
+static int run_nanoBench(struct seq_file *m, void *v) {
     for (int i=0; i<MAX_PROGRAMMABLE_COUNTERS; i++) {
         if (!measurement_results[i] || !measurement_results_base[i]) {
             pr_err("Could not allocate memory for measurement_results\n");
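
Note: APIC_LVT_MASKED is bit 16 of a local-vector-table entry; OR-ing it into the saved value masks that interrupt source, and writing the saved value back both restores the entry and clears the mask. The TSC-deadline MSR (read_msr/write_msr are nanoBench's own helpers) is saved and restored in addition, presumably because a deadline that expires while LVTT is masked is discarded, and rewriting the MSR re-arms the timer. The core read-modify-write pattern, reduced to a single LVT entry (a sketch, kernel context assumed; not part of the commit):

    #include <linux/types.h>
    #include <asm/apic.h>

    static u32 saved_lvtt;

    static void mask_apic_timer(void) {
        saved_lvtt = apic_read(APIC_LVTT);                   // save the current entry
        apic_write(APIC_LVTT, saved_lvtt | APIC_LVT_MASKED); // set the mask bit (bit 16)
    }

    static void unmask_apic_timer(void) {
        apic_write(APIC_LVTT, saved_lvtt); // restoring the old value clears the mask
    }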
@@ -515,6 +565,7 @@ static int show(struct seq_file *m, void *v) {
     runtime_code = runtime_code_base + code_offset;
 
     kernel_fpu_begin();
+    disable_interrupts_preemption();
 
     long base_unroll_count = (basic_mode?0:unroll_count);
     long main_unroll_count = (basic_mode?unroll_count:2*unroll_count);
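
Together with the restore call in the hunk below, the measurement region is now bracketed symmetrically, with the save/restore pairs nested in opposite order. Schematically (function bodies elided):

    static int run_nanoBench(struct seq_file *m, void *v) {
        /* ... set up runtime_code and result buffers ... */

        kernel_fpu_begin();              // save FPU state, allow SIMD in kernel code
        disable_interrupts_preemption(); // pin to this CPU, mask APIC LVT entries

        /* ... run the experiments and seq_printf() the results ... */

        restore_interrupts_preemption(); // unmask LVT entries, unpin
        kernel_fpu_end();
        return 0;
    }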
@@ -654,26 +705,27 @@ static int show(struct seq_file *m, void *v) {
         seq_printf(m, "%s", compute_result_str(buf, sizeof(buf), msr_configs[i].description, 0));
     }
 
+    restore_interrupts_preemption();
     kernel_fpu_end();
     return 0;
 }
 
-static int open(struct inode *inode, struct file *file) {
-    return single_open(file, show, NULL);
+static int open_nanoBench(struct inode *inode, struct file *file) {
+    return single_open_size(file, run_nanoBench, NULL, (n_pfc_configs+4*use_fixed_counters)*128);
 }
 
 // since 5.6 the struct for fileops has changed
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
-static const struct proc_ops proc_file_fops = {
+static const struct proc_ops proc_file_fops_nanoBench = {
     .proc_lseek = seq_lseek,
-    .proc_open = open,
+    .proc_open = open_nanoBench,
     .proc_read = seq_read,
     .proc_release = single_release,
 };
 #else
-static const struct file_operations proc_file_fops = {
+static const struct file_operations proc_file_fops_nanoBench = {
     .llseek = seq_lseek,
-    .open = open,
+    .open = open_nanoBench,
     .owner = THIS_MODULE,
     .read = seq_read,
     .release = single_release,
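
Note on the switch from single_open() to single_open_size(): single_open() gives the seq_file a one-page buffer, and when the show function overflows it, seq_read() allocates a larger buffer and invokes the show function again, which here would rerun all measurements. single_open_size() (available since Linux 3.10) preallocates a buffer of the requested size, with (n_pfc_configs+4*use_fixed_counters)*128 serving as an upper bound of 128 bytes per output line. A minimal usage sketch (names are illustrative; registration works as in the hunk above):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v) {
        seq_printf(m, "potentially long output...\n");
        return 0; // called exactly once if the buffer is large enough
    }

    static int demo_open(struct inode *inode, struct file *file) {
        // preallocate 64 KiB so demo_show() does not have to be re-invoked
        return single_open_size(file, demo_show, NULL, 64 * 1024);
    }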
@@ -768,7 +820,7 @@ static int __init nb_init(void) {
         return error;
     }
 
-    struct proc_dir_entry* proc_file_entry = proc_create("nanoBench", 0, NULL, &proc_file_fops);
+    struct proc_dir_entry* proc_file_entry = proc_create("nanoBench", 0, NULL, &proc_file_fops_nanoBench);
     if(proc_file_entry == NULL) {
         pr_err("failed to create file in /proc/\n");
         return -1;