diff --git a/configs/cfg_AlderLakeE_all.txt b/configs/cfg_AlderLakeE_all.txt index a66336b..3b0467e 100644 --- a/configs/cfg_AlderLakeE_all.txt +++ b/configs/cfg_AlderLakeE_all.txt @@ -1,4 +1,4 @@ -# Based on https://download.01.org/perfmon/ADL/alderlake_gracemont_core_v1.03.json +# Based on https://download.01.org/perfmon/ADL/alderlake_gracemont_core_v1.04.json # Applies to processors with family-model in {6-97, 6-9A} # Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready. @@ -16,6 +16,12 @@ # Counts the number of cycles that uops are blocked due to a load buffer full condition. 04.02 MEM_SCHEDULER_BLOCK.LD_BUF +# Counts the number of cycles that uops are blocked due to an RSV full condition. +04.04 MEM_SCHEDULER_BLOCK.RSV + +# Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full. +04.07 MEM_SCHEDULER_BLOCK.ALL + # Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a store address match when load subsequently retires. 05.84 LD_HEAD.ST_ADDR_AT_RET diff --git a/configs/cfg_AlderLakeE_common.txt b/configs/cfg_AlderLakeE_common.txt index 0d4f194..8bf5eff 100644 --- a/configs/cfg_AlderLakeE_common.txt +++ b/configs/cfg_AlderLakeE_common.txt @@ -1,4 +1,4 @@ -# Based on https://download.01.org/perfmon/ADL/alderlake_gracemont_core_v1.03.json +# Based on https://download.01.org/perfmon/ADL/alderlake_gracemont_core_v1.04.json # Applies to processors with family-model in {6-97, 6-9A} 3C.00 CORE_CYCLES diff --git a/configs/cfg_AlderLakeP_all.txt b/configs/cfg_AlderLakeP_all.txt index 63cfd09..cd9f15b 100644 --- a/configs/cfg_AlderLakeP_all.txt +++ b/configs/cfg_AlderLakeP_all.txt @@ -1,4 +1,4 @@ -# Based on https://download.01.org/perfmon/ADL/alderlake_goldencove_core_v1.03.json +# Based on https://download.01.org/perfmon/ADL/alderlake_goldencove_core_v1.04.json # Applies to processors with family-model in {6-97, 6-9A} # False dependencies in MOB due to partial compare on address. @@ -28,6 +28,15 @@ # Instruction fetch requests that miss the ITLB and hit the STLB. 11.20 ITLB_MISSES.STLB_HIT +# Page walks completed due to a demand data load to a 4K page. +12.02 DTLB_LOAD_MISSES.WALK_COMPLETED_4K + +# Page walks completed due to a demand data load to a 2M/4M page. +12.04 DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + +# Page walks completed due to a demand data load to a 1G page. +12.08 DTLB_LOAD_MISSES.WALK_COMPLETED_1G + # Load miss in all TLB levels causes a page walk that completes. (All page sizes) 12.0E DTLB_LOAD_MISSES.WALK_COMPLETED @@ -40,6 +49,15 @@ # Loads that miss the DTLB and hit the STLB. 12.20 DTLB_LOAD_MISSES.STLB_HIT +# Page walks completed due to a demand data store to a 4K page. +13.02 DTLB_STORE_MISSES.WALK_COMPLETED_4K + +# Page walks completed due to a demand data store to a 2M/4M page. +13.04 DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + +# Page walks completed due to a demand data store to a 1G page. +13.08 DTLB_STORE_MISSES.WALK_COMPLETED_1G + # Store misses in all TLB levels causes a page walk that completes. (All page sizes) 13.0E DTLB_STORE_MISSES.WALK_COMPLETED @@ -355,12 +373,15 @@ AD.80 INT_MISC.CLEAR_RESTEER_CYCLES # Uops that RAT issues to RS AE.01 UOPS_ISSUED.ANY -# tbd +# TBD B0.01.CMSK=1 ARITH.FPDIV_ACTIVE # This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE B0.01.CMSK=1 ARITH.FP_DIVIDER_ACTIVE +# This event counts the cycles the integer divider is busy. +B0.08 ARITH.IDIV_ACTIVE + # This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE B0.08.CMSK=1 ARITH.INT_DIVIDER_ACTIVE @@ -628,7 +649,7 @@ CD.01.MSR_3F6H=0x8.CTR=1.TakenAlone MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8 # Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. CD.01.MSR_3F6H=0x80.CTR=1.TakenAlone MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128 -# Counts the number of retired instructions with at least 1 store uop. +# Retired instructions with at least 1 store uop. This PEBS event is the trigger for stores sampled by the PEBS Store Facility. CD.02.CTR=0 MEM_TRANS_RETIRED.STORE_SAMPLE # Retired load instructions that miss the STLB. @@ -706,17 +727,11 @@ E0.20 MISC2_RETIRED.LFENCE # Retired memory uops for any access E5.03 MEM_UOP_RETIRED.ANY -# TBD -E7.01 INT_VEC_RETIRED.ADD_128 +# integer ADD, SUB, SAD 128-bit vector instructions. +E7.03 INT_VEC_RETIRED.ADD_128 -# TBD -E7.02 INT_VEC_RETIRED.HADD_128 - -# TBD -E7.04 INT_VEC_RETIRED.ADD_256 - -# TBD -E7.08 INT_VEC_RETIRED.HADD_256 +# integer ADD, SUB, SAD 256-bit vector instructions. +E7.0C INT_VEC_RETIRED.ADD_256 # TBD E7.10 INT_VEC_RETIRED.VNNI_128 diff --git a/configs/cfg_AlderLakeP_common.txt b/configs/cfg_AlderLakeP_common.txt index cc12f5b..d57c917 100644 --- a/configs/cfg_AlderLakeP_common.txt +++ b/configs/cfg_AlderLakeP_common.txt @@ -1,4 +1,4 @@ -# Based on https://download.01.org/perfmon/ADL/alderlake_goldencove_core_v1.03.json +# Based on https://download.01.org/perfmon/ADL/alderlake_goldencove_core_v1.04.json # Applies to processors with family-model in {6-97, 6-9A} 3C.00 CORE_CYCLES diff --git a/nanoBench.sh b/nanoBench.sh index cafbf4e..f0c570f 100755 --- a/nanoBench.sh +++ b/nanoBench.sh @@ -51,8 +51,15 @@ done args="$args $1" set "$args" -prev_rdpmc=$(cat /sys/bus/event_source/devices/cpu/rdpmc) -echo 2 > /sys/bus/event_source/devices/cpu/rdpmc || exit +if [ -d "/sys/bus/event_source/devices/cpu" ]; then + prev_rdpmc=$(cat /sys/bus/event_source/devices/cpu/rdpmc) + echo 2 > /sys/bus/event_source/devices/cpu/rdpmc || exit 1 +else + prev_rdpmc_atom=$(cat /sys/bus/event_source/devices/cpu_atom/rdpmc) + prev_rdpmc_core=$(cat /sys/bus/event_source/devices/cpu_core/rdpmc) + echo 2 > /sys/bus/event_source/devices/cpu_atom/rdpmc || exit 1 + echo 2 > /sys/bus/event_source/devices/cpu_core/rdpmc || exit 1 +fi modprobe --first-time msr &>/dev/null msr_prev_loaded=$? @@ -77,9 +84,15 @@ fi rm -f asm-*.bin -echo $prev_rdpmc > /sys/bus/event_source/devices/cpu/rdpmc echo $prev_nmi_watchdog > /proc/sys/kernel/nmi_watchdog +if [ -d "/sys/bus/event_source/devices/cpu" ]; then + echo $prev_rdpmc > /sys/bus/event_source/devices/cpu/rdpmc +else + echo $prev_rdpmc_atom > /sys/bus/event_source/devices/cpu_atom/rdpmc + echo $prev_rdpmc_core > /sys/bus/event_source/devices/cpu_core/rdpmc +fi + if [[ $msr_prev_loaded == 0 ]]; then modprobe -r msr fi