mirror of
https://github.com/andreas-abel/nanoBench.git
synced 2025-12-13 10:10:04 +01:00
support for Goldmont
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Based on https://download.01.org/perfmon/GLM/goldmont_core_v13.json
|
||||
# Based on https://download.01.org/perfmon/GLP/goldmontplus_core_v1.01.json
|
||||
# Applies to processors with family-model in {6-7A}
|
||||
|
||||
3C.00 CORE_CYCLES
|
||||
|
||||
251
configs/cfg_Goldmont_all_core.txt
Normal file
251
configs/cfg_Goldmont_all_core.txt
Normal file
@@ -0,0 +1,251 @@
|
||||
# Based on https://download.01.org/perfmon/GLM/goldmont_core_v13.json
|
||||
# Applies to processors with family-model in {6-5C, 6-5F}
|
||||
|
||||
# Loads blocked due to store data not ready (Precise event capable)
|
||||
03.01 LD_BLOCKS.DATA_UNKNOWN
|
||||
|
||||
# Loads blocked due to store forward restriction (Precise event capable)
|
||||
03.02 LD_BLOCKS.STORE_FORWARD
|
||||
|
||||
# Loads blocked because address has 4k partial address false dependence (Precise event capable)
|
||||
03.04 LD_BLOCKS.4K_ALIAS
|
||||
|
||||
# Loads blocked because address is not in the UTLB (Precise event capable)
|
||||
03.08 LD_BLOCKS.UTLB_MISS
|
||||
|
||||
# Loads blocked (Precise event capable)
|
||||
03.10 LD_BLOCKS.ALL_BLOCK
|
||||
|
||||
# Duration of D-side page-walks in cycles
|
||||
05.01 PAGE_WALKS.D_SIDE_CYCLES
|
||||
|
||||
# Duration of I-side pagewalks in cycles
|
||||
05.02 PAGE_WALKS.I_SIDE_CYCLES
|
||||
|
||||
# Duration of page-walks in cycles
|
||||
05.03 PAGE_WALKS.CYCLES
|
||||
|
||||
# Uops issued to the back end per cycle
|
||||
0E.00 UOPS_ISSUED.ANY
|
||||
|
||||
# Load uops that split a page (Precise event capable)
|
||||
13.02 MISALIGN_MEM_REF.LOAD_PAGE_SPLIT
|
||||
|
||||
# Store uops that split a page (Precise event capable)
|
||||
13.04 MISALIGN_MEM_REF.STORE_PAGE_SPLIT
|
||||
|
||||
# L2 cache request misses
|
||||
2E.41 LONGEST_LAT_CACHE.MISS
|
||||
|
||||
# L2 cache requests
|
||||
2E.4F LONGEST_LAT_CACHE.REFERENCE
|
||||
|
||||
# Requests rejected by the XQ
|
||||
30.00 L2_REJECT_XQ.ALL
|
||||
|
||||
# Requests rejected by the L2Q
|
||||
31.00 CORE_REJECT_L2Q.ALL
|
||||
|
||||
# Core cycles when core is not halted
|
||||
3C.00 CPU_CLK_UNHALTED.CORE_P
|
||||
|
||||
# Reference cycles when core is not halted
|
||||
3C.01 CPU_CLK_UNHALTED.REF
|
||||
|
||||
# L1 Cache evictions for dirty data
|
||||
51.01 DL1.DIRTY_EVICTION
|
||||
|
||||
# References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture
|
||||
80.01 ICACHE.HIT
|
||||
|
||||
# References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture
|
||||
80.02 ICACHE.MISSES
|
||||
|
||||
# References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture
|
||||
80.03 ICACHE.ACCESSES
|
||||
|
||||
# ITLB misses
|
||||
81.04 ITLB.MISS
|
||||
|
||||
# Cycles code-fetch stalled due to any reason.
|
||||
86.00 FETCH_STALL.ALL
|
||||
|
||||
# Cycles code-fetch stalled due to an outstanding ITLB miss.
|
||||
86.01 FETCH_STALL.ITLB_FILL_PENDING_CYCLES
|
||||
|
||||
# Cycles code-fetch stalled due to an outstanding ICache miss.
|
||||
86.02 FETCH_STALL.ICACHE_FILL_PENDING_CYCLES
|
||||
|
||||
# Uops requested but not-delivered to the back-end per cycle
|
||||
9C.00 UOPS_NOT_DELIVERED.ANY
|
||||
|
||||
# Instructions retired (Precise event capable)
|
||||
C0.00 INST_RETIRED.ANY_P
|
||||
|
||||
# Uops retired (Precise event capable)
|
||||
C2.00 UOPS_RETIRED.ANY
|
||||
|
||||
# MS uops retired (Precise event capable)
|
||||
C2.01 UOPS_RETIRED.MS
|
||||
|
||||
# Floating point divide uops retired. (Precise Event Capable)
|
||||
C2.08 UOPS_RETIRED.FPDIV
|
||||
|
||||
# Integer divide uops retired. (Precise Event Capable)
|
||||
C2.10 UOPS_RETIRED.IDIV
|
||||
|
||||
# All machine clears
|
||||
C3.00 MACHINE_CLEARS.ALL
|
||||
|
||||
# Self-Modifying Code detected
|
||||
C3.01 MACHINE_CLEARS.SMC
|
||||
|
||||
# Machine clears due to memory ordering issue
|
||||
C3.02 MACHINE_CLEARS.MEMORY_ORDERING
|
||||
|
||||
# Machine clears due to FP assists
|
||||
C3.04 MACHINE_CLEARS.FP_ASSIST
|
||||
|
||||
# Machine clears due to memory disambiguation
|
||||
C3.08 MACHINE_CLEARS.DISAMBIGUATION
|
||||
|
||||
# Retired branch instructions (Precise event capable)
|
||||
C4.00 BR_INST_RETIRED.ALL_BRANCHES
|
||||
|
||||
# Retired conditional branch instructions (Precise event capable)
|
||||
C4.7E BR_INST_RETIRED.JCC
|
||||
|
||||
# Retired taken branch instructions (Precise event capable)
|
||||
C4.80 BR_INST_RETIRED.ALL_TAKEN_BRANCHES
|
||||
|
||||
# Retired far branch instructions (Precise event capable)
|
||||
C4.BF BR_INST_RETIRED.FAR_BRANCH
|
||||
|
||||
# Retired near indirect jump or near indirect call instructions (Precise event capable)
|
||||
C4.EB BR_INST_RETIRED.NON_RETURN_IND
|
||||
|
||||
# Retired near return instructions (Precise event capable)
|
||||
C4.F7 BR_INST_RETIRED.RETURN
|
||||
|
||||
# Retired near call instructions (Precise event capable)
|
||||
C4.F9 BR_INST_RETIRED.CALL
|
||||
|
||||
# Retired near indirect call instructions (Precise event capable)
|
||||
C4.FB BR_INST_RETIRED.IND_CALL
|
||||
|
||||
# Retired near relative call instructions (Precise event capable)
|
||||
C4.FD BR_INST_RETIRED.REL_CALL
|
||||
|
||||
# Retired conditional branch instructions that were taken (Precise event capable)
|
||||
C4.FE BR_INST_RETIRED.TAKEN_JCC
|
||||
|
||||
# Retired mispredicted branch instructions (Precise event capable)
|
||||
C5.00 BR_MISP_RETIRED.ALL_BRANCHES
|
||||
|
||||
# Retired mispredicted conditional branch instructions (Precise event capable)
|
||||
C5.7E BR_MISP_RETIRED.JCC
|
||||
|
||||
# Retired mispredicted instructions of near indirect Jmp or near indirect call. (Precise event capable)
|
||||
C5.EB BR_MISP_RETIRED.NON_RETURN_IND
|
||||
|
||||
# Retired mispredicted near return instructions (Precise event capable)
|
||||
C5.F7 BR_MISP_RETIRED.RETURN
|
||||
|
||||
# Retired mispredicted near indirect call instructions (Precise event capable)
|
||||
C5.FB BR_MISP_RETIRED.IND_CALL
|
||||
|
||||
# Retired mispredicted conditional branch instructions that were taken (Precise event capable)
|
||||
C5.FE BR_MISP_RETIRED.TAKEN_JCC
|
||||
|
||||
# Unfilled issue slots per cycle
|
||||
CA.00 ISSUE_SLOTS_NOT_CONSUMED.ANY
|
||||
|
||||
# Unfilled issue slots per cycle because of a full resource in the backend
|
||||
CA.01 ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL
|
||||
|
||||
# Unfilled issue slots per cycle to recover
|
||||
CA.02 ISSUE_SLOTS_NOT_CONSUMED.RECOVERY
|
||||
|
||||
# Hardware interrupts received
|
||||
CB.01 HW_INTERRUPTS.RECEIVED
|
||||
|
||||
# Cycles hardware interrupts are masked
|
||||
CB.02 HW_INTERRUPTS.MASKED
|
||||
|
||||
# Cycles pending interrupts are masked
|
||||
CB.04 HW_INTERRUPTS.PENDING_AND_MASKED
|
||||
|
||||
# Cycles a divider is busy
|
||||
CD.00 CYCLES_DIV_BUSY.ALL
|
||||
|
||||
# Cycles the integer divide unit is busy
|
||||
CD.01 CYCLES_DIV_BUSY.IDIV
|
||||
|
||||
# Cycles the FP divide unit is busy
|
||||
CD.02 CYCLES_DIV_BUSY.FPDIV
|
||||
|
||||
# Load uops retired that missed the DTLB (Precise event capable)
|
||||
D0.11 MEM_UOPS_RETIRED.DTLB_MISS_LOADS
|
||||
|
||||
# Store uops retired that missed the DTLB (Precise event capable)
|
||||
D0.12 MEM_UOPS_RETIRED.DTLB_MISS_STORES
|
||||
|
||||
# Memory uops retired that missed the DTLB (Precise event capable)
|
||||
D0.13 MEM_UOPS_RETIRED.DTLB_MISS
|
||||
|
||||
# Locked load uops retired (Precise event capable)
|
||||
D0.21 MEM_UOPS_RETIRED.LOCK_LOADS
|
||||
|
||||
# Load uops retired that split a cache-line (Precise event capable)
|
||||
D0.41 MEM_UOPS_RETIRED.SPLIT_LOADS
|
||||
|
||||
# Store uops retired that split a cache-line (Precise event capable)
|
||||
D0.42 MEM_UOPS_RETIRED.SPLIT_STORES
|
||||
|
||||
# Memory uops retired that split a cache-line (Precise event capable)
|
||||
D0.43 MEM_UOPS_RETIRED.SPLIT
|
||||
|
||||
# Load uops retired (Precise event capable)
|
||||
D0.81 MEM_UOPS_RETIRED.ALL_LOADS
|
||||
|
||||
# Store uops retired (Precise event capable)
|
||||
D0.82 MEM_UOPS_RETIRED.ALL_STORES
|
||||
|
||||
# Memory uops retired (Precise event capable)
|
||||
D0.83 MEM_UOPS_RETIRED.ALL
|
||||
|
||||
# Load uops retired that hit L1 data cache (Precise event capable)
|
||||
D1.01 MEM_LOAD_UOPS_RETIRED.L1_HIT
|
||||
|
||||
# Load uops retired that hit L2 (Precise event capable)
|
||||
D1.02 MEM_LOAD_UOPS_RETIRED.L2_HIT
|
||||
|
||||
# Load uops retired that missed L1 data cache (Precise event capable)
|
||||
D1.08 MEM_LOAD_UOPS_RETIRED.L1_MISS
|
||||
|
||||
# Load uops retired that missed L2 (Precise event capable)
|
||||
D1.10 MEM_LOAD_UOPS_RETIRED.L2_MISS
|
||||
|
||||
# Memory uop retired where cross core or cross module HITM occurred (Precise event capable)
|
||||
D1.20 MEM_LOAD_UOPS_RETIRED.HITM
|
||||
|
||||
# Loads retired that hit WCB (Precise event capable)
|
||||
D1.40 MEM_LOAD_UOPS_RETIRED.WCB_HIT
|
||||
|
||||
# Loads retired that came from DRAM (Precise event capable)
|
||||
D1.80 MEM_LOAD_UOPS_RETIRED.DRAM_HIT
|
||||
|
||||
# BACLEARs asserted for any branch type
|
||||
E6.01 BACLEARS.ALL
|
||||
|
||||
# BACLEARs asserted for return branch
|
||||
E6.08 BACLEARS.RETURN
|
||||
|
||||
# BACLEARs asserted for conditional branch
|
||||
E6.10 BACLEARS.COND
|
||||
|
||||
# MS decode starts
|
||||
E7.01 MS_DECODED.MS_ENTRY
|
||||
|
||||
# Decode restrictions due to predicting wrong instruction length
|
||||
E9.01 DECODE_RESTRICTION.PREDECODE_WRONG
|
||||
248
configs/cfg_Goldmont_all_offcore.txt
Normal file
248
configs/cfg_Goldmont_all_offcore.txt
Normal file
@@ -0,0 +1,248 @@
|
||||
# Based on https://download.01.org/perfmon/GLM/goldmont_core_v13.json
|
||||
# Applies to processors with family-model in {6-5C, 6-5F}
|
||||
|
||||
# Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.
|
||||
B7.01.MSR_RSP0=0x0000010400.TakenAlone OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE
|
||||
|
||||
# Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.
|
||||
B7.01.MSR_RSP0=0x0000018000.TakenAlone OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE
|
||||
|
||||
# Counts demand cacheable data reads of full cache lines that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040001.TakenAlone OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT
|
||||
|
||||
# Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040002.TakenAlone OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT
|
||||
|
||||
# Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040004.TakenAlone OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT
|
||||
|
||||
# Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040008.TakenAlone OFFCORE_RESPONSE.COREWB.L2_HIT
|
||||
|
||||
# Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040010.TakenAlone OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT
|
||||
|
||||
# Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040020.TakenAlone OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT
|
||||
|
||||
# Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040022.TakenAlone OFFCORE_RESPONSE.ANY_RFO.L2_HIT
|
||||
|
||||
# Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000040800.TakenAlone OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT
|
||||
|
||||
# Counts data cache line requests by software prefetch instructions that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000041000.TakenAlone OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT
|
||||
|
||||
# Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000042000.TakenAlone OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT
|
||||
|
||||
# Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000043010.TakenAlone OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT
|
||||
|
||||
# Counts data reads (demand & prefetch) that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000043091.TakenAlone OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT
|
||||
|
||||
# Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x00000432b7.TakenAlone OFFCORE_RESPONSE.ANY_READ.L2_HIT
|
||||
|
||||
# Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000044000.TakenAlone OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT
|
||||
|
||||
# Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000044800.TakenAlone OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT
|
||||
|
||||
# Counts requests to the uncore subsystem that hit the L2 cache.
|
||||
B7.01.MSR_RSP0=0x0000048000.TakenAlone OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT
|
||||
|
||||
# Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000001.TakenAlone OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000002.TakenAlone OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000004.TakenAlone OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000008.TakenAlone OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000010.TakenAlone OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000020.TakenAlone OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000022.TakenAlone OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200000800.TakenAlone OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts data cache line requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200001000.TakenAlone OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200002000.TakenAlone OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200003010.TakenAlone OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200003091.TakenAlone OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x02000032b7.TakenAlone OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200004000.TakenAlone OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module.
|
||||
B7.01.MSR_RSP0=0x0200008000.TakenAlone OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED
|
||||
|
||||
# Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000001.TakenAlone OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000002.TakenAlone OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000004.TakenAlone OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000008.TakenAlone OFFCORE_RESPONSE.COREWB.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000010.TakenAlone OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000020.TakenAlone OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000022.TakenAlone OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400000800.TakenAlone OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts data cache line requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400001000.TakenAlone OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400002000.TakenAlone OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400003010.TakenAlone OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400003091.TakenAlone OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x04000032b7.TakenAlone OFFCORE_RESPONSE.ANY_READ.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400004000.TakenAlone OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x0400008000.TakenAlone OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD
|
||||
|
||||
# Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000001.TakenAlone OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000002.TakenAlone OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000008.TakenAlone OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000010.TakenAlone OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000020.TakenAlone OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000022.TakenAlone OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000000800.TakenAlone OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts data cache line requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000001000.TakenAlone OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000002000.TakenAlone OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000003010.TakenAlone OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000003091.TakenAlone OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x10000032b7.TakenAlone OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000004000.TakenAlone OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.
|
||||
B7.01.MSR_RSP0=0x1000008000.TakenAlone OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE
|
||||
|
||||
# Counts demand cacheable data reads of full cache lines that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000001.TakenAlone OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY
|
||||
|
||||
# Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000002.TakenAlone OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY
|
||||
|
||||
# Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000004.TakenAlone OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY
|
||||
|
||||
# Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000008.TakenAlone OFFCORE_RESPONSE.COREWB.L2_MISS.ANY
|
||||
|
||||
# Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000010.TakenAlone OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY
|
||||
|
||||
# Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000020.TakenAlone OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY
|
||||
|
||||
# Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000022.TakenAlone OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY
|
||||
|
||||
# Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000080.TakenAlone OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY
|
||||
|
||||
# Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000100.TakenAlone OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY
|
||||
|
||||
# Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600000800.TakenAlone OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY
|
||||
|
||||
# Counts data cache line requests by software prefetch instructions that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600001000.TakenAlone OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY
|
||||
|
||||
# Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600002000.TakenAlone OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY
|
||||
|
||||
# Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600003010.TakenAlone OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.ANY
|
||||
|
||||
# Counts data reads (demand & prefetch) that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600003091.TakenAlone OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY
|
||||
|
||||
# Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x36000032b7.TakenAlone OFFCORE_RESPONSE.ANY_READ.L2_MISS.ANY
|
||||
|
||||
# Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600004000.TakenAlone OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY
|
||||
|
||||
# Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.
|
||||
B7.01.MSR_RSP0=0x3600004800.TakenAlone OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY
|
||||
|
||||
# Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received.
|
||||
B7.01.MSR_RSP0=0x4000000001.TakenAlone OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING
|
||||
|
||||
# Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received.
|
||||
B7.01.MSR_RSP0=0x4000000002.TakenAlone OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING
|
||||
|
||||
# Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received.
|
||||
B7.01.MSR_RSP0=0x4000000004.TakenAlone OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING
|
||||
15
configs/cfg_Goldmont_common.txt
Normal file
15
configs/cfg_Goldmont_common.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
# Based on https://download.01.org/perfmon/GLM/goldmont_core_v13.json
|
||||
# Applies to processors with family-model in {6-5C, 6-5F}
|
||||
|
||||
3C.00 CORE_CYCLES
|
||||
C0.00 INST_RETIRED
|
||||
0E.00 UOPS_ISSUED.ANY
|
||||
C2.00 UOPS_RETIRED.ANY
|
||||
C2.01 UOPS_RETIRED.MS
|
||||
C4.00 BR_INST_RETIRED.ALL_BRANCHES
|
||||
C5.00 BR_MISP_RETIRED.ALL_BRANCHES
|
||||
D0.81 MEM_UOPS_RETIRED.ALL_LOADS
|
||||
D0.82 MEM_UOPS_RETIRED.ALL_STORES
|
||||
D1.01 MEM_LOAD_UOPS_RETIRED.L1_HIT
|
||||
D1.02 MEM_LOAD_UOPS_RETIRED.L2_HIT
|
||||
D1.80 MEM_LOAD_UOPS_RETIRED.DRAM_HIT
|
||||
@@ -22,19 +22,19 @@ def getEventConfig(event):
|
||||
if event == 'L1_HIT':
|
||||
if arch in ['Core', 'EnhancedCore']: return '40.0E ' + event # L1D_CACHE_LD.MES
|
||||
if arch in ['NHM', 'WSM']: return 'CB.01 ' + event
|
||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P']: return 'D1.01 ' + event
|
||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.01 ' + event
|
||||
if event == 'L1_MISS':
|
||||
if arch in ['Core', 'EnhancedCore']: return 'CB.01.CTR=0 ' + event
|
||||
if arch in ['IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P']: return 'D1.08 ' + event
|
||||
if arch in ['IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.08 ' + event
|
||||
if arch in ['ZEN+']: return '064.70 ' + event
|
||||
if event == 'L2_HIT':
|
||||
if arch in ['Core', 'EnhancedCore']: return '29.7E ' + event # L2_LD.THIS_CORE.ALL_INCL.MES
|
||||
if arch in ['NHM', 'WSM']: return 'CB.02 ' + event
|
||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P']: return 'D1.02 ' + event
|
||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.02 ' + event
|
||||
if arch in ['ZEN+']: return '064.70 ' + event
|
||||
if event == 'L2_MISS':
|
||||
if arch in ['Core', 'EnhancedCore']: return 'CB.04.CTR=0 ' + event
|
||||
if arch in ['IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P']: return 'D1.10 ' + event
|
||||
if arch in ['IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'TGL', 'ADL-P', 'GLM', 'GLP']: return 'D1.10 ' + event
|
||||
if arch in ['ZEN+']: return '064.08 ' + event
|
||||
if event == 'L3_HIT':
|
||||
if arch in ['NHM', 'WSM']: return 'CB.04 ' + event
|
||||
|
||||
@@ -208,7 +208,7 @@ def runExperiment(instrNode, instrCode, init=None, unrollCount=500, loopCount=0,
|
||||
if 'RDTSC' in evt: continue
|
||||
if evt == 'UOPS':
|
||||
if arch in ['CON', 'WOL']: evt = 'RS_UOPS_DISPATCHED'
|
||||
elif arch in ['NHM', 'WSM', 'GLP']: evt = 'UOPS_RETIRED.ANY'
|
||||
elif arch in ['NHM', 'WSM', 'GLM', 'GLP']: evt = 'UOPS_RETIRED.ANY'
|
||||
elif arch in ['SNB', 'ADL-E']: evt = 'UOPS_RETIRED.ALL'
|
||||
elif arch in ['HSW']: evt = 'UOPS_EXECUTED.CORE'
|
||||
elif arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: evt = 'UOPS_EXECUTED.THREAD'
|
||||
@@ -275,7 +275,7 @@ def getEventConfig(event):
|
||||
if arch in ['CON', 'WOL']: return 'A0.00' # RS_UOPS_DISPATCHED
|
||||
if arch in ['NHM', 'WSM', 'SNB' ]: return 'C2.01' # UOPS_RETIRED.ANY
|
||||
if arch in ['SNB']: return 'C2.01' # UOPS_RETIRED.ALL
|
||||
if arch in ['GLP', 'ADL-E']: return 'C2.00' # UOPS_RETIRED.ALL
|
||||
if arch in ['GLM', 'GLP', 'ADL-E']: return 'C2.00' # UOPS_RETIRED.ALL
|
||||
if arch in ['HSW']: return 'B1.02' # UOPS_EXECUTED.CORE; note: may undercount due to erratum HSD30
|
||||
if arch in ['IVB', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL', 'ADL-P']: return 'B1.01' # UOPS_EXECUTED.THREAD
|
||||
if arch in ['ZEN+', 'ZEN2', 'ZEN3']: return '0C1.00'
|
||||
@@ -289,7 +289,7 @@ def getEventConfig(event):
|
||||
if arch in ['NHM', 'WSM']: return 'D1.02'
|
||||
if arch in ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', 'CNL', 'ICL', 'CLX', 'TGL', 'RKL']: return '79.30'
|
||||
if arch in ['ADL-P']: return '79.20'
|
||||
if arch in ['GLP', 'ADL-E']: return 'C2.01'
|
||||
if arch in ['GLM', 'GLP', 'ADL-E']: return 'C2.01'
|
||||
if event == 'UOPS_PORT_0':
|
||||
if arch in ['CON', 'WOL']: return 'A1.01.CTR=0'
|
||||
if arch in ['NHM', 'WSM']: return 'B1.01'
|
||||
|
||||
Reference in New Issue
Block a user