# Based on https://download.01.org/perfmon/NHM-EP/NehalemEP_core_V2.json # Applies to processors with family-model in {6-1E, 6-1F, 6-1A} # All Store buffer stall cycles 04.07 SB_DRAIN.ANY # Loads delayed with at-Retirement block code 06.04 STORE_BLOCKS.AT_RET # Cacheable loads delayed with L1D block code 06.08 STORE_BLOCKS.L1D_BLOCK # False dependencies due to partial address aliasing 07.01 PARTIAL_ADDRESS_ALIAS # DTLB load misses 08.01 DTLB_LOAD_MISSES.ANY # DTLB load miss page walks complete 08.02 DTLB_LOAD_MISSES.WALK_COMPLETED # DTLB second level hit 08.10 DTLB_LOAD_MISSES.STLB_HIT # DTLB load miss caused by low part of address 08.20 DTLB_LOAD_MISSES.PDE_MISS # Instructions retired which contains a load (Precise Event) 0B.01 MEM_INST_RETIRED.LOADS # Instructions retired which contains a store (Precise Event) 0B.02 MEM_INST_RETIRED.STORES # Memory instructions retired above 0 clocks (Precise Event) 0B.10.MSR_3F6H=0x0.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0 # Memory instructions retired above 16 clocks (Precise Event) 0B.10.MSR_3F6H=0x10.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16 # Memory instructions retired above 256 clocks (Precise Event) 0B.10.MSR_3F6H=0x100.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256 # Memory instructions retired above 4096 clocks (Precise Event) 0B.10.MSR_3F6H=0x1000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096 # Memory instructions retired above 32 clocks (Precise Event) 0B.10.MSR_3F6H=0x20.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32 # Memory instructions retired above 512 clocks (Precise Event) 0B.10.MSR_3F6H=0x200.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512 # Memory instructions retired above 8192 clocks (Precise Event) 0B.10.MSR_3F6H=0x2000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192 # Memory instructions retired above 4 clocks (Precise Event) 0B.10.MSR_3F6H=0x4.CTR=3.TakenAlone 
MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4 # Memory instructions retired above 64 clocks (Precise Event) 0B.10.MSR_3F6H=0x40.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64 # Memory instructions retired above 1024 clocks (Precise Event) 0B.10.MSR_3F6H=0x400.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024 # Memory instructions retired above 16384 clocks (Precise Event) 0B.10.MSR_3F6H=0x4000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384 # Memory instructions retired above 8 clocks (Precise Event) 0B.10.MSR_3F6H=0x8.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8 # Memory instructions retired above 128 clocks (Precise Event) 0B.10.MSR_3F6H=0x80.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128 # Memory instructions retired above 2048 clocks (Precise Event) 0B.10.MSR_3F6H=0x800.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048 # Memory instructions retired above 32768 clocks (Precise Event) 0B.10.MSR_3F6H=0x8000.CTR=3.TakenAlone MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768 # Retired stores that miss the DTLB (Precise Event) 0C.01 MEM_STORE_RETIRED.DTLB_MISS # Uops issued 0E.01 UOPS_ISSUED.ANY # Cycles Uops were issued on either thread 0E.01.CMSK=1.AnyT UOPS_ISSUED.CYCLES_ALL_THREADS # Cycles no Uops were issued on any thread 0E.01.CMSK=1.AnyT.INV UOPS_ISSUED.CORE_STALL_CYCLES # Cycles no Uops were issued 0E.01.CMSK=1.INV UOPS_ISSUED.STALL_CYCLES # Fused Uops issued 0E.02 UOPS_ISSUED.FUSED # Load instructions retired that HIT modified data in sibling core (Precise Event) 0F.02 MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM # Load instructions retired remote cache HIT data source (Precise Event) 0F.08 MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT # Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event) 0F.10 MEM_UNCORE_RETIRED.REMOTE_DRAM # Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event) 0F.20 
MEM_UNCORE_RETIRED.LOCAL_DRAM # Load instructions retired IO (Precise Event) 0F.80 MEM_UNCORE_RETIRED.UNCACHEABLE # Computational floating-point operations executed 10.01 FP_COMP_OPS_EXE.X87 # MMX Uops 10.02 FP_COMP_OPS_EXE.MMX # SSE and SSE2 FP Uops 10.04 FP_COMP_OPS_EXE.SSE_FP # SSE2 integer Uops 10.08 FP_COMP_OPS_EXE.SSE2_INTEGER # SSE FP packed Uops 10.10 FP_COMP_OPS_EXE.SSE_FP_PACKED # SSE FP scalar Uops 10.20 FP_COMP_OPS_EXE.SSE_FP_SCALAR # SSE* FP single precision Uops 10.40 FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION # SSE* FP double precision Uops 10.80 FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION # 128 bit SIMD integer multiply operations 12.01 SIMD_INT_128.PACKED_MPY # 128 bit SIMD integer shift operations 12.02 SIMD_INT_128.PACKED_SHIFT # 128 bit SIMD integer pack operations 12.04 SIMD_INT_128.PACK # 128 bit SIMD integer unpack operations 12.08 SIMD_INT_128.UNPACK # 128 bit SIMD integer logical operations 12.10 SIMD_INT_128.PACKED_LOGICAL # 128 bit SIMD integer arithmetic operations 12.20 SIMD_INT_128.PACKED_ARITH # 128 bit SIMD integer shuffle/move operations 12.40 SIMD_INT_128.SHUFFLE_MOVE # Loads dispatched that bypass the MOB 13.01 LOAD_DISPATCH.RS # Loads dispatched from stage 305 13.02 LOAD_DISPATCH.RS_DELAYED # Loads dispatched from the MOB 13.04 LOAD_DISPATCH.MOB # All loads dispatched 13.07 LOAD_DISPATCH.ANY # Cycles the divider is busy 14.01 ARITH.CYCLES_DIV_BUSY # Divide Operations executed 14.01.CMSK=1.EDG.INV ARITH.DIV # Multiply operations executed 14.02 ARITH.MUL # Instructions written to instruction queue. 
17.01 INST_QUEUE_WRITES # Instructions that must be decoded by decoder 0 18.01 INST_DECODED.DEC0 # Two Uop instructions decoded 19.01 TWO_UOP_INSTS_DECODED # Cycles instructions are written to the instruction queue 1E.01 INST_QUEUE_WRITE_CYCLES # Loops that can't stream from the instruction queue 20.01 LSD_OVERFLOW # L2 load hits 24.01 L2_RQSTS.LD_HIT # L2 load misses 24.02 L2_RQSTS.LD_MISS # L2 requests 24.03 L2_RQSTS.LOADS # L2 RFO hits 24.04 L2_RQSTS.RFO_HIT # L2 RFO misses 24.08 L2_RQSTS.RFO_MISS # L2 RFO requests 24.0C L2_RQSTS.RFOS # L2 instruction fetch hits 24.10 L2_RQSTS.IFETCH_HIT # L2 instruction fetch misses 24.20 L2_RQSTS.IFETCH_MISS # L2 instruction fetches 24.30 L2_RQSTS.IFETCHES # L2 prefetch hits 24.40 L2_RQSTS.PREFETCH_HIT # L2 prefetch misses 24.80 L2_RQSTS.PREFETCH_MISS # All L2 misses 24.AA L2_RQSTS.MISS # All L2 prefetches 24.C0 L2_RQSTS.PREFETCHES # All L2 requests 24.FF L2_RQSTS.REFERENCES # L2 data demand loads in I state (misses) 26.01 L2_DATA_RQSTS.DEMAND.I_STATE # L2 data demand loads in S state 26.02 L2_DATA_RQSTS.DEMAND.S_STATE # L2 data demand loads in E state 26.04 L2_DATA_RQSTS.DEMAND.E_STATE # L2 data demand loads in M state 26.08 L2_DATA_RQSTS.DEMAND.M_STATE # L2 data demand requests 26.0F L2_DATA_RQSTS.DEMAND.MESI # L2 data prefetches in the I state (misses) 26.10 L2_DATA_RQSTS.PREFETCH.I_STATE # L2 data prefetches in the S state 26.20 L2_DATA_RQSTS.PREFETCH.S_STATE # L2 data prefetches in E state 26.40 L2_DATA_RQSTS.PREFETCH.E_STATE # L2 data prefetches in M state 26.80 L2_DATA_RQSTS.PREFETCH.M_STATE # All L2 data prefetches 26.F0 L2_DATA_RQSTS.PREFETCH.MESI # All L2 data requests 26.FF L2_DATA_RQSTS.ANY # L2 demand store RFOs in I state (misses) 27.01 L2_WRITE.RFO.I_STATE # L2 demand store RFOs in S state 27.02 L2_WRITE.RFO.S_STATE # L2 demand store RFOs in M state 27.08 L2_WRITE.RFO.M_STATE # All L2 demand store RFOs that hit the cache 27.0E L2_WRITE.RFO.HIT # All L2 demand store RFOs 27.0F L2_WRITE.RFO.MESI # L2 demand lock 
RFOs in I state (misses) 27.10 L2_WRITE.LOCK.I_STATE # L2 demand lock RFOs in S state 27.20 L2_WRITE.LOCK.S_STATE # L2 demand lock RFOs in E state 27.40 L2_WRITE.LOCK.E_STATE # L2 demand lock RFOs in M state 27.80 L2_WRITE.LOCK.M_STATE # All demand L2 lock RFOs that hit the cache 27.E0 L2_WRITE.LOCK.HIT # All demand L2 lock RFOs 27.F0 L2_WRITE.LOCK.MESI # L1 writebacks to L2 in I state (misses) 28.01 L1D_WB_L2.I_STATE # L1 writebacks to L2 in S state 28.02 L1D_WB_L2.S_STATE # L1 writebacks to L2 in E state 28.04 L1D_WB_L2.E_STATE # L1 writebacks to L2 in M state 28.08 L1D_WB_L2.M_STATE # All L1 writebacks to L2 28.0F L1D_WB_L2.MESI # Longest latency cache miss 2E.41 LONGEST_LAT_CACHE.MISS # Longest latency cache reference 2E.4F LONGEST_LAT_CACHE.REFERENCE # Cycles when thread is not halted (programmable counter) 3C.00 CPU_CLK_UNHALTED.THREAD_P # Total CPU cycles 3C.00.CMSK=2.INV CPU_CLK_UNHALTED.TOTAL_CYCLES # Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter) 3C.01 CPU_CLK_UNHALTED.REF_P # L1 data cache read in I state (misses) 40.01.CTR=0 L1D_CACHE_LD.I_STATE # L1 data cache read in S state 40.02.CTR=0 L1D_CACHE_LD.S_STATE # L1 data cache read in E state 40.04.CTR=0 L1D_CACHE_LD.E_STATE # L1 data cache read in M state 40.08.CTR=0 L1D_CACHE_LD.M_STATE # L1 data cache reads 40.0F.CTR=0 L1D_CACHE_LD.MESI # L1 data cache stores in S state 41.02.CTR=0 L1D_CACHE_ST.S_STATE # L1 data cache stores in E state 41.04.CTR=0 L1D_CACHE_ST.E_STATE # L1 data cache stores in M state 41.08.CTR=0 L1D_CACHE_ST.M_STATE # L1 data cache load lock hits 42.01.CTR=0 L1D_CACHE_LOCK.HIT # L1 data cache load locks in S state 42.02.CTR=0 L1D_CACHE_LOCK.S_STATE # L1 data cache load locks in E state 42.04.CTR=0 L1D_CACHE_LOCK.E_STATE # L1 data cache load locks in M state 42.08.CTR=0 L1D_CACHE_LOCK.M_STATE # All references to the L1 data cache 43.01.CTR=0 L1D_ALL_REF.ANY # L1 data cacheable reads and writes 43.02.CTR=0 L1D_ALL_REF.CACHEABLE # DTLB misses 49.01 
DTLB_MISSES.ANY # DTLB miss page walks 49.02 DTLB_MISSES.WALK_COMPLETED # DTLB first level misses but second level hit 49.10 DTLB_MISSES.STLB_HIT # Load operations conflicting with software prefetches 4C.01.CTR=0 LOAD_HIT_PRE # L1D hardware prefetch requests 4E.01.CTR=0 L1D_PREFETCH.REQUESTS # L1D hardware prefetch misses 4E.02.CTR=0 L1D_PREFETCH.MISS # L1D hardware prefetch requests triggered 4E.04.CTR=0 L1D_PREFETCH.TRIGGERS # L1 data cache lines allocated 51.01.CTR=0 L1D.REPL # L1D cache lines allocated in the M state 51.02.CTR=0 L1D.M_REPL # L1D cache lines replaced in M state 51.04.CTR=0 L1D.M_EVICT # L1D snoop eviction of cache lines in M state 51.08.CTR=0 L1D.M_SNOOP_EVICT # L1D prefetch load lock accepted in fill buffer 52.01.CTR=0 L1D_CACHE_PREFETCH_LOCK_FB_HIT # L1D load lock accepted in fill buffer 53.01.CTR=0 L1D_CACHE_LOCK_FB_HIT # Cycles L1D and L2 locked 63.01.CTR=0 CACHE_LOCK_CYCLES.L1D_L2 # Cycles L1D locked 63.02.CTR=0 CACHE_LOCK_CYCLES.L1D # I/O transactions 6C.01 IO_TRANSACTIONS # L1I instruction fetch hits 80.01 L1I.HITS # L1I instruction fetch misses 80.02 L1I.MISSES # L1I Instruction fetches 80.03 L1I.READS # L1I instruction fetch stall cycles 80.04 L1I.CYCLES_STALLED # Large ITLB hit 82.01 LARGE_ITLB.HIT # ITLB miss 85.01 ITLB_MISSES.ANY # ITLB miss page walks 85.02 ITLB_MISSES.WALK_COMPLETED # Length Change Prefix stall cycles 87.01 ILD_STALL.LCP # Stall cycles due to BPU MRU bypass 87.02 ILD_STALL.MRU # Instruction Queue full stall cycles 87.04 ILD_STALL.IQ_FULL # Regen stall cycles 87.08 ILD_STALL.REGEN # Any Instruction Length Decoder stall cycles 87.0F ILD_STALL.ANY # Conditional branch instructions executed 88.01 BR_INST_EXEC.COND # Unconditional branches executed 88.02 BR_INST_EXEC.DIRECT # Indirect non call branches executed 88.04 BR_INST_EXEC.INDIRECT_NON_CALL # All non call branches executed 88.07 BR_INST_EXEC.NON_CALLS # Indirect return branches executed 88.08 BR_INST_EXEC.RETURN_NEAR # Unconditional call branches executed 88.10 
BR_INST_EXEC.DIRECT_NEAR_CALL # Indirect call branches executed 88.20 BR_INST_EXEC.INDIRECT_NEAR_CALL # Call branches executed 88.30 BR_INST_EXEC.NEAR_CALLS # Taken branches executed 88.40 BR_INST_EXEC.TAKEN # Branch instructions executed 88.7F BR_INST_EXEC.ANY # Mispredicted conditional branches executed 89.01 BR_MISP_EXEC.COND # Mispredicted unconditional branches executed 89.02 BR_MISP_EXEC.DIRECT # Mispredicted indirect non call branches executed 89.04 BR_MISP_EXEC.INDIRECT_NON_CALL # Mispredicted non call branches executed 89.07 BR_MISP_EXEC.NON_CALLS # Mispredicted return branches executed 89.08 BR_MISP_EXEC.RETURN_NEAR # Mispredicted unconditional call branches executed 89.10 BR_MISP_EXEC.DIRECT_NEAR_CALL # Mispredicted indirect call branches executed 89.20 BR_MISP_EXEC.INDIRECT_NEAR_CALL # Mispredicted call branches executed 89.30 BR_MISP_EXEC.NEAR_CALLS # Mispredicted taken branches executed 89.40 BR_MISP_EXEC.TAKEN # Mispredicted branches executed 89.7F BR_MISP_EXEC.ANY # Resource related stall cycles A2.01 RESOURCE_STALLS.ANY # Load buffer stall cycles A2.02 RESOURCE_STALLS.LOAD # Reservation Station full stall cycles A2.04 RESOURCE_STALLS.RS_FULL # Store buffer stall cycles A2.08 RESOURCE_STALLS.STORE # ROB full stall cycles A2.10 RESOURCE_STALLS.ROB_FULL # FPU control word write stall cycles A2.20 RESOURCE_STALLS.FPCW # MXCSR rename stall cycles A2.40 RESOURCE_STALLS.MXCSR # Other Resource related stall cycles A2.80 RESOURCE_STALLS.OTHER # Macro-fused instructions decoded A6.01 MACRO_INSTS.FUSIONS_DECODED # Instruction queue forced BACLEAR A7.01 BACLEAR_FORCE_IQ # Cycles when uops were delivered by the LSD A8.01.CMSK=1 LSD.ACTIVE # Cycles no uops were delivered by the LSD A8.01.CMSK=1.INV LSD.INACTIVE # ITLB flushes AE.01 ITLB_FLUSH # Offcore L1 data cache writebacks B0.40 OFFCORE_REQUESTS.L1D_WRITEBACK # Uops executed on port 0 B1.01 UOPS_EXECUTED.PORT0 # Uops executed on port 1 B1.02 UOPS_EXECUTED.PORT1 # Uops executed on port 2 (core count) B1.04.AnyT 
UOPS_EXECUTED.PORT2_CORE # Uops executed on port 3 (core count) B1.08.AnyT UOPS_EXECUTED.PORT3_CORE # Uops executed on port 4 (core count) B1.10.AnyT UOPS_EXECUTED.PORT4_CORE # Cycles Uops executed on ports 0-4 (core count) B1.1F.CMSK=1.AnyT UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5 # Uops executed on ports 0-4 (core count) B1.1F.CMSK=1.AnyT.EDG.INV UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5 # Cycles no Uops issued on ports 0-4 (core count) B1.1F.CMSK=1.AnyT.INV UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5 # Uops executed on port 5 B1.20 UOPS_EXECUTED.PORT5 # Cycles Uops executed on any port (core count) B1.3F.CMSK=1.AnyT UOPS_EXECUTED.CORE_ACTIVE_CYCLES # Uops executed on any port (core count) B1.3F.CMSK=1.AnyT.EDG.INV UOPS_EXECUTED.CORE_STALL_COUNT # Cycles no Uops issued on any port (core count) B1.3F.CMSK=1.AnyT.INV UOPS_EXECUTED.CORE_STALL_CYCLES # Uops issued on ports 0, 1 or 5 B1.40 UOPS_EXECUTED.PORT015 # Cycles no Uops issued on ports 0, 1 or 5 B1.40.CMSK=1.INV UOPS_EXECUTED.PORT015_STALL_CYCLES # Uops issued on ports 2, 3 or 4 B1.80.AnyT UOPS_EXECUTED.PORT234_CORE # Offcore requests blocked due to Super Queue full B2.01 OFFCORE_REQUESTS_SQ_FULL # Thread responded HIT to snoop B8.01 SNOOP_RESPONSE.HIT # Thread responded HITE to snoop B8.02 SNOOP_RESPONSE.HITE # Thread responded HITM to snoop B8.04 SNOOP_RESPONSE.HITM # Instructions retired (Programmable counter and Precise Event) C0.01 INST_RETIRED.ANY_P # Total cycles (Precise Event) C0.01.CMSK=16.INV INST_RETIRED.TOTAL_CYCLES # Total cycles (Precise Event) C0.01.CMSK=16.INV INST_RETIRED.TOTAL_CYCLES_PS # Retired floating-point operations (Precise Event) C0.02 INST_RETIRED.X87 # Retired MMX instructions (Precise Event) C0.04 INST_RETIRED.MMX # Uops retired (Precise Event) C2.01 UOPS_RETIRED.ANY # Cycles Uops are being retired C2.01.CMSK=1 UOPS_RETIRED.ACTIVE_CYCLES # Cycles Uops are not retiring (Precise Event) C2.01.CMSK=1.INV UOPS_RETIRED.STALL_CYCLES # Total cycles using precise uop retired event (Precise 
Event) C2.01.CMSK=16.INV UOPS_RETIRED.TOTAL_CYCLES # Retirement slots used (Precise Event) C2.02 UOPS_RETIRED.RETIRE_SLOTS # Macro-fused Uops retired (Precise Event) C2.04 UOPS_RETIRED.MACRO_FUSED # Cycles machine clear asserted C3.01 MACHINE_CLEARS.CYCLES # Execution pipeline restart due to Memory ordering conflicts C3.02 MACHINE_CLEARS.MEM_ORDER # Self-Modifying Code detected C3.04 MACHINE_CLEARS.SMC # Retired conditional branch instructions (Precise Event) C4.01 BR_INST_RETIRED.CONDITIONAL # Retired near call instructions (Precise Event) C4.02 BR_INST_RETIRED.NEAR_CALL # Retired branch instructions (Precise Event) C4.04 BR_INST_RETIRED.ALL_BRANCHES # Mispredicted retired near calls (Precise Event) C5.02 BR_MISP_RETIRED.NEAR_CALL # SIMD Packed-Single Uops retired (Precise Event) C7.01 SSEX_UOPS_RETIRED.PACKED_SINGLE # SIMD Scalar-Single Uops retired (Precise Event) C7.02 SSEX_UOPS_RETIRED.SCALAR_SINGLE # SIMD Packed-Double Uops retired (Precise Event) C7.04 SSEX_UOPS_RETIRED.PACKED_DOUBLE # SIMD Scalar-Double Uops retired (Precise Event) C7.08 SSEX_UOPS_RETIRED.SCALAR_DOUBLE # SIMD Vector Integer Uops retired (Precise Event) C7.10 SSEX_UOPS_RETIRED.VECTOR_INTEGER # Retired instructions that missed the ITLB (Precise Event) C8.20 ITLB_MISS_RETIRED # Retired loads that hit the L1 data cache (Precise Event) CB.01 MEM_LOAD_RETIRED.L1D_HIT # Retired loads that hit the L2 cache (Precise Event) CB.02 MEM_LOAD_RETIRED.L2_HIT # Retired loads that hit valid versions in the LLC cache (Precise Event) CB.04 MEM_LOAD_RETIRED.LLC_UNSHARED_HIT # Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event) CB.08 MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM # Retired loads that miss the LLC cache (Precise Event) CB.10 MEM_LOAD_RETIRED.LLC_MISS # Retired loads that miss L1D and hit a previously allocated LFB (Precise Event) CB.40 MEM_LOAD_RETIRED.HIT_LFB # Retired loads that miss the DTLB (Precise Event) CB.80 MEM_LOAD_RETIRED.DTLB_MISS # Transitions from 
MMX to Floating Point instructions CC.01 FP_MMX_TRANS.TO_FP # Transitions from Floating Point to MMX instructions CC.02 FP_MMX_TRANS.TO_MMX # All Floating Point to and from MMX transitions CC.03 FP_MMX_TRANS.ANY # Instructions decoded D0.01 MACRO_INSTS.DECODED # Cycles no Uops are decoded D1.01.CMSK=1.INV UOPS_DECODED.STALL_CYCLES # Uops decoded by Microcode Sequencer D1.02.CMSK=1 UOPS_DECODED.MS_CYCLES_ACTIVE # Stack pointer instructions decoded D1.04 UOPS_DECODED.ESP_FOLDING # Stack pointer sync operations D1.08 UOPS_DECODED.ESP_SYNC # Flag stall cycles D2.01 RAT_STALLS.FLAGS # Partial register stall cycles D2.02 RAT_STALLS.REGISTERS # ROB read port stall cycles D2.04 RAT_STALLS.ROB_READ_PORT # Scoreboard stall cycles D2.08 RAT_STALLS.SCOREBOARD # All RAT stall cycles D2.0F RAT_STALLS.ANY # Segment rename stall cycles D4.01 SEG_RENAME_STALLS # ES segment renames D5.01 ES_REG_RENAMES # Uop unfusions due to FP exceptions DB.01 UOP_UNFUSION # Branch instructions decoded E0.01 BR_INST_DECODED # Branch prediction unit missed call or return E5.01 BPU_MISSED_CALL_RET # BACLEAR asserted, regardless of cause E6.01 BACLEAR.CLEAR # BACLEAR asserted with bad target address E6.02 BACLEAR.BAD_TARGET # Early Branch Prediction Unit clears E8.01 BPU_CLEARS.EARLY # Late Branch Prediction Unit clears E8.02 BPU_CLEARS.LATE # L2 Load transactions F0.01 L2_TRANSACTIONS.LOAD # L2 RFO transactions F0.02 L2_TRANSACTIONS.RFO # L2 instruction fetch transactions F0.04 L2_TRANSACTIONS.IFETCH # L2 prefetch transactions F0.08 L2_TRANSACTIONS.PREFETCH # L1D writeback to L2 transactions F0.10 L2_TRANSACTIONS.L1D_WB # L2 fill transactions F0.20 L2_TRANSACTIONS.FILL # L2 writeback to LLC transactions F0.40 L2_TRANSACTIONS.WB # All L2 transactions F0.80 L2_TRANSACTIONS.ANY # L2 lines allocated in the S state F1.02 L2_LINES_IN.S_STATE # L2 lines allocated in the E state F1.04 L2_LINES_IN.E_STATE # L2 lines allocated F1.07 L2_LINES_IN.ANY # L2 lines evicted by a demand request F2.01 
L2_LINES_OUT.DEMAND_CLEAN # L2 modified lines evicted by a demand request F2.02 L2_LINES_OUT.DEMAND_DIRTY # L2 lines evicted by a prefetch request F2.04 L2_LINES_OUT.PREFETCH_CLEAN # L2 modified lines evicted by a prefetch request F2.08 L2_LINES_OUT.PREFETCH_DIRTY # L2 lines evicted F2.0F L2_LINES_OUT.ANY # Super Queue lock splits across a cache line F4.10 SQ_MISC.SPLIT_LOCK # Super Queue full stall cycles F6.01 SQ_FULL_STALL_CYCLES # X87 Floating point assists (Precise Event) F7.01 FP_ASSIST.ALL # X87 Floating point assists for invalid output value (Precise Event) F7.02 FP_ASSIST.OUTPUT # X87 Floating point assists for invalid input value (Precise Event) F7.04 FP_ASSIST.INPUT # SIMD integer 64 bit packed multiply operations FD.01 SIMD_INT_64.PACKED_MPY # SIMD integer 64 bit shift operations FD.02 SIMD_INT_64.PACKED_SHIFT # SIMD integer 64 bit pack operations FD.04 SIMD_INT_64.PACK # SIMD integer 64 bit unpack operations FD.08 SIMD_INT_64.UNPACK # SIMD integer 64 bit logical operations FD.10 SIMD_INT_64.PACKED_LOGICAL # SIMD integer 64 bit arithmetic operations FD.20 SIMD_INT_64.PACKED_ARITH # SIMD integer 64 bit shuffle/move operations FD.40 SIMD_INT_64.SHUFFLE_MOVE