diff --git a/osaca/__init__.py b/osaca/__init__.py index 11d27f8..b650ceb 100644 --- a/osaca/__init__.py +++ b/osaca/__init__.py @@ -1 +1 @@ -__version__ = '0.1' +__version__ = '0.2' diff --git a/osaca/data/skl_data.csv b/osaca/data/skl_data.csv index e2753d2..124e188 100644 --- a/osaca/data/skl_data.csv +++ b/osaca/data/skl_data.csv @@ -1,66 +1,66 @@ instr,TP,LT,ports -jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" -jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0.0, 0, 0)" +jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" +jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0.0, 0, 0)" addl-r32_mem,0.5,1.0,"(-1,)" -add-mem_imd,1.0,5.0,"(0.5,0.5,0.6666666666666666,0.6666666666666666,1.0,0.5,0.5,0.6666666666666666)" +add-mem_imd,1.0,5.0,"(0.5, 0,0.5,0.6666666666666666,0.6666666666666666,1.0,0.5,0.5,0.6666666666666666)" addpd-xmm_mem,1.0,4.0,"(-1,)" -addq-r64_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +addq-r64_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" addq-r64_mem,0.5,1.0,"(-1,)" -add-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -addl-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +add-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +addl-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" add-r32_r32,0.25,-1.0,"(-1,)" -add-r64_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -add-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +add-r64_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +add-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" addsd-xmm_mem,1.0,4.0,"(-1,)" addsd-xmm_xmm,1.0,4.0,"(-1,)" -and-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -cmpl-r32_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -cmpq-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -cmp-r32_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -cmp-r32_mem,0.5,-1.0,"(0.25, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)" -cmpl-mem_r32,0.5,-1.0,"(0.25, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)" +and-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +cmpl-r32_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +cmpq-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +cmp-r32_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +cmp-r32_mem,0.5,-1.0,"(0.25, 0, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)" +cmpl-mem_r32,0.5,-1.0,"(0.25, 0, 0.25, 0.5, 0.5, 0, 0.25, 0.25, 0)" cmp-r32_r32,0.25,1.0,"(-1,)" cmp-r64_imd,0.25,1.0,"(-1,)" -cmp-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +cmp-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" imulq-r64_r64_imd,1.0,3.0,"(-1,)" imul-r64_r64,1.0,3.0,"(-1,)" -incq-r64,0.25,1.0,"(0.25, 0.25, 0, 0, 0, 0.25, 0.25, 0)" -incl-r32,0.25,1.0,"(0.25, 0.25, 0, 0, 0, 0.25, 0.25, 0)" +incq-r64,0.25,1.0,"(0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)" +incl-r32,0.25,1.0,"(0.25, 0, 0.25, 0, 0, 0, 0.25, 0.25, 0)" inc-r64,0.25,1.0,"(-1,)" lea-r32_mem,1.0,-1.0,"(-1,)" lea-r64_mem-,1.0,-1.0,"(-1,)" @@ -68,21 +68,21 @@ lea-r64_mem,1.0,1.0,"(-1,)" movl-mem_r32,1.0,2.0,"(-1,)" movl-r32_imd,0.25,1.0,"(-1,)" movl-r32_mem,0.5,2.0,"(-1,)" -mov-mem_imd,1.0,2.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)" -mov-mem_r32,1.0,2.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)" -mov-mem_r64,1.0,2.0,"(0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)" +mov-mem_imd,1.0,2.0,"(0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)" +mov-mem_r32,1.0,2.0,"(0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)" +mov-mem_r64,1.0,2.0,"(0, 0,0,0.3333333333333333,0.3333333333333333,1.0,0,0,0.3333333333333333)" movq_r64_xmm,1.0,-1.0,"(-1,)" mov-r32_imd,0.25,1.0,"(-1,)" -mov-r32_mem,0.5,2.0,"(0,0,0.5,0.5,0,0,0,0)" -mov-r32_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -mov-r64_imd,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +mov-r32_mem,0.5,2.0,"(0, 0,0,0.5,0.5,0,0,0,0)" +mov-r32_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +mov-r64_imd,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" mov_r64_mem,0.5,-1.0,"(-1,)" -mov-r64_mem,0.5,2.0,"(0,0,0.5,0.5,0,0,0,0)" -mov-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +mov-r64_mem,0.5,2.0,"(0, 0,0,0.5,0.5,0,0,0,0)" +mov-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" movsd-mem_xmm,1.0,3.0,"(-1,)" movsd-xmm_mem,0.5,3.0,"(-1,)" -movslq-r64_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -movsx-r64_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +movslq-r64_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +movsx-r64_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" movups-mem_xmm,1.0,3.0,"(-1,)" movups-xmm_mem,0.5,2.0,"(-1,)" mulpd-xmm_mem,1.0,4.0,"(-1,)" @@ -90,40 +90,38 @@ mulsd-xmm_mem,1.0,4.0,"(-1,)" mulsd-xmm_xmm,1.0,4.0,"(-1,)" prefetcht0-mem,0.5,-1.0,"(-1,)" prefetchw-mem,1.0,-1.0,"(-1,)" -shl-r64_imd,0.5,1.0,"(0.5,0,0,0,0,0,0.5,0)" +shl-r64_imd,0.5,1.0,"(0.5, 0,0,0,0,0,0,0.5,0)" sub-r32_imd,0.25,1.0,"(-1,)" -sub-r64_r64,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" +sub-r64_r64,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" vaddpd-ymm_ymm_mem,0.5,4.0,"(-1,)" vaddps-ymm_ymm_ymm,0.5,4.0,"(-1,)" -vaddsd-xmm_xmm_mem,0.5,4.0,"(0.5,0.5,0.5,0.5,0,0,0,0)" -vaddsd-xmm_xmm_xmm,0.5,4.0,"(0.5,0.5,0,0,0,0,0,0)" +vaddsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0,0.5,0.5,0.5,0,0,0,0)" +vaddsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0,0.5,0,0,0,0,0,0)" vaddss-xmm_xmm_xmm,0.5,4.0,"(-1,)" -vcvtsi2ss-xmm_xmm_r32,1.0,-1.0,"(0.5,0.5,0,0,0,1.0,0,0)" +vcvtsi2ss-xmm_xmm_r32,1.0,-1.0,"(0.5, 0,0.5,0,0,0,1.0,0,0)" vcvtss2si-r32_xmm,1.0,-1.0,"(-1,)" -vdivpd-ymm_ymm_ymm,8.0,14.0,"(8.0, 0, 0, 0, 0, 0, 0, 0)" -vdivps-ymm_ymm_ymm,5.0,11.0,"(5.0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd132pd-xmm_xmm_mem,0.5,4.0,"(0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0)" -vfmadd132sd-xmm_xmm_mem,0.5,4.0,"(0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0)" -vfmadd213pd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vfmadd213pd-ymm_ymm_mem,0.5,4.0,"(0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0)" -vfmadd132pd-ymm_ymm_mem,0.5,4.0,"(0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0)" -vfmadd213pd-ymm_ymm_ymm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vfmadd213ps-xmm_xmm_xmm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" +vfmadd132pd-xmm_xmm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)" +vfmadd132sd-xmm_xmm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)" +vfmadd213pd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vfmadd213pd-ymm_ymm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)" +vfmadd132pd-ymm_ymm_mem,0.5,4.0,"(0.5, 0, 0.5, 0.5, 0.5, 0, 0, 0, 0)" +vfmadd213pd-ymm_ymm_ymm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vfmadd213ps-xmm_xmm_xmm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" vfmadd213sd,0.5,5.0,"(-1,)" vfmadd213ss,0.5,5.0,"(-1,)" vinsertf128-ymm_ymm_imd,1.0,3.0,"(-1,)" -vmovapd-mem_ymm,1.0,-1.0,"(0, 0, 0, 0, 1.0, 0, 0, 0)" -vmovapd-ymm_mem,0.5,-1.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0)" -vmovaps-mem_xmm,1.0,3.0,"(0, 0, 0.5, 0.5, 1.0, 0, 0, 0)" -vmovaps-xmm_mem,0.5,2.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0)" -vmovsd-mem_xmm,1.0,3.0,"(0,0,0.5,0.5,1.0,0,0,0)" -vmovsd-xmm_mem,0.5,3.0,"(0,0,0.5,0.5,0,0,0,0)" -vmovupd-mem_ymm,1.0,3.0,"(0,0,0.5,0.5,1.0,0,0,0)" +vmovapd-mem_ymm,1.0,-1.0,"(0, 0, 0, 0, 0, 1.0, 0, 0, 0)" +vmovapd-ymm_mem,0.5,-1.0,"(0, 0, 0, 0.5, 0.5, 0, 0, 0, 0)" +vmovaps-mem_xmm,1.0,3.0,"(0, 0, 0, 0.5, 0.5, 1.0, 0, 0, 0)" +vmovaps-xmm_mem,0.5,2.0,"(0, 0, 0, 0.5, 0.5, 0, 0, 0, 0)" +vmovsd-mem_xmm,1.0,3.0,"(0, 0,0,0.5,0.5,1.0,0,0,0)" +vmovsd-xmm_mem,0.5,3.0,"(0, 0,0,0.5,0.5,0,0,0,0)" +vmovupd-mem_ymm,1.0,3.0,"(0, 0,0,0.5,0.5,1.0,0,0,0)" vmovupd-ymm_mem,0.5,-1.0,"(-1,)" -vmulpd-ymm_ymm_ymm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vmulps-ymm_ymm_ymm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5,0.5,0.5,0.5,0,0,0,0)" -vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5,0.5,0,0,0,0,0,0)" +vmulpd-ymm_ymm_ymm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vmulps-ymm_ymm_ymm,0.5,4.0,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0,0.5,0.5,0.5,0,0,0,0)" +vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0,0.5,0,0,0,0,0,0)" vmulss-xmm_xmm_xmm,0.5,4.0,"(-1,)" vrcpps-avx,1.0,4.0,"(-1,)" vsqrtpd-avx,12.0,21.0,"(-1,)" @@ -132,22 +130,24 @@ vsubpd-ymm_ymm_mem,0.5,4.0,"(-1,)" vsubsd-xmm_xmm_mem,0.5,4.0,"(-1,)" vsubsd-xmm_xmm_xmm,0.5,4.0,"(-1,)" vsubss-xmm_xmm_xmm,0.5,4.0,"(-1,)" -vxorps-xmm_xmm_xmm,0.3333333333333333,1.0,"(0.33,0.33,0,0,0,0.33,0,0)" -xor-r32_r32,0.25,1.0,"(0.25,0.25,0,0,0,0.25,0.25,0)" -cmpl-r32_imd,0.25,1.0,"(0.25, 0.25,0,0,0,0.25,0.25,0)" -vaddpd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vaddpd-ymm_ymm_ymm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vcvtdq2pd-xmm_xmm,1.0,5,"(1.0, 0, 0, 0, 0, 1.0, 0, 0)" -vcvtdq2pd-ymm_xmm,1.0,7,"(1.0, 0, 0, 0, 0, 1.0, 0, 0)" -vcvtsi2sd-xmm_xmm_r32,1.0,4,"(0.5, 0.5, 0, 0, 0, 1.0, 0, 0)" -vdivpd-xmm_xmm_xmm,4.0,13,"(4.0, 0, 0, 0, 0, 0, 0, 0)" -vdivsd-xmm_xmm_xmm,4.0,13,"(4.0, 0, 0, 0, 0, 0, 0, 0)" -vextracti128-xmm_ymm_imd,1.0,3,"(0, 0, 0, 0, 0, 1.0, 0, 0)" -vfmadd132pd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vfmadd132pd-ymm_ymm_ymm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vfmadd132sd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0.33, 0, 0, 0, 0.33, 0, 0)" -vpaddd-ymm_ymm_ymm,0.3333333333333333,1,"(0.33, 0.33, 0, 0, 0, 0.33, 0, 0)" -vpshufd-xmm_xmm_imd,1.0,1,"(0, 0, 0, 0, 0, 1.0, 0, 0)" -vxorpd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0.33, 0, 0, 0, 0.33, 0, 0)" +vxorps-xmm_xmm_xmm,0.3333333333333333,1.0,"(0.33, 0,0.33,0,0,0,0.33,0,0)" +xor-r32_r32,0.25,1.0,"(0.25, 0,0.25,0,0,0,0.25,0.25,0)" +cmpl-r32_imd,0.25,1.0,"(0.25, 0, 0.25,0,0,0,0.25,0.25,0)" +vaddpd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vaddpd-ymm_ymm_ymm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vcvtdq2pd-xmm_xmm,1.0,5,"(1.0, 0, 0, 0, 0, 0, 1.0, 0, 0)" +vcvtdq2pd-ymm_xmm,1.0,7,"(1.0, 0, 0, 0, 0, 0, 1.0, 0, 0)" +vcvtsi2sd-xmm_xmm_r32,1.0,4,"(0.5, 0, 0.5, 0, 0, 0, 1.0, 0, 0)" +vextracti128-xmm_ymm_imd,1.0,3,"(0, 0, 0, 0, 0, 0, 1.0, 0, 0)" +vfmadd132pd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vfmadd132pd-ymm_ymm_ymm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vfmadd132sd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0, 0.5, 0, 0, 0, 0, 0, 0)" +vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)" +vpaddd-ymm_ymm_ymm,0.3333333333333333,1,"(0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)" +vpshufd-xmm_xmm_imd,1.0,1,"(0, 0, 0, 0, 0, 0, 1.0, 0, 0)" +vxorpd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0, 0.33, 0, 0, 0, 0.33, 0, 0)" +vdivpd-ymm_ymm_ymm,8.0,14.0,"(1.0, 8.0, 0, 0, 0, 0, 0, 0, 0)" +vdivps-ymm_ymm_ymm,5.0,11.0,"(1.0, 5.0, 0, 0, 0, 0, 0, 0, 0)" +vdivpd-xmm_xmm_xmm,4.0,13,"(1.0, 4.0, 0, 0, 0, 0, 0, 0, 0)" +vdivsd-xmm_xmm_xmm,4.0,13,"(1.0, 4.0, 0, 0, 0, 0, 0, 0, 0)" diff --git a/osaca/data/zen_data.csv b/osaca/data/zen_data.csv index 5325c10..73683f7 100644 --- a/osaca/data/zen_data.csv +++ b/osaca/data/zen_data.csv @@ -1,123 +1,123 @@ instr,TP,LT,ports -jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jmpq-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" -add-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -add-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -addl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -addq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -addl-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" -addq-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" -add-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" -add-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" -addl-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" -addq-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" -cmp-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" -cmpl-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" -cmp-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" -cmpl-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" -cmp-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -cmpl-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -cmp-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -cmp-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -cmpq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -cmpq-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -inc-r64,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -incq-r64,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -incl-r32,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -mov-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" -mov-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -mov-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -movq-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" -movq-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -movl-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -movslq-r64_r32,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -sub-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -vaddpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0.5, 0.5)" -vaddsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0.5, 0.5)" -vaddsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vaddss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vcvtsi2ss-xmm_xmm_r32,1.0,4.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0)" -vcvtss2si-r32_xmm,1.0,7.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0)" -cvtsi2ss-xmm_r32,1.0,8.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0)" -vfmadd213pd-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd213pd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd213ps-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd213ps-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd213sd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd213ss-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd132sd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -vfmadd132pd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -vfmadd132pd-ymm_ymm_mem,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" +jae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +ja-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jcxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jecxz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +je-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jg-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jmp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jmpq-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnae-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jna-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnbe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnb-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnc-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jne-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnge-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jng-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnle-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnl-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jno-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jns-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jnz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jpe-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jp-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jpo-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +js-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +jz-lbl,0.0,0.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +add-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +add-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +addl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +addq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +addl-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" +addq-mem_imd,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" +add-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" +add-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" +addl-mem_r32,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" +addq-mem_r64,1.0,7.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0)" +cmp-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" +cmpl-mem_r32,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" +cmp-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" +cmpl-r32_mem,0.5,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5)" +cmp-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +cmpl-r32_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +cmp-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +cmp-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +cmpq-r64_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +cmpq-r64_r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +inc-r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +incq-r64,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +incl-r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +mov-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" +mov-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +mov-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +movq-mem_r64,1.0,4.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" +movq-r64_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +movl-r32_mem,0.5,3.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +movslq-r64_r32,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +sub-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +vaddpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0.5, 0.5)" +vaddsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)" +vaddsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)" +vaddss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)" +vcvtsi2ss-xmm_xmm_r32,1.0,4.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)" +vcvtss2si-r32_xmm,1.0,7.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)" +cvtsi2ss-xmm_r32,1.0,8.0,"(1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd213pd-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd213pd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd213ps-ymm_ymm_ymm,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd213ps-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd213sd-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd213ss-xmm_xmm_xmm,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd132sd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +vfmadd132pd-xmm_xmm_mem,0.5,5.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +vfmadd132pd-ymm_ymm_mem,1.0,5.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" vinsertf128-ymm_ymm_imd,0.6666666666666667,1.0,"(-1,)" -vmovsd-mem_xmm,1.0,8.0,"(0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" -vmovsd-xmm_mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -vmulpd-ymm_ymm_ymm,1.0,4.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)" -vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vmulss-xmm_xmm_xmm,0.5,3.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vsubpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 1.0, 1.0)" -vsubsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0.5, 0.5)" -vsubsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vsubss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0)" -vmovaps-xmm_mem,0.5,3.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0.5, 0.5)" -vmovaps-mem_xmm,1.0,5.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 1.0, 1.0)" -vmovapd-ymm_mem,1.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" -vmovapd-mem_ymm,2.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)" -movq_r64_xmm,1.0,-1.0,"(0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0)" -#prefetcht0-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -#prefetchw-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" -cmpl-r32_imd,0.25,1.0,"(0, 0 ,0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" -vaddpd-xmm_xmm_xmm,0.5,3,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)" -vaddpd-ymm_ymm_ymm,1,3,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0)" -vcvtdq2pd-xmm_xmm,1,7,"(0.5, 0.5, 0, 1.0, 0, 0, 0, 0, 0, 0)" -vcvtdq2pd-ymm_xmm,2,7,"(1.0, 1.0, 0, 2.0, 0, 0, 0, 0, 0, 0)" -vcvtsi2sd-xmm_xmm_r32,1,4,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0)" -vdivpd-xmm_xmm_xmm,4,8,"(0, 0, 0, 4.0, 0, 0, 0, 0, 0, 0)" -vdivsd-xmm_xmm_xmm,4,8,"(0, 0, 0, 4.0, 0, 0, 0, 0, 0, 0)" -vextracti128-xmm_ymm_imd,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0)" -vfmadd132pd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd132pd-ymm_ymm_ymm,1,5,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)" -vfmadd132sd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" -vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0)" -vpaddd-ymm_ymm_ymm,0.6666666666666667,1,"(0.66, 0.66, 0, 0.66, 0, 0, 0, 0, 0, 0)" -vpshufd-xmm_xmm_imd,0.5,1,"(0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)" -vxorpd-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0)" -vxorps-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0)" +vmovsd-mem_xmm,1.0,8.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" +vmovsd-xmm_mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +vmulpd-ymm_ymm_ymm,1.0,4.0,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vmulsd-xmm_xmm_mem,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +vmulsd-xmm_xmm_xmm,0.5,4.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vmulss-xmm_xmm_xmm,0.5,3.0,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vsubpd-ymm_ymm_mem,1.0,3.0,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 1.0, 1.0)" +vsubsd-xmm_xmm_mem,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0.5, 0.5)" +vsubsd-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)" +vsubss-xmm_xmm_xmm,0.5,3.0,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0)" +vmovaps-xmm_mem,0.5,3.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0.5, 0.5)" +vmovaps-mem_xmm,1.0,5.0,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 1.0, 1.0)" +vmovapd-ymm_mem,1.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0, 1.0)" +vmovapd-mem_ymm,2.0,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0, 2.0)" +movq_r64_xmm,1.0,-1.0,"(0, 0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0)" +#prefetcht0-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +#prefetchw-mem,0.5,-1.0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5)" +cmpl-r32_imd,0.25,1.0,"(0, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0.25, 0, 0)" +vaddpd-xmm_xmm_xmm,0.5,3,"(0, 0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" +vaddpd-ymm_ymm_ymm,1,3,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)" +vcvtdq2pd-xmm_xmm,1,7,"(0.5, 0.5, 0, 1.0, 0, 0, 0, 0, 0, 0, 0)" +vcvtdq2pd-ymm_xmm,2,7,"(1.0, 1.0, 0, 2.0, 0, 0, 0, 0, 0, 0, 0)" +vcvtsi2sd-xmm_xmm_r32,1,4,"(0, 0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0)" +vextracti128-xmm_ymm_imd,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)" +vfmadd132pd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd132pd-ymm_ymm_ymm,1,5,"(1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vfmadd132sd-xmm_xmm_xmm,0.5,5,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vmulpd-xmm_xmm_xmm,0.5,4,"(0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0)" +vpaddd-xmm_xmm_xmm,0.3333333333333333,1,"(0.33, 0.33, 0, 0.33, 0, 0, 0, 0, 0, 0, 0)" +vpaddd-ymm_ymm_ymm,0.6666666666666667,1,"(0.66, 0.66, 0, 0.66, 0, 0, 0, 0, 0, 0, 0)" +vpshufd-xmm_xmm_imd,0.5,1,"(0, 0.5, 0.5, 0, 0, 0, 0, 0, 0, 0, 0)" +vxorpd-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)" +vxorps-xmm_xmm_xmm,0.25,1,"(0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0, 0, 0)" +vdivpd-xmm_xmm_xmm,4,8,"(0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)" +vdivsd-xmm_xmm_xmm,4,8,"(0, 0, 0, 1.0, 4.0, 0, 0, 0, 0, 0, 0)" diff --git a/osaca/eu_sched.py b/osaca/eu_sched.py index 34710f3..05ae5f5 100755 --- a/osaca/eu_sched.py +++ b/osaca/eu_sched.py @@ -13,6 +13,7 @@ from osaca.param import Register, MemAddr class Scheduler(object): arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8, 'ZEN': 10} + dv_port_dict = {'SKL': 0, 'ZEN': 3} ports = None # type: int instrList = None # type: list>, # content of most inner list in instrList: instr, operand(s), instr form @@ -21,6 +22,7 @@ class Scheduler(object): ld_ports = None # type: list # enable flag for parallel ld/st en_par_ldst = False # type: boolean + dv_port = -1 # type: int def __init__(self, arch, instruction_list): @@ -34,6 +36,13 @@ class Scheduler(object): if(arch == 'ZEN'): self.en_par_ldst = True self.ld_ports = [8, 9] + # check for DV port + try: + self.dv_port = self.dv_port_dict[arch] + except KeyError: + # no DV port available (yet, new feature in OSACA v0.2) + # do nothing + pass self.instrList = instruction_list #curr_dir = os.path.realpath(__file__)[:-11] osaca_dir = os.path.expanduser('~/.osaca/') @@ -51,8 +60,12 @@ class Scheduler(object): """ sched = self.get_head() # Initialize ports - occ_ports = [[0] * self.ports for x in range(len(self.instrList))] - port_bndgs = [0] * self.ports + # Add DV port, if it is existing + tmp_port = 0 + if(self.dv_port != -1): + tmp_port = 1 + occ_ports = [[0] * (self.ports + tmp_port) for x in range(len(self.instrList))] + port_bndgs = [0] * (self.ports + tmp_port) # Store instruction counter for parallel ld/st par_ldst = 0 # Count the number of store instr if we schedule for an architecture with par ld/st @@ -307,12 +320,17 @@ class Scheduler(object): str String containing the header """ - horiz_line = '-' * 7 * self.ports + '-\n' + horiz_line = '-' * 7 * self.ports + if(self.dv_port != -1): + horiz_line += '-' * 6 + horiz_line += '-\n' port_anno = (' ' * (math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles' + ' ' * (math.ceil((len(horiz_line) - 24) / 2)) + '\n') port_line = '' for i in range(0, self.ports): port_line += '| {} '.format(i) + if(i == self.dv_port): + port_line = port_line + '- DV ' port_line += '|\n' head = port_anno + port_line + horiz_line return head @@ -335,7 +353,8 @@ class Scheduler(object): """ line = '' r_space = ' ' - for i in occ_ports: + for p_num, i in enumerate(occ_ports): + pipe = '|' if(isinstance(i, str)): cycles = i i = float(i[1:-1]) @@ -343,10 +362,12 @@ class Scheduler(object): else: cycles = ' ' if (i == 0) else '%.2f' % float(i) r_space = ' ' + if(p_num == self.dv_port + 1 and p_num != 0): + pipe = ' ' if(i >= 10): - line += '|' + cycles + r_space + line += pipe + cycles + r_space else: - line += '| ' + cycles + r_space + line += pipe + ' ' + cycles + r_space line += '| ' + instr_name + '\n' return line @@ -368,13 +389,21 @@ class Scheduler(object): header = 'Port Binding in Cycles Per Iteration:\n' horiz_line = '-' * 10 + '-' * total + '\n' port_line = '| Port |' + after_dv = 0 for i in range(0, self.ports): - port_line += ' ' * sp_left[i] + str(i) + ' ' * sp_right[i] + '|' + if(i == self.dv_port): + port_line += ' ' * sp_left[i] + str(i) + ' ' * sp_right[i] + '-' + port_line += ' ' * (sp_left[i+1] - 1) + 'DV' + ' ' * sp_right[i+1] + '|' + after_dv = 1 + else: + port_line += ' ' * sp_left[i + after_dv] + str(i) + ' ' * sp_right[i + after_dv] + port_line += '|' port_line += '\n' cyc_line = '| Cycles |' for i in range(len(port_bndg)): + pipe = '|' if (i != self.dv_port) else ' ' cyc = str(round(port_bndg[i], 2)) - cyc_line += ' {} |'.format(cyc) + cyc_line += ' {} {}'.format(cyc, pipe) cyc_line += '\n' binding = header + horiz_line + port_line + horiz_line + cyc_line + horiz_line return binding @@ -416,26 +445,3 @@ class Scheduler(object): if __name__ == '__main__': print('Nothing to do.') - # data = [ - # ['lea',Register('RAX'),MemAddr('%edx,(%rax,%rax,1)'),'lea 0x1(%rax,%rax,1),%edx'], - # ['vcvtsi2ss',Register('XMM0'),Register('XMM0'),Register('RAX'),'vcvtsi2ss %edx,%xmm2,%xmm2'], - # ['vmulss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vmulss %xmm2,%xmm0,%xmm3'], - # ['lea',Register('RAX'),MemAddr('%edx,(%rax,%rax,1)'),'lea 0x2(%rax,%rax,1),%ecx'], - # ['vaddss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vaddss %xmm3,%xmm1,%xmm4'], - # ['vxorps',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vxorps %xmm1, %xmm1,%xmm1'], - # ['vcvtsi2ss',Register('XMM0'),Register('XMM0'),Register('RAX'),'vcvtsi2ss %ecx,%xmm1,%xmm1'], - # ['vmulss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vmulss %xmm1,%xmm0,%xmm5'], - # ['vmovss',MemAddr('%edx,(%rax,%rax,1)'),Register('XMM0'),'vmovss %xmm4,0x4(%rsp,%rax,8)'], - # ['vaddss',Register('XMM0'),Register('XMM0'),Register('XMM0'),'vaddss %xmm5,%xmm4,%xmm1'], - # ['vmovss',MemAddr('%edx,(%rax,%rax,1)'),Register('XMM0'),'vmovss %xmm1,0x8(%rsp,%rax,8)'], - # ['inc',Register('RAX'),'inc %rax'], - # ['cmp',Register('RAX'),Parameter('IMD'),'cmp $0x1f3,%rax'], - # ['jb',Parameter('LBL'),'jb 400bc2 '] - # ] - - # sched = Scheduler('ivb', data) - # output,binding = sched.schedule() - # print(sched.get_port_binding(binding)) - # print(sched.get_report_info(),end='') - # print(output) - # print('Block Throughput: {}'.format(round(max(binding),2)))