From ca22c02691f26908c99aa4962b1fc9d3779cee27 Mon Sep 17 00:00:00 2001 From: Jan Laukemann Date: Sat, 23 Sep 2017 17:52:10 +0200 Subject: [PATCH] updated testcases --- testcases/TaxCalc/add-r32_mem-TP.S | 134 ------------ testcases/TaxCalc/add-r32_mem.S | 134 ------------ testcases/TaxCalc/add-r64_r64-TP.S | 143 ------------- testcases/TaxCalc/add-r64_r64.S | 143 ------------- testcases/TaxCalc/cmp-r32_mem-TP.S | 134 ------------ testcases/TaxCalc/cmp-r32_mem.S | 134 ------------ testcases/TaxCalc/cmp-r32_r32-TP.S | 143 ------------- testcases/TaxCalc/cmp-r32_r32.S | 143 ------------- testcases/TaxCalc/cmp-r64_imd-TP.S | 170 --------------- testcases/TaxCalc/cmp-r64_imd.S | 170 --------------- testcases/TaxCalc/dec-r32-TP.S | 143 ------------- testcases/TaxCalc/dec-r32.S | 143 ------------- testcases/TaxCalc/inc-r32-TP.S | 143 ------------- testcases/TaxCalc/inc-r32.S | 143 ------------- testcases/TaxCalc/lea-r32_mem-TP.S | 134 ------------ testcases/TaxCalc/lea-r32_mem.S | 134 ------------ testcases/TaxCalc/lea-r64_mem-TP.S | 134 ------------ testcases/TaxCalc/lea-r64_mem.S | 134 ------------ testcases/TaxCalc/mov-mem_r32-TP.S | 143 ------------- testcases/TaxCalc/mov-mem_r32.S | 143 ------------- testcases/TaxCalc/mov-mem_r64-TP.S | 143 ------------- testcases/TaxCalc/mov-mem_r64.S | 143 ------------- testcases/TaxCalc/mov-r32_mem-TP.S | 134 ------------ testcases/TaxCalc/mov-r32_mem.S | 134 ------------ testcases/TaxCalc/mov-r32_r32-TP.S | 207 ------------------- testcases/TaxCalc/mov-r32_r32.S | 207 ------------------- testcases/TaxCalc/mov-r64_mem-TP.S | 134 ------------ testcases/TaxCalc/mov-r64_mem.S | 134 ------------ testcases/TaxCalc/movslq-r64_mem-TP.S | 134 ------------ testcases/TaxCalc/movslq-r64_mem.S | 134 ------------ testcases/TaxCalc/movslq-r64_r32-TP.S | 143 ------------- testcases/TaxCalc/movslq-r64_r32.S | 143 ------------- testcases/TaxCalc/movzbl-r32_r8-TP.S | 207 ------------------- testcases/TaxCalc/movzbl-r32_r8.S | 207 ------------------- testcases/TaxCalc/neg-r32-TP.S | 143 ------------- testcases/TaxCalc/neg-r32.S | 143 ------------- testcases/TaxCalc/sub-r32_r32-TP.S | 143 ------------- testcases/TaxCalc/sub-r32_r32.S | 143 ------------- testcases/TaxCalc/test-r32_r32-TP.S | 143 ------------- testcases/TaxCalc/test-r32_r32.S | 143 ------------- testcases/TaxCalc/vaddpd-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vaddpd-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/vaddpd-ymm_ymm_ymm-TP.S | 110 ---------- testcases/TaxCalc/vaddpd-ymm_ymm_ymm.S | 110 ---------- testcases/TaxCalc/vaddsd-xmm_xmm_mem-TP.S | 108 ---------- testcases/TaxCalc/vaddsd-xmm_xmm_mem.S | 108 ---------- testcases/TaxCalc/vaddsd-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vaddsd-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/vmovapd-xmm_xmm-TP.S | 172 --------------- testcases/TaxCalc/vmovapd-xmm_xmm.S | 172 --------------- testcases/TaxCalc/vmovapd-ymm_ymm-TP.S | 174 ---------------- testcases/TaxCalc/vmovapd-ymm_ymm.S | 174 ---------------- testcases/TaxCalc/vmovaps-xmm_xmm-TP.S | 172 --------------- testcases/TaxCalc/vmovaps-xmm_xmm.S | 172 --------------- testcases/TaxCalc/vmovhpd-xmm_xmm_mem-TP.S | 108 ---------- testcases/TaxCalc/vmovhpd-xmm_xmm_mem.S | 108 ---------- testcases/TaxCalc/vmovq-r64_xmm-TP.S | 141 ------------- testcases/TaxCalc/vmovq-r64_xmm.S | 141 ------------- testcases/TaxCalc/vmovq-xmm_r64-TP.S | 143 ------------- testcases/TaxCalc/vmovq-xmm_r64.S | 143 ------------- testcases/TaxCalc/vmovsd-mem_xmm-TP.S | 108 ---------- testcases/TaxCalc/vmovsd-mem_xmm.S | 108 ---------- testcases/TaxCalc/vmovsd-xmm_mem-TP.S | 101 --------- testcases/TaxCalc/vmovsd-xmm_mem.S | 101 --------- testcases/TaxCalc/vmovsd-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vmovsd-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/vmovupd-xmm_mem-TP.S | 101 --------- testcases/TaxCalc/vmovupd-xmm_mem.S | 101 --------- testcases/TaxCalc/vmulpd-ymm_ymm_mem-TP.S | 110 ---------- testcases/TaxCalc/vmulpd-ymm_ymm_mem.S | 110 ---------- testcases/TaxCalc/vmulpd-ymm_ymm_ymm-TP.S | 110 ---------- testcases/TaxCalc/vmulpd-ymm_ymm_ymm.S | 110 ---------- testcases/TaxCalc/vmulsd-xmm_xmm_mem-TP.S | 108 ---------- testcases/TaxCalc/vmulsd-xmm_xmm_mem.S | 108 ---------- testcases/TaxCalc/vmulsd-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vmulsd-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/vsubpd-ymm_ymm_ymm-TP.S | 110 ---------- testcases/TaxCalc/vsubpd-ymm_ymm_ymm.S | 110 ---------- testcases/TaxCalc/vsubsd-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vsubsd-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/vxorpd-xmm_xmm_xmm-TP.S | 172 --------------- testcases/TaxCalc/vxorpd-xmm_xmm_xmm.S | 172 --------------- testcases/TaxCalc/vxorpd-ymm_ymm_ymm-TP.S | 110 ---------- testcases/TaxCalc/vxorpd-ymm_ymm_ymm.S | 110 ---------- testcases/TaxCalc/vxorps-xmm_xmm_xmm-TP.S | 108 ---------- testcases/TaxCalc/vxorps-xmm_xmm_xmm.S | 108 ---------- testcases/TaxCalc/xor-r32_r32-TP.S | 143 ------------- testcases/TaxCalc/xor-r32_r32.S | 143 ------------- 90 files changed, 12018 deletions(-) delete mode 100644 testcases/TaxCalc/add-r32_mem-TP.S delete mode 100644 testcases/TaxCalc/add-r32_mem.S delete mode 100644 testcases/TaxCalc/add-r64_r64-TP.S delete mode 100644 testcases/TaxCalc/add-r64_r64.S delete mode 100644 testcases/TaxCalc/cmp-r32_mem-TP.S delete mode 100644 testcases/TaxCalc/cmp-r32_mem.S delete mode 100644 testcases/TaxCalc/cmp-r32_r32-TP.S delete mode 100644 testcases/TaxCalc/cmp-r32_r32.S delete mode 100644 testcases/TaxCalc/cmp-r64_imd-TP.S delete mode 100644 testcases/TaxCalc/cmp-r64_imd.S delete mode 100644 testcases/TaxCalc/dec-r32-TP.S delete mode 100644 testcases/TaxCalc/dec-r32.S delete mode 100644 testcases/TaxCalc/inc-r32-TP.S delete mode 100644 testcases/TaxCalc/inc-r32.S delete mode 100644 testcases/TaxCalc/lea-r32_mem-TP.S delete mode 100644 testcases/TaxCalc/lea-r32_mem.S delete mode 100644 testcases/TaxCalc/lea-r64_mem-TP.S delete mode 100644 testcases/TaxCalc/lea-r64_mem.S delete mode 100644 testcases/TaxCalc/mov-mem_r32-TP.S delete mode 100644 testcases/TaxCalc/mov-mem_r32.S delete mode 100644 testcases/TaxCalc/mov-mem_r64-TP.S delete mode 100644 testcases/TaxCalc/mov-mem_r64.S delete mode 100644 testcases/TaxCalc/mov-r32_mem-TP.S delete mode 100644 testcases/TaxCalc/mov-r32_mem.S delete mode 100644 testcases/TaxCalc/mov-r32_r32-TP.S delete mode 100644 testcases/TaxCalc/mov-r32_r32.S delete mode 100644 testcases/TaxCalc/mov-r64_mem-TP.S delete mode 100644 testcases/TaxCalc/mov-r64_mem.S delete mode 100644 testcases/TaxCalc/movslq-r64_mem-TP.S delete mode 100644 testcases/TaxCalc/movslq-r64_mem.S delete mode 100644 testcases/TaxCalc/movslq-r64_r32-TP.S delete mode 100644 testcases/TaxCalc/movslq-r64_r32.S delete mode 100644 testcases/TaxCalc/movzbl-r32_r8-TP.S delete mode 100644 testcases/TaxCalc/movzbl-r32_r8.S delete mode 100644 testcases/TaxCalc/neg-r32-TP.S delete mode 100644 testcases/TaxCalc/neg-r32.S delete mode 100644 testcases/TaxCalc/sub-r32_r32-TP.S delete mode 100644 testcases/TaxCalc/sub-r32_r32.S delete mode 100644 testcases/TaxCalc/test-r32_r32-TP.S delete mode 100644 testcases/TaxCalc/test-r32_r32.S delete mode 100644 testcases/TaxCalc/vaddpd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vaddpd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vaddpd-ymm_ymm_ymm-TP.S delete mode 100644 testcases/TaxCalc/vaddpd-ymm_ymm_ymm.S delete mode 100644 testcases/TaxCalc/vaddsd-xmm_xmm_mem-TP.S delete mode 100644 testcases/TaxCalc/vaddsd-xmm_xmm_mem.S delete mode 100644 testcases/TaxCalc/vaddsd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vaddsd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vmovapd-xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vmovapd-xmm_xmm.S delete mode 100644 testcases/TaxCalc/vmovapd-ymm_ymm-TP.S delete mode 100644 testcases/TaxCalc/vmovapd-ymm_ymm.S delete mode 100644 testcases/TaxCalc/vmovaps-xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vmovaps-xmm_xmm.S delete mode 100644 testcases/TaxCalc/vmovhpd-xmm_xmm_mem-TP.S delete mode 100644 testcases/TaxCalc/vmovhpd-xmm_xmm_mem.S delete mode 100644 testcases/TaxCalc/vmovq-r64_xmm-TP.S delete mode 100644 testcases/TaxCalc/vmovq-r64_xmm.S delete mode 100644 testcases/TaxCalc/vmovq-xmm_r64-TP.S delete mode 100644 testcases/TaxCalc/vmovq-xmm_r64.S delete mode 100644 testcases/TaxCalc/vmovsd-mem_xmm-TP.S delete mode 100644 testcases/TaxCalc/vmovsd-mem_xmm.S delete mode 100644 testcases/TaxCalc/vmovsd-xmm_mem-TP.S delete mode 100644 testcases/TaxCalc/vmovsd-xmm_mem.S delete mode 100644 testcases/TaxCalc/vmovsd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vmovsd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vmovupd-xmm_mem-TP.S delete mode 100644 testcases/TaxCalc/vmovupd-xmm_mem.S delete mode 100644 testcases/TaxCalc/vmulpd-ymm_ymm_mem-TP.S delete mode 100644 testcases/TaxCalc/vmulpd-ymm_ymm_mem.S delete mode 100644 testcases/TaxCalc/vmulpd-ymm_ymm_ymm-TP.S delete mode 100644 testcases/TaxCalc/vmulpd-ymm_ymm_ymm.S delete mode 100644 testcases/TaxCalc/vmulsd-xmm_xmm_mem-TP.S delete mode 100644 testcases/TaxCalc/vmulsd-xmm_xmm_mem.S delete mode 100644 testcases/TaxCalc/vmulsd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vmulsd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vsubpd-ymm_ymm_ymm-TP.S delete mode 100644 testcases/TaxCalc/vsubpd-ymm_ymm_ymm.S delete mode 100644 testcases/TaxCalc/vsubsd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vsubsd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vxorpd-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vxorpd-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/vxorpd-ymm_ymm_ymm-TP.S delete mode 100644 testcases/TaxCalc/vxorpd-ymm_ymm_ymm.S delete mode 100644 testcases/TaxCalc/vxorps-xmm_xmm_xmm-TP.S delete mode 100644 testcases/TaxCalc/vxorps-xmm_xmm_xmm.S delete mode 100644 testcases/TaxCalc/xor-r32_r32-TP.S delete mode 100644 testcases/TaxCalc/xor-r32_r32.S diff --git a/testcases/TaxCalc/add-r32_mem-TP.S b/testcases/TaxCalc/add-r32_mem-TP.S deleted file mode 100644 index 64fc02f..0000000 --- a/testcases/TaxCalc/add-r32_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR add -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/add-r32_mem.S b/testcases/TaxCalc/add-r32_mem.S deleted file mode 100644 index 7c94bcc..0000000 --- a/testcases/TaxCalc/add-r32_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR add -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/add-r64_r64-TP.S b/testcases/TaxCalc/add-r64_r64-TP.S deleted file mode 100644 index d475743..0000000 --- a/testcases/TaxCalc/add-r64_r64-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR add -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR rdx, rax - INSTR r9, rbx - INSTR r10, rcx - INSTR r11, rax - INSTR r12, rbx - INSTR r13, rcx - INSTR r14, rax - INSTR r15, rbx - INSTR rdx, rcx - INSTR r9, rax - INSTR r10, rbx - INSTR r11, rcx - INSTR r12, rax - INSTR r13, rbx - INSTR r14, rcx - INSTR r15, rax - INSTR rdx, rbx - INSTR r9, rcx - INSTR r10, rax - INSTR r11, rbx - INSTR r12, rcx - INSTR r13, rax - INSTR r14, rbx - INSTR r15, rcx - INSTR rdx, rax - INSTR r9, rbx - INSTR r10, rcx - INSTR r11, rax - INSTR r12, rbx - INSTR r13, rcx - INSTR r14, rax - INSTR r15, rbx - INSTR rdx, rcx - INSTR r9, rax - INSTR r10, rbx - INSTR r11, rcx - INSTR r12, rax - INSTR r13, rbx - INSTR r14, rcx - INSTR r15, rax - INSTR rdx, rbx - INSTR r9, rcx - INSTR r10, rax - INSTR r11, rbx - INSTR r12, rcx - INSTR r13, rax - INSTR r14, rbx - INSTR r15, rcx - INSTR rdx, rax - INSTR r9, rbx - INSTR r10, rcx - INSTR r11, rax - INSTR r12, rbx - INSTR r13, rcx - INSTR r14, rax - INSTR r15, rbx - INSTR rdx, rcx - INSTR r9, rax - INSTR r10, rbx - INSTR r11, rcx - INSTR r12, rax - INSTR r13, rbx - INSTR r14, rcx - INSTR r15, rax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/add-r64_r64.S b/testcases/TaxCalc/add-r64_r64.S deleted file mode 100644 index a64dc7c..0000000 --- a/testcases/TaxCalc/add-r64_r64.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR add -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - INSTR rax, rbx - INSTR rbx, rax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/cmp-r32_mem-TP.S b/testcases/TaxCalc/cmp-r32_mem-TP.S deleted file mode 100644 index 88baf8d..0000000 --- a/testcases/TaxCalc/cmp-r32_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR cmp -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/cmp-r32_mem.S b/testcases/TaxCalc/cmp-r32_mem.S deleted file mode 100644 index 12b88d1..0000000 --- a/testcases/TaxCalc/cmp-r32_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR cmp -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/cmp-r32_r32-TP.S b/testcases/TaxCalc/cmp-r32_r32-TP.S deleted file mode 100644 index c359fe8..0000000 --- a/testcases/TaxCalc/cmp-r32_r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR cmp -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/cmp-r32_r32.S b/testcases/TaxCalc/cmp-r32_r32.S deleted file mode 100644 index 99b4b20..0000000 --- a/testcases/TaxCalc/cmp-r32_r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR cmp -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/cmp-r64_imd-TP.S b/testcases/TaxCalc/cmp-r64_imd-TP.S deleted file mode 100644 index a24dfd0..0000000 --- a/testcases/TaxCalc/cmp-r64_imd-TP.S +++ /dev/null @@ -1,170 +0,0 @@ -#define INSTR cmp -#define NINST 100 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rdx, 1 - INSTR r9, 2 - INSTR r10, 13 - INSTR r11, 1 - INSTR r12, 2 - INSTR r13, 13 - INSTR r14, 1 - INSTR r15, 2 - INSTR rdx, 13 - INSTR r9, 1 - INSTR r10, 2 - INSTR r11, 13 - INSTR r12, 1 - INSTR r13, 2 - INSTR r14, 13 - INSTR r15, 1 - INSTR rdx, 2 - INSTR r9, 13 - INSTR r10, 1 - INSTR r11, 2 - INSTR r12, 13 - INSTR r13, 1 - INSTR r14, 2 - INSTR r15, 13 - INSTR rdx, 1 - INSTR r9, 2 - INSTR r10, 13 - INSTR r11, 1 - INSTR r12, 2 - INSTR r13, 13 - INSTR r14, 1 - INSTR r15, 2 - INSTR rdx, 13 - INSTR r9, 1 - INSTR r10, 2 - INSTR r11, 13 - INSTR r12, 1 - INSTR r13, 2 - INSTR r14, 13 - INSTR r15, 1 - INSTR rdx, 2 - INSTR r9, 13 - INSTR r10, 1 - INSTR r11, 2 - INSTR r12, 13 - INSTR r13, 1 - INSTR r14, 2 - INSTR r15, 13 - INSTR rdx, 1 - INSTR r9, 2 - INSTR r10, 13 - INSTR r11, 1 - INSTR r12, 2 - INSTR r13, 13 - INSTR r14, 1 - INSTR r15, 2 - INSTR rdx, 13 - INSTR r9, 1 - INSTR r10, 2 - INSTR r11, 13 - INSTR r12, 1 - INSTR r13, 2 - INSTR r14, 13 - INSTR r15, 1 - INSTR rdx, 2 - INSTR r9, 13 - INSTR r10, 1 - INSTR r11, 2 - INSTR r12, 13 - INSTR r13, 1 - INSTR r14, 2 - INSTR r15, 13 - INSTR rdx, 1 - INSTR r9, 2 - INSTR r10, 13 - INSTR r11, 1 - INSTR r12, 2 - INSTR r13, 13 - INSTR r14, 1 - INSTR r15, 2 - INSTR rdx, 13 - INSTR r9, 1 - INSTR r10, 2 - INSTR r11, 13 - INSTR r12, 1 - INSTR r13, 2 - INSTR r14, 13 - INSTR r15, 1 - INSTR rdx, 2 - INSTR r9, 13 - INSTR r10, 1 - INSTR r11, 2 - INSTR r12, 13 - INSTR r13, 1 - INSTR r14, 2 - INSTR r15, 13 - INSTR rdx, 1 - INSTR r9, 2 - INSTR r10, 13 - INSTR r11, 1 - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/cmp-r64_imd.S b/testcases/TaxCalc/cmp-r64_imd.S deleted file mode 100644 index 00198e9..0000000 --- a/testcases/TaxCalc/cmp-r64_imd.S +++ /dev/null @@ -1,170 +0,0 @@ -#define INSTR cmp -#define NINST 100 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - INSTR rax, 1 - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/dec-r32-TP.S b/testcases/TaxCalc/dec-r32-TP.S deleted file mode 100644 index f886ad1..0000000 --- a/testcases/TaxCalc/dec-r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR dec -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/dec-r32.S b/testcases/TaxCalc/dec-r32.S deleted file mode 100644 index 7c18fd9..0000000 --- a/testcases/TaxCalc/dec-r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR dec -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/inc-r32-TP.S b/testcases/TaxCalc/inc-r32-TP.S deleted file mode 100644 index 34f98ff..0000000 --- a/testcases/TaxCalc/inc-r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR inc -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/inc-r32.S b/testcases/TaxCalc/inc-r32.S deleted file mode 100644 index 84f2a8c..0000000 --- a/testcases/TaxCalc/inc-r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR inc -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/lea-r32_mem-TP.S b/testcases/TaxCalc/lea-r32_mem-TP.S deleted file mode 100644 index 9ab76b8..0000000 --- a/testcases/TaxCalc/lea-r32_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR lea -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/lea-r32_mem.S b/testcases/TaxCalc/lea-r32_mem.S deleted file mode 100644 index 0516e8d..0000000 --- a/testcases/TaxCalc/lea-r32_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR lea -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/lea-r64_mem-TP.S b/testcases/TaxCalc/lea-r64_mem-TP.S deleted file mode 100644 index e31ca30..0000000 --- a/testcases/TaxCalc/lea-r64_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR lea -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/lea-r64_mem.S b/testcases/TaxCalc/lea-r64_mem.S deleted file mode 100644 index aad963e..0000000 --- a/testcases/TaxCalc/lea-r64_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR lea -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-mem_r32-TP.S b/testcases/TaxCalc/mov-mem_r32-TP.S deleted file mode 100644 index 18142e2..0000000 --- a/testcases/TaxCalc/mov-mem_r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - INSTR [rip+PI], ebx - INSTR [rip+PI], ecx - INSTR [rip+PI], eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-mem_r32.S b/testcases/TaxCalc/mov-mem_r32.S deleted file mode 100644 index 427caf4..0000000 --- a/testcases/TaxCalc/mov-mem_r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - INSTR [rip+PI], eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-mem_r64-TP.S b/testcases/TaxCalc/mov-mem_r64-TP.S deleted file mode 100644 index b4a7f6a..0000000 --- a/testcases/TaxCalc/mov-mem_r64-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - INSTR [rip+PI], rbx - INSTR [rip+PI], rcx - INSTR [rip+PI], rax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-mem_r64.S b/testcases/TaxCalc/mov-mem_r64.S deleted file mode 100644 index c1c6012..0000000 --- a/testcases/TaxCalc/mov-mem_r64.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - INSTR [rip+PI], rax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-r32_mem-TP.S b/testcases/TaxCalc/mov-r32_mem-TP.S deleted file mode 100644 index 69c76ec..0000000 --- a/testcases/TaxCalc/mov-r32_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - INSTR edx, [rip+PI] - INSTR r9d, [rip+PI] - INSTR r10d, [rip+PI] - INSTR r11d, [rip+PI] - INSTR r12d, [rip+PI] - INSTR r13d, [rip+PI] - INSTR r14d, [rip+PI] - INSTR r15d, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-r32_mem.S b/testcases/TaxCalc/mov-r32_mem.S deleted file mode 100644 index e4e7313..0000000 --- a/testcases/TaxCalc/mov-r32_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - INSTR eax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-r32_r32-TP.S b/testcases/TaxCalc/mov-r32_r32-TP.S deleted file mode 100644 index ce489c3..0000000 --- a/testcases/TaxCalc/mov-r32_r32-TP.S +++ /dev/null @@ -1,207 +0,0 @@ -#define INSTR mov -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-r32_r32.S b/testcases/TaxCalc/mov-r32_r32.S deleted file mode 100644 index 71e767e..0000000 --- a/testcases/TaxCalc/mov-r32_r32.S +++ /dev/null @@ -1,207 +0,0 @@ -#define INSTR mov -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-r64_mem-TP.S b/testcases/TaxCalc/mov-r64_mem-TP.S deleted file mode 100644 index 97984a3..0000000 --- a/testcases/TaxCalc/mov-r64_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/mov-r64_mem.S b/testcases/TaxCalc/mov-r64_mem.S deleted file mode 100644 index 7095f31..0000000 --- a/testcases/TaxCalc/mov-r64_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR mov -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/movslq-r64_mem-TP.S b/testcases/TaxCalc/movslq-r64_mem-TP.S deleted file mode 100644 index e4ba19f..0000000 --- a/testcases/TaxCalc/movslq-r64_mem-TP.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR movslq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - INSTR rdx, [rip+PI] - INSTR r9, [rip+PI] - INSTR r10, [rip+PI] - INSTR r11, [rip+PI] - INSTR r12, [rip+PI] - INSTR r13, [rip+PI] - INSTR r14, [rip+PI] - INSTR r15, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/movslq-r64_mem.S b/testcases/TaxCalc/movslq-r64_mem.S deleted file mode 100644 index 50c48ed..0000000 --- a/testcases/TaxCalc/movslq-r64_mem.S +++ /dev/null @@ -1,134 +0,0 @@ -#define INSTR movslq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 -loop: - inc i - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - INSTR rax, [rip+PI] - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/movslq-r64_r32-TP.S b/testcases/TaxCalc/movslq-r64_r32-TP.S deleted file mode 100644 index 9b12cc4..0000000 --- a/testcases/TaxCalc/movslq-r64_r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR movslq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR rdx, eax - INSTR r9, ebx - INSTR r10, ecx - INSTR r11, eax - INSTR r12, ebx - INSTR r13, ecx - INSTR r14, eax - INSTR r15, ebx - INSTR rdx, ecx - INSTR r9, eax - INSTR r10, ebx - INSTR r11, ecx - INSTR r12, eax - INSTR r13, ebx - INSTR r14, ecx - INSTR r15, eax - INSTR rdx, ebx - INSTR r9, ecx - INSTR r10, eax - INSTR r11, ebx - INSTR r12, ecx - INSTR r13, eax - INSTR r14, ebx - INSTR r15, ecx - INSTR rdx, eax - INSTR r9, ebx - INSTR r10, ecx - INSTR r11, eax - INSTR r12, ebx - INSTR r13, ecx - INSTR r14, eax - INSTR r15, ebx - INSTR rdx, ecx - INSTR r9, eax - INSTR r10, ebx - INSTR r11, ecx - INSTR r12, eax - INSTR r13, ebx - INSTR r14, ecx - INSTR r15, eax - INSTR rdx, ebx - INSTR r9, ecx - INSTR r10, eax - INSTR r11, ebx - INSTR r12, ecx - INSTR r13, eax - INSTR r14, ebx - INSTR r15, ecx - INSTR rdx, eax - INSTR r9, ebx - INSTR r10, ecx - INSTR r11, eax - INSTR r12, ebx - INSTR r13, ecx - INSTR r14, eax - INSTR r15, ebx - INSTR rdx, ecx - INSTR r9, eax - INSTR r10, ebx - INSTR r11, ecx - INSTR r12, eax - INSTR r13, ebx - INSTR r14, ecx - INSTR r15, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/movslq-r64_r32.S b/testcases/TaxCalc/movslq-r64_r32.S deleted file mode 100644 index bf6f2bd..0000000 --- a/testcases/TaxCalc/movslq-r64_r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR movslq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - INSTR rax, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/movzbl-r32_r8-TP.S b/testcases/TaxCalc/movzbl-r32_r8-TP.S deleted file mode 100644 index c7de3ab..0000000 --- a/testcases/TaxCalc/movzbl-r32_r8-TP.S +++ /dev/null @@ -1,207 +0,0 @@ -#define INSTR movzbl -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx, al - INSTR r9d, bl - INSTR r10d, cl - INSTR r11d, al - INSTR r12d, bl - INSTR r13d, cl - INSTR r14d, al - INSTR r15d, bl - INSTR edx, cl - INSTR r9d, al - INSTR r10d, bl - INSTR r11d, cl - INSTR r12d, al - INSTR r13d, bl - INSTR r14d, cl - INSTR r15d, al - INSTR edx, bl - INSTR r9d, cl - INSTR r10d, al - INSTR r11d, bl - INSTR r12d, cl - INSTR r13d, al - INSTR r14d, bl - INSTR r15d, cl - INSTR edx, al - INSTR r9d, bl - INSTR r10d, cl - INSTR r11d, al - INSTR r12d, bl - INSTR r13d, cl - INSTR r14d, al - INSTR r15d, bl - INSTR edx, cl - INSTR r9d, al - INSTR r10d, bl - INSTR r11d, cl - INSTR r12d, al - INSTR r13d, bl - INSTR r14d, cl - INSTR r15d, al - INSTR edx, bl - INSTR r9d, cl - INSTR r10d, al - INSTR r11d, bl - INSTR r12d, cl - INSTR r13d, al - INSTR r14d, bl - INSTR r15d, cl - INSTR edx, al - INSTR r9d, bl - INSTR r10d, cl - INSTR r11d, al - INSTR r12d, bl - INSTR r13d, cl - INSTR r14d, al - INSTR r15d, bl - INSTR edx, cl - INSTR r9d, al - INSTR r10d, bl - INSTR r11d, cl - INSTR r12d, al - INSTR r13d, bl - INSTR r14d, cl - INSTR r15d, al - INSTR edx, bl - INSTR r9d, cl - INSTR r10d, al - INSTR r11d, bl - INSTR r12d, cl - INSTR r13d, al - INSTR r14d, bl - INSTR r15d, cl - INSTR edx, al - INSTR r9d, bl - INSTR r10d, cl - INSTR r11d, al - INSTR r12d, bl - INSTR r13d, cl - INSTR r14d, al - INSTR r15d, bl - INSTR edx, cl - INSTR r9d, al - INSTR r10d, bl - INSTR r11d, cl - INSTR r12d, al - INSTR r13d, bl - INSTR r14d, cl - INSTR r15d, al - INSTR edx, bl - INSTR r9d, cl - INSTR r10d, al - INSTR r11d, bl - INSTR r12d, cl - INSTR r13d, al - INSTR r14d, bl - INSTR r15d, cl - INSTR edx, al - INSTR r9d, bl - INSTR r10d, cl - INSTR r11d, al - INSTR r12d, bl - INSTR r13d, cl - INSTR r14d, al - INSTR r15d, bl - INSTR edx, cl - INSTR r9d, al - INSTR r10d, bl - INSTR r11d, cl - INSTR r12d, al - INSTR r13d, bl - INSTR r14d, cl - INSTR r15d, al - INSTR edx, bl - INSTR r9d, cl - INSTR r10d, al - INSTR r11d, bl - INSTR r12d, cl - INSTR r13d, al - INSTR r14d, bl - INSTR r15d, cl - INSTR edx, al - INSTR r9d, bl - INSTR r10d, cl - INSTR r11d, al - INSTR r12d, bl - INSTR r13d, cl - INSTR r14d, al - INSTR r15d, bl - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/movzbl-r32_r8.S b/testcases/TaxCalc/movzbl-r32_r8.S deleted file mode 100644 index 2f0ab2f..0000000 --- a/testcases/TaxCalc/movzbl-r32_r8.S +++ /dev/null @@ -1,207 +0,0 @@ -#define INSTR movzbl -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - INSTR eax, al - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/neg-r32-TP.S b/testcases/TaxCalc/neg-r32-TP.S deleted file mode 100644 index e60f4a2..0000000 --- a/testcases/TaxCalc/neg-r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR neg -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - INSTR edx - INSTR r9d - INSTR r10d - INSTR r11d - INSTR r12d - INSTR r13d - INSTR r14d - INSTR r15d - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/neg-r32.S b/testcases/TaxCalc/neg-r32.S deleted file mode 100644 index c25e69c..0000000 --- a/testcases/TaxCalc/neg-r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR neg -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - INSTR eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/sub-r32_r32-TP.S b/testcases/TaxCalc/sub-r32_r32-TP.S deleted file mode 100644 index 2f45769..0000000 --- a/testcases/TaxCalc/sub-r32_r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR sub -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/sub-r32_r32.S b/testcases/TaxCalc/sub-r32_r32.S deleted file mode 100644 index 91a7610..0000000 --- a/testcases/TaxCalc/sub-r32_r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR sub -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/test-r32_r32-TP.S b/testcases/TaxCalc/test-r32_r32-TP.S deleted file mode 100644 index 5403390..0000000 --- a/testcases/TaxCalc/test-r32_r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR test -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/test-r32_r32.S b/testcases/TaxCalc/test-r32_r32.S deleted file mode 100644 index 8c7e48d..0000000 --- a/testcases/TaxCalc/test-r32_r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR test -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddpd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vaddpd-xmm_xmm_xmm-TP.S deleted file mode 100644 index 7bf13a5..0000000 --- a/testcases/TaxCalc/vaddpd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vaddpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddpd-xmm_xmm_xmm.S b/testcases/TaxCalc/vaddpd-xmm_xmm_xmm.S deleted file mode 100644 index a4bf29b..0000000 --- a/testcases/TaxCalc/vaddpd-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vaddpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddpd-ymm_ymm_ymm-TP.S b/testcases/TaxCalc/vaddpd-ymm_ymm_ymm-TP.S deleted file mode 100644 index 268aafe..0000000 --- a/testcases/TaxCalc/vaddpd-ymm_ymm_ymm-TP.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vaddpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - INSTR ymm15, ymm1, ymm1 - INSTR ymm3, ymm2, ymm2 - INSTR ymm4, ymm0, ymm0 - INSTR ymm5, ymm1, ymm1 - INSTR ymm6, ymm2, ymm2 - INSTR ymm7, ymm0, ymm0 - INSTR ymm8, ymm1, ymm1 - INSTR ymm9, ymm2, ymm2 - INSTR ymm10, ymm0, ymm0 - INSTR ymm11, ymm1, ymm1 - INSTR ymm12, ymm2, ymm2 - INSTR ymm13, ymm0, ymm0 - INSTR ymm14, ymm1, ymm1 - INSTR ymm15, ymm2, ymm2 - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddpd-ymm_ymm_ymm.S b/testcases/TaxCalc/vaddpd-ymm_ymm_ymm.S deleted file mode 100644 index 0edbbbe..0000000 --- a/testcases/TaxCalc/vaddpd-ymm_ymm_ymm.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vaddpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddsd-xmm_xmm_mem-TP.S b/testcases/TaxCalc/vaddsd-xmm_xmm_mem-TP.S deleted file mode 100644 index 902cf3a..0000000 --- a/testcases/TaxCalc/vaddsd-xmm_xmm_mem-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vaddsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, [rip+PI] - INSTR xmm4, xmm1, [rip+PI] - INSTR xmm5, xmm2, [rip+PI] - INSTR xmm6, xmm0, [rip+PI] - INSTR xmm7, xmm1, [rip+PI] - INSTR xmm8, xmm2, [rip+PI] - INSTR xmm9, xmm0, [rip+PI] - INSTR xmm10, xmm1, [rip+PI] - INSTR xmm11, xmm2, [rip+PI] - INSTR xmm12, xmm0, [rip+PI] - INSTR xmm13, xmm1, [rip+PI] - INSTR xmm14, xmm2, [rip+PI] - INSTR xmm15, xmm0, [rip+PI] - INSTR xmm3, xmm1, [rip+PI] - INSTR xmm4, xmm2, [rip+PI] - INSTR xmm5, xmm0, [rip+PI] - INSTR xmm6, xmm1, [rip+PI] - INSTR xmm7, xmm2, [rip+PI] - INSTR xmm8, xmm0, [rip+PI] - INSTR xmm9, xmm1, [rip+PI] - INSTR xmm10, xmm2, [rip+PI] - INSTR xmm11, xmm0, [rip+PI] - INSTR xmm12, xmm1, [rip+PI] - INSTR xmm13, xmm2, [rip+PI] - INSTR xmm14, xmm0, [rip+PI] - INSTR xmm15, xmm1, [rip+PI] - INSTR xmm3, xmm2, [rip+PI] - INSTR xmm4, xmm0, [rip+PI] - INSTR xmm5, xmm1, [rip+PI] - INSTR xmm6, xmm2, [rip+PI] - INSTR xmm7, xmm0, [rip+PI] - INSTR xmm8, xmm1, [rip+PI] - INSTR xmm9, xmm2, [rip+PI] - INSTR xmm10, xmm0, [rip+PI] - INSTR xmm11, xmm1, [rip+PI] - INSTR xmm12, xmm2, [rip+PI] - INSTR xmm13, xmm0, [rip+PI] - INSTR xmm14, xmm1, [rip+PI] - INSTR xmm15, xmm2, [rip+PI] - INSTR xmm3, xmm0, [rip+PI] - INSTR xmm4, xmm1, [rip+PI] - INSTR xmm5, xmm2, [rip+PI] - INSTR xmm6, xmm0, [rip+PI] - INSTR xmm7, xmm1, [rip+PI] - INSTR xmm8, xmm2, [rip+PI] - INSTR xmm9, xmm0, [rip+PI] - INSTR xmm10, xmm1, [rip+PI] - INSTR xmm11, xmm2, [rip+PI] - INSTR xmm12, xmm0, [rip+PI] - INSTR xmm13, xmm1, [rip+PI] - INSTR xmm14, xmm2, [rip+PI] - INSTR xmm15, xmm0, [rip+PI] - INSTR xmm3, xmm1, [rip+PI] - INSTR xmm4, xmm2, [rip+PI] - INSTR xmm5, xmm0, [rip+PI] - INSTR xmm6, xmm1, [rip+PI] - INSTR xmm7, xmm2, [rip+PI] - INSTR xmm8, xmm0, [rip+PI] - INSTR xmm9, xmm1, [rip+PI] - INSTR xmm10, xmm2, [rip+PI] - INSTR xmm11, xmm0, [rip+PI] - INSTR xmm12, xmm1, [rip+PI] - INSTR xmm13, xmm2, [rip+PI] - INSTR xmm14, xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddsd-xmm_xmm_mem.S b/testcases/TaxCalc/vaddsd-xmm_xmm_mem.S deleted file mode 100644 index 8a4bc84..0000000 --- a/testcases/TaxCalc/vaddsd-xmm_xmm_mem.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vaddsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddsd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vaddsd-xmm_xmm_xmm-TP.S deleted file mode 100644 index 274e201..0000000 --- a/testcases/TaxCalc/vaddsd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vaddsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vaddsd-xmm_xmm_xmm.S b/testcases/TaxCalc/vaddsd-xmm_xmm_xmm.S deleted file mode 100644 index d071892..0000000 --- a/testcases/TaxCalc/vaddsd-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vaddsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovapd-xmm_xmm-TP.S b/testcases/TaxCalc/vmovapd-xmm_xmm-TP.S deleted file mode 100644 index b6583d8..0000000 --- a/testcases/TaxCalc/vmovapd-xmm_xmm-TP.S +++ /dev/null @@ -1,172 +0,0 @@ -#define INSTR vmovapd -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - INSTR xmm14, xmm2 - INSTR xmm15, xmm0 - INSTR xmm3, xmm1 - INSTR xmm4, xmm2 - INSTR xmm5, xmm0 - INSTR xmm6, xmm1 - INSTR xmm7, xmm2 - INSTR xmm8, xmm0 - INSTR xmm9, xmm1 - INSTR xmm10, xmm2 - INSTR xmm11, xmm0 - INSTR xmm12, xmm1 - INSTR xmm13, xmm2 - INSTR xmm14, xmm0 - INSTR xmm15, xmm1 - INSTR xmm3, xmm2 - INSTR xmm4, xmm0 - INSTR xmm5, xmm1 - INSTR xmm6, xmm2 - INSTR xmm7, xmm0 - INSTR xmm8, xmm1 - INSTR xmm9, xmm2 - INSTR xmm10, xmm0 - INSTR xmm11, xmm1 - INSTR xmm12, xmm2 - INSTR xmm13, xmm0 - INSTR xmm14, xmm1 - INSTR xmm15, xmm2 - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - INSTR xmm14, xmm2 - INSTR xmm15, xmm0 - INSTR xmm3, xmm1 - INSTR xmm4, xmm2 - INSTR xmm5, xmm0 - INSTR xmm6, xmm1 - INSTR xmm7, xmm2 - INSTR xmm8, xmm0 - INSTR xmm9, xmm1 - INSTR xmm10, xmm2 - INSTR xmm11, xmm0 - INSTR xmm12, xmm1 - INSTR xmm13, xmm2 - INSTR xmm14, xmm0 - INSTR xmm15, xmm1 - INSTR xmm3, xmm2 - INSTR xmm4, xmm0 - INSTR xmm5, xmm1 - INSTR xmm6, xmm2 - INSTR xmm7, xmm0 - INSTR xmm8, xmm1 - INSTR xmm9, xmm2 - INSTR xmm10, xmm0 - INSTR xmm11, xmm1 - INSTR xmm12, xmm2 - INSTR xmm13, xmm0 - INSTR xmm14, xmm1 - INSTR xmm15, xmm2 - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - INSTR xmm14, xmm2 - INSTR xmm15, xmm0 - INSTR xmm3, xmm1 - INSTR xmm4, xmm2 - INSTR xmm5, xmm0 - INSTR xmm6, xmm1 - INSTR xmm7, xmm2 - INSTR xmm8, xmm0 - INSTR xmm9, xmm1 - INSTR xmm10, xmm2 - INSTR xmm11, xmm0 - INSTR xmm12, xmm1 - INSTR xmm13, xmm2 - INSTR xmm14, xmm0 - INSTR xmm15, xmm1 - INSTR xmm3, xmm2 - INSTR xmm4, xmm0 - INSTR xmm5, xmm1 - INSTR xmm6, xmm2 - INSTR xmm7, xmm0 - INSTR xmm8, xmm1 - INSTR xmm9, xmm2 - INSTR xmm10, xmm0 - INSTR xmm11, xmm1 - INSTR xmm12, xmm2 - INSTR xmm13, xmm0 - INSTR xmm14, xmm1 - INSTR xmm15, xmm2 - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovapd-xmm_xmm.S b/testcases/TaxCalc/vmovapd-xmm_xmm.S deleted file mode 100644 index dcdb49f..0000000 --- a/testcases/TaxCalc/vmovapd-xmm_xmm.S +++ /dev/null @@ -1,172 +0,0 @@ -#define INSTR vmovapd -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovapd-ymm_ymm-TP.S b/testcases/TaxCalc/vmovapd-ymm_ymm-TP.S deleted file mode 100644 index dc26fe0..0000000 --- a/testcases/TaxCalc/vmovapd-ymm_ymm-TP.S +++ /dev/null @@ -1,174 +0,0 @@ -#define INSTR vmovapd -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm3, ymm0 - INSTR ymm4, ymm1 - INSTR ymm5, ymm2 - INSTR ymm6, ymm0 - INSTR ymm7, ymm1 - INSTR ymm8, ymm2 - INSTR ymm9, ymm0 - INSTR ymm10, ymm1 - INSTR ymm11, ymm2 - INSTR ymm12, ymm0 - INSTR ymm13, ymm1 - INSTR ymm14, ymm2 - INSTR ymm15, ymm0 - INSTR ymm3, ymm1 - INSTR ymm4, ymm2 - INSTR ymm5, ymm0 - INSTR ymm6, ymm1 - INSTR ymm7, ymm2 - INSTR ymm8, ymm0 - INSTR ymm9, ymm1 - INSTR ymm10, ymm2 - INSTR ymm11, ymm0 - INSTR ymm12, ymm1 - INSTR ymm13, ymm2 - INSTR ymm14, ymm0 - INSTR ymm15, ymm1 - INSTR ymm3, ymm2 - INSTR ymm4, ymm0 - INSTR ymm5, ymm1 - INSTR ymm6, ymm2 - INSTR ymm7, ymm0 - INSTR ymm8, ymm1 - INSTR ymm9, ymm2 - INSTR ymm10, ymm0 - INSTR ymm11, ymm1 - INSTR ymm12, ymm2 - INSTR ymm13, ymm0 - INSTR ymm14, ymm1 - INSTR ymm15, ymm2 - INSTR ymm3, ymm0 - INSTR ymm4, ymm1 - INSTR ymm5, ymm2 - INSTR ymm6, ymm0 - INSTR ymm7, ymm1 - INSTR ymm8, ymm2 - INSTR ymm9, ymm0 - INSTR ymm10, ymm1 - INSTR ymm11, ymm2 - INSTR ymm12, ymm0 - INSTR ymm13, ymm1 - INSTR ymm14, ymm2 - INSTR ymm15, ymm0 - INSTR ymm3, ymm1 - INSTR ymm4, ymm2 - INSTR ymm5, ymm0 - INSTR ymm6, ymm1 - INSTR ymm7, ymm2 - INSTR ymm8, ymm0 - INSTR ymm9, ymm1 - INSTR ymm10, ymm2 - INSTR ymm11, ymm0 - INSTR ymm12, ymm1 - INSTR ymm13, ymm2 - INSTR ymm14, ymm0 - INSTR ymm15, ymm1 - INSTR ymm3, ymm2 - INSTR ymm4, ymm0 - INSTR ymm5, ymm1 - INSTR ymm6, ymm2 - INSTR ymm7, ymm0 - INSTR ymm8, ymm1 - INSTR ymm9, ymm2 - INSTR ymm10, ymm0 - INSTR ymm11, ymm1 - INSTR ymm12, ymm2 - INSTR ymm13, ymm0 - INSTR ymm14, ymm1 - INSTR ymm15, ymm2 - INSTR ymm3, ymm0 - INSTR ymm4, ymm1 - INSTR ymm5, ymm2 - INSTR ymm6, ymm0 - INSTR ymm7, ymm1 - INSTR ymm8, ymm2 - INSTR ymm9, ymm0 - INSTR ymm10, ymm1 - INSTR ymm11, ymm2 - INSTR ymm12, ymm0 - INSTR ymm13, ymm1 - INSTR ymm14, ymm2 - INSTR ymm15, ymm0 - INSTR ymm3, ymm1 - INSTR ymm4, ymm2 - INSTR ymm5, ymm0 - INSTR ymm6, ymm1 - INSTR ymm7, ymm2 - INSTR ymm8, ymm0 - INSTR ymm9, ymm1 - INSTR ymm10, ymm2 - INSTR ymm11, ymm0 - INSTR ymm12, ymm1 - INSTR ymm13, ymm2 - INSTR ymm14, ymm0 - INSTR ymm15, ymm1 - INSTR ymm3, ymm2 - INSTR ymm4, ymm0 - INSTR ymm5, ymm1 - INSTR ymm6, ymm2 - INSTR ymm7, ymm0 - INSTR ymm8, ymm1 - INSTR ymm9, ymm2 - INSTR ymm10, ymm0 - INSTR ymm11, ymm1 - INSTR ymm12, ymm2 - INSTR ymm13, ymm0 - INSTR ymm14, ymm1 - INSTR ymm15, ymm2 - INSTR ymm3, ymm0 - INSTR ymm4, ymm1 - INSTR ymm5, ymm2 - INSTR ymm6, ymm0 - INSTR ymm7, ymm1 - INSTR ymm8, ymm2 - INSTR ymm9, ymm0 - INSTR ymm10, ymm1 - INSTR ymm11, ymm2 - INSTR ymm12, ymm0 - INSTR ymm13, ymm1 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovapd-ymm_ymm.S b/testcases/TaxCalc/vmovapd-ymm_ymm.S deleted file mode 100644 index 05d0539..0000000 --- a/testcases/TaxCalc/vmovapd-ymm_ymm.S +++ /dev/null @@ -1,174 +0,0 @@ -#define INSTR vmovapd -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - INSTR ymm0, ymm1 - INSTR ymm1, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovaps-xmm_xmm-TP.S b/testcases/TaxCalc/vmovaps-xmm_xmm-TP.S deleted file mode 100644 index af1fd01..0000000 --- a/testcases/TaxCalc/vmovaps-xmm_xmm-TP.S +++ /dev/null @@ -1,172 +0,0 @@ -#define INSTR vmovaps -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - INSTR xmm14, xmm2 - INSTR xmm15, xmm0 - INSTR xmm3, xmm1 - INSTR xmm4, xmm2 - INSTR xmm5, xmm0 - INSTR xmm6, xmm1 - INSTR xmm7, xmm2 - INSTR xmm8, xmm0 - INSTR xmm9, xmm1 - INSTR xmm10, xmm2 - INSTR xmm11, xmm0 - INSTR xmm12, xmm1 - INSTR xmm13, xmm2 - INSTR xmm14, xmm0 - INSTR xmm15, xmm1 - INSTR xmm3, xmm2 - INSTR xmm4, xmm0 - INSTR xmm5, xmm1 - INSTR xmm6, xmm2 - INSTR xmm7, xmm0 - INSTR xmm8, xmm1 - INSTR xmm9, xmm2 - INSTR xmm10, xmm0 - INSTR xmm11, xmm1 - INSTR xmm12, xmm2 - INSTR xmm13, xmm0 - INSTR xmm14, xmm1 - INSTR xmm15, xmm2 - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - INSTR xmm14, xmm2 - INSTR xmm15, xmm0 - INSTR xmm3, xmm1 - INSTR xmm4, xmm2 - INSTR xmm5, xmm0 - INSTR xmm6, xmm1 - INSTR xmm7, xmm2 - INSTR xmm8, xmm0 - INSTR xmm9, xmm1 - INSTR xmm10, xmm2 - INSTR xmm11, xmm0 - INSTR xmm12, xmm1 - INSTR xmm13, xmm2 - INSTR xmm14, xmm0 - INSTR xmm15, xmm1 - INSTR xmm3, xmm2 - INSTR xmm4, xmm0 - INSTR xmm5, xmm1 - INSTR xmm6, xmm2 - INSTR xmm7, xmm0 - INSTR xmm8, xmm1 - INSTR xmm9, xmm2 - INSTR xmm10, xmm0 - INSTR xmm11, xmm1 - INSTR xmm12, xmm2 - INSTR xmm13, xmm0 - INSTR xmm14, xmm1 - INSTR xmm15, xmm2 - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - INSTR xmm14, xmm2 - INSTR xmm15, xmm0 - INSTR xmm3, xmm1 - INSTR xmm4, xmm2 - INSTR xmm5, xmm0 - INSTR xmm6, xmm1 - INSTR xmm7, xmm2 - INSTR xmm8, xmm0 - INSTR xmm9, xmm1 - INSTR xmm10, xmm2 - INSTR xmm11, xmm0 - INSTR xmm12, xmm1 - INSTR xmm13, xmm2 - INSTR xmm14, xmm0 - INSTR xmm15, xmm1 - INSTR xmm3, xmm2 - INSTR xmm4, xmm0 - INSTR xmm5, xmm1 - INSTR xmm6, xmm2 - INSTR xmm7, xmm0 - INSTR xmm8, xmm1 - INSTR xmm9, xmm2 - INSTR xmm10, xmm0 - INSTR xmm11, xmm1 - INSTR xmm12, xmm2 - INSTR xmm13, xmm0 - INSTR xmm14, xmm1 - INSTR xmm15, xmm2 - INSTR xmm3, xmm0 - INSTR xmm4, xmm1 - INSTR xmm5, xmm2 - INSTR xmm6, xmm0 - INSTR xmm7, xmm1 - INSTR xmm8, xmm2 - INSTR xmm9, xmm0 - INSTR xmm10, xmm1 - INSTR xmm11, xmm2 - INSTR xmm12, xmm0 - INSTR xmm13, xmm1 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovaps-xmm_xmm.S b/testcases/TaxCalc/vmovaps-xmm_xmm.S deleted file mode 100644 index d743c98..0000000 --- a/testcases/TaxCalc/vmovaps-xmm_xmm.S +++ /dev/null @@ -1,172 +0,0 @@ -#define INSTR vmovaps -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - INSTR xmm0, xmm1 - INSTR xmm1, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovhpd-xmm_xmm_mem-TP.S b/testcases/TaxCalc/vmovhpd-xmm_xmm_mem-TP.S deleted file mode 100644 index 11cbaf0..0000000 --- a/testcases/TaxCalc/vmovhpd-xmm_xmm_mem-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmovhpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, [rip+PI] - INSTR xmm4, xmm1, [rip+PI] - INSTR xmm5, xmm2, [rip+PI] - INSTR xmm6, xmm0, [rip+PI] - INSTR xmm7, xmm1, [rip+PI] - INSTR xmm8, xmm2, [rip+PI] - INSTR xmm9, xmm0, [rip+PI] - INSTR xmm10, xmm1, [rip+PI] - INSTR xmm11, xmm2, [rip+PI] - INSTR xmm12, xmm0, [rip+PI] - INSTR xmm13, xmm1, [rip+PI] - INSTR xmm14, xmm2, [rip+PI] - INSTR xmm15, xmm0, [rip+PI] - INSTR xmm3, xmm1, [rip+PI] - INSTR xmm4, xmm2, [rip+PI] - INSTR xmm5, xmm0, [rip+PI] - INSTR xmm6, xmm1, [rip+PI] - INSTR xmm7, xmm2, [rip+PI] - INSTR xmm8, xmm0, [rip+PI] - INSTR xmm9, xmm1, [rip+PI] - INSTR xmm10, xmm2, [rip+PI] - INSTR xmm11, xmm0, [rip+PI] - INSTR xmm12, xmm1, [rip+PI] - INSTR xmm13, xmm2, [rip+PI] - INSTR xmm14, xmm0, [rip+PI] - INSTR xmm15, xmm1, [rip+PI] - INSTR xmm3, xmm2, [rip+PI] - INSTR xmm4, xmm0, [rip+PI] - INSTR xmm5, xmm1, [rip+PI] - INSTR xmm6, xmm2, [rip+PI] - INSTR xmm7, xmm0, [rip+PI] - INSTR xmm8, xmm1, [rip+PI] - INSTR xmm9, xmm2, [rip+PI] - INSTR xmm10, xmm0, [rip+PI] - INSTR xmm11, xmm1, [rip+PI] - INSTR xmm12, xmm2, [rip+PI] - INSTR xmm13, xmm0, [rip+PI] - INSTR xmm14, xmm1, [rip+PI] - INSTR xmm15, xmm2, [rip+PI] - INSTR xmm3, xmm0, [rip+PI] - INSTR xmm4, xmm1, [rip+PI] - INSTR xmm5, xmm2, [rip+PI] - INSTR xmm6, xmm0, [rip+PI] - INSTR xmm7, xmm1, [rip+PI] - INSTR xmm8, xmm2, [rip+PI] - INSTR xmm9, xmm0, [rip+PI] - INSTR xmm10, xmm1, [rip+PI] - INSTR xmm11, xmm2, [rip+PI] - INSTR xmm12, xmm0, [rip+PI] - INSTR xmm13, xmm1, [rip+PI] - INSTR xmm14, xmm2, [rip+PI] - INSTR xmm15, xmm0, [rip+PI] - INSTR xmm3, xmm1, [rip+PI] - INSTR xmm4, xmm2, [rip+PI] - INSTR xmm5, xmm0, [rip+PI] - INSTR xmm6, xmm1, [rip+PI] - INSTR xmm7, xmm2, [rip+PI] - INSTR xmm8, xmm0, [rip+PI] - INSTR xmm9, xmm1, [rip+PI] - INSTR xmm10, xmm2, [rip+PI] - INSTR xmm11, xmm0, [rip+PI] - INSTR xmm12, xmm1, [rip+PI] - INSTR xmm13, xmm2, [rip+PI] - INSTR xmm14, xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovhpd-xmm_xmm_mem.S b/testcases/TaxCalc/vmovhpd-xmm_xmm_mem.S deleted file mode 100644 index b423e4a..0000000 --- a/testcases/TaxCalc/vmovhpd-xmm_xmm_mem.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmovhpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovq-r64_xmm-TP.S b/testcases/TaxCalc/vmovq-r64_xmm-TP.S deleted file mode 100644 index b80c773..0000000 --- a/testcases/TaxCalc/vmovq-r64_xmm-TP.S +++ /dev/null @@ -1,141 +0,0 @@ -#define INSTR vmovq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR rdx, xmm0 - INSTR r9, xmm1 - INSTR r10, xmm2 - INSTR r11, xmm0 - INSTR r12, xmm1 - INSTR r13, xmm2 - INSTR r14, xmm0 - INSTR r15, xmm1 - INSTR rdx, xmm2 - INSTR r9, xmm0 - INSTR r10, xmm1 - INSTR r11, xmm2 - INSTR r12, xmm0 - INSTR r13, xmm1 - INSTR r14, xmm2 - INSTR r15, xmm0 - INSTR rdx, xmm1 - INSTR r9, xmm2 - INSTR r10, xmm0 - INSTR r11, xmm1 - INSTR r12, xmm2 - INSTR r13, xmm0 - INSTR r14, xmm1 - INSTR r15, xmm2 - INSTR rdx, xmm0 - INSTR r9, xmm1 - INSTR r10, xmm2 - INSTR r11, xmm0 - INSTR r12, xmm1 - INSTR r13, xmm2 - INSTR r14, xmm0 - INSTR r15, xmm1 - INSTR rdx, xmm2 - INSTR r9, xmm0 - INSTR r10, xmm1 - INSTR r11, xmm2 - INSTR r12, xmm0 - INSTR r13, xmm1 - INSTR r14, xmm2 - INSTR r15, xmm0 - INSTR rdx, xmm1 - INSTR r9, xmm2 - INSTR r10, xmm0 - INSTR r11, xmm1 - INSTR r12, xmm2 - INSTR r13, xmm0 - INSTR r14, xmm1 - INSTR r15, xmm2 - INSTR rdx, xmm0 - INSTR r9, xmm1 - INSTR r10, xmm2 - INSTR r11, xmm0 - INSTR r12, xmm1 - INSTR r13, xmm2 - INSTR r14, xmm0 - INSTR r15, xmm1 - INSTR rdx, xmm2 - INSTR r9, xmm0 - INSTR r10, xmm1 - INSTR r11, xmm2 - INSTR r12, xmm0 - INSTR r13, xmm1 - INSTR r14, xmm2 - INSTR r15, xmm0 - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovq-r64_xmm.S b/testcases/TaxCalc/vmovq-r64_xmm.S deleted file mode 100644 index 029ebc3..0000000 --- a/testcases/TaxCalc/vmovq-r64_xmm.S +++ /dev/null @@ -1,141 +0,0 @@ -#define INSTR vmovq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - INSTR rax, xmm0 - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovq-xmm_r64-TP.S b/testcases/TaxCalc/vmovq-xmm_r64-TP.S deleted file mode 100644 index fc7da5a..0000000 --- a/testcases/TaxCalc/vmovq-xmm_r64-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR vmovq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR xmm3, rax - INSTR xmm4, rbx - INSTR xmm5, rcx - INSTR xmm6, rax - INSTR xmm7, rbx - INSTR xmm8, rcx - INSTR xmm9, rax - INSTR xmm10, rbx - INSTR xmm11, rcx - INSTR xmm12, rax - INSTR xmm13, rbx - INSTR xmm14, rcx - INSTR xmm15, rax - INSTR xmm3, rbx - INSTR xmm4, rcx - INSTR xmm5, rax - INSTR xmm6, rbx - INSTR xmm7, rcx - INSTR xmm8, rax - INSTR xmm9, rbx - INSTR xmm10, rcx - INSTR xmm11, rax - INSTR xmm12, rbx - INSTR xmm13, rcx - INSTR xmm14, rax - INSTR xmm15, rbx - INSTR xmm3, rcx - INSTR xmm4, rax - INSTR xmm5, rbx - INSTR xmm6, rcx - INSTR xmm7, rax - INSTR xmm8, rbx - INSTR xmm9, rcx - INSTR xmm10, rax - INSTR xmm11, rbx - INSTR xmm12, rcx - INSTR xmm13, rax - INSTR xmm14, rbx - INSTR xmm15, rcx - INSTR xmm3, rax - INSTR xmm4, rbx - INSTR xmm5, rcx - INSTR xmm6, rax - INSTR xmm7, rbx - INSTR xmm8, rcx - INSTR xmm9, rax - INSTR xmm10, rbx - INSTR xmm11, rcx - INSTR xmm12, rax - INSTR xmm13, rbx - INSTR xmm14, rcx - INSTR xmm15, rax - INSTR xmm3, rbx - INSTR xmm4, rcx - INSTR xmm5, rax - INSTR xmm6, rbx - INSTR xmm7, rcx - INSTR xmm8, rax - INSTR xmm9, rbx - INSTR xmm10, rcx - INSTR xmm11, rax - INSTR xmm12, rbx - INSTR xmm13, rcx - INSTR xmm14, rax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovq-xmm_r64.S b/testcases/TaxCalc/vmovq-xmm_r64.S deleted file mode 100644 index 6a89af7..0000000 --- a/testcases/TaxCalc/vmovq-xmm_r64.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR vmovq -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - INSTR xmm0, rax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovsd-mem_xmm-TP.S b/testcases/TaxCalc/vmovsd-mem_xmm-TP.S deleted file mode 100644 index 14a1cb6..0000000 --- a/testcases/TaxCalc/vmovsd-mem_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmovsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm1 - INSTR [rip+PI], xmm2 - INSTR [rip+PI], xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovsd-mem_xmm.S b/testcases/TaxCalc/vmovsd-mem_xmm.S deleted file mode 100644 index 4f1bfbb..0000000 --- a/testcases/TaxCalc/vmovsd-mem_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmovsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - INSTR [rip+PI], xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovsd-xmm_mem-TP.S b/testcases/TaxCalc/vmovsd-xmm_mem-TP.S deleted file mode 100644 index 74f7da2..0000000 --- a/testcases/TaxCalc/vmovsd-xmm_mem-TP.S +++ /dev/null @@ -1,101 +0,0 @@ -#define INSTR vmovsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero -loop: - inc i - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovsd-xmm_mem.S b/testcases/TaxCalc/vmovsd-xmm_mem.S deleted file mode 100644 index 6447ff8..0000000 --- a/testcases/TaxCalc/vmovsd-xmm_mem.S +++ /dev/null @@ -1,101 +0,0 @@ -#define INSTR vmovsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero -loop: - inc i - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovsd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vmovsd-xmm_xmm_xmm-TP.S deleted file mode 100644 index 1c847dd..0000000 --- a/testcases/TaxCalc/vmovsd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmovsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovsd-xmm_xmm_xmm.S b/testcases/TaxCalc/vmovsd-xmm_xmm_xmm.S deleted file mode 100644 index d31c45a..0000000 --- a/testcases/TaxCalc/vmovsd-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmovsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovupd-xmm_mem-TP.S b/testcases/TaxCalc/vmovupd-xmm_mem-TP.S deleted file mode 100644 index 9c5d7a0..0000000 --- a/testcases/TaxCalc/vmovupd-xmm_mem-TP.S +++ /dev/null @@ -1,101 +0,0 @@ -#define INSTR vmovupd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero -loop: - inc i - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - INSTR xmm15, [rip+PI] - INSTR xmm3, [rip+PI] - INSTR xmm4, [rip+PI] - INSTR xmm5, [rip+PI] - INSTR xmm6, [rip+PI] - INSTR xmm7, [rip+PI] - INSTR xmm8, [rip+PI] - INSTR xmm9, [rip+PI] - INSTR xmm10, [rip+PI] - INSTR xmm11, [rip+PI] - INSTR xmm12, [rip+PI] - INSTR xmm13, [rip+PI] - INSTR xmm14, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmovupd-xmm_mem.S b/testcases/TaxCalc/vmovupd-xmm_mem.S deleted file mode 100644 index b5cc153..0000000 --- a/testcases/TaxCalc/vmovupd-xmm_mem.S +++ /dev/null @@ -1,101 +0,0 @@ -#define INSTR vmovupd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero -loop: - inc i - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - INSTR xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulpd-ymm_ymm_mem-TP.S b/testcases/TaxCalc/vmulpd-ymm_ymm_mem-TP.S deleted file mode 100644 index bdbd111..0000000 --- a/testcases/TaxCalc/vmulpd-ymm_ymm_mem-TP.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vmulpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm3, ymm0, [rip+PI] - INSTR ymm4, ymm1, [rip+PI] - INSTR ymm5, ymm2, [rip+PI] - INSTR ymm6, ymm0, [rip+PI] - INSTR ymm7, ymm1, [rip+PI] - INSTR ymm8, ymm2, [rip+PI] - INSTR ymm9, ymm0, [rip+PI] - INSTR ymm10, ymm1, [rip+PI] - INSTR ymm11, ymm2, [rip+PI] - INSTR ymm12, ymm0, [rip+PI] - INSTR ymm13, ymm1, [rip+PI] - INSTR ymm14, ymm2, [rip+PI] - INSTR ymm15, ymm0, [rip+PI] - INSTR ymm3, ymm1, [rip+PI] - INSTR ymm4, ymm2, [rip+PI] - INSTR ymm5, ymm0, [rip+PI] - INSTR ymm6, ymm1, [rip+PI] - INSTR ymm7, ymm2, [rip+PI] - INSTR ymm8, ymm0, [rip+PI] - INSTR ymm9, ymm1, [rip+PI] - INSTR ymm10, ymm2, [rip+PI] - INSTR ymm11, ymm0, [rip+PI] - INSTR ymm12, ymm1, [rip+PI] - INSTR ymm13, ymm2, [rip+PI] - INSTR ymm14, ymm0, [rip+PI] - INSTR ymm15, ymm1, [rip+PI] - INSTR ymm3, ymm2, [rip+PI] - INSTR ymm4, ymm0, [rip+PI] - INSTR ymm5, ymm1, [rip+PI] - INSTR ymm6, ymm2, [rip+PI] - INSTR ymm7, ymm0, [rip+PI] - INSTR ymm8, ymm1, [rip+PI] - INSTR ymm9, ymm2, [rip+PI] - INSTR ymm10, ymm0, [rip+PI] - INSTR ymm11, ymm1, [rip+PI] - INSTR ymm12, ymm2, [rip+PI] - INSTR ymm13, ymm0, [rip+PI] - INSTR ymm14, ymm1, [rip+PI] - INSTR ymm15, ymm2, [rip+PI] - INSTR ymm3, ymm0, [rip+PI] - INSTR ymm4, ymm1, [rip+PI] - INSTR ymm5, ymm2, [rip+PI] - INSTR ymm6, ymm0, [rip+PI] - INSTR ymm7, ymm1, [rip+PI] - INSTR ymm8, ymm2, [rip+PI] - INSTR ymm9, ymm0, [rip+PI] - INSTR ymm10, ymm1, [rip+PI] - INSTR ymm11, ymm2, [rip+PI] - INSTR ymm12, ymm0, [rip+PI] - INSTR ymm13, ymm1, [rip+PI] - INSTR ymm14, ymm2, [rip+PI] - INSTR ymm15, ymm0, [rip+PI] - INSTR ymm3, ymm1, [rip+PI] - INSTR ymm4, ymm2, [rip+PI] - INSTR ymm5, ymm0, [rip+PI] - INSTR ymm6, ymm1, [rip+PI] - INSTR ymm7, ymm2, [rip+PI] - INSTR ymm8, ymm0, [rip+PI] - INSTR ymm9, ymm1, [rip+PI] - INSTR ymm10, ymm2, [rip+PI] - INSTR ymm11, ymm0, [rip+PI] - INSTR ymm12, ymm1, [rip+PI] - INSTR ymm13, ymm2, [rip+PI] - INSTR ymm14, ymm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulpd-ymm_ymm_mem.S b/testcases/TaxCalc/vmulpd-ymm_ymm_mem.S deleted file mode 100644 index 3193575..0000000 --- a/testcases/TaxCalc/vmulpd-ymm_ymm_mem.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vmulpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - INSTR ymm0, ymm1, [rip+PI] - INSTR ymm1, ymm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulpd-ymm_ymm_ymm-TP.S b/testcases/TaxCalc/vmulpd-ymm_ymm_ymm-TP.S deleted file mode 100644 index 029acd9..0000000 --- a/testcases/TaxCalc/vmulpd-ymm_ymm_ymm-TP.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vmulpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - INSTR ymm15, ymm1, ymm1 - INSTR ymm3, ymm2, ymm2 - INSTR ymm4, ymm0, ymm0 - INSTR ymm5, ymm1, ymm1 - INSTR ymm6, ymm2, ymm2 - INSTR ymm7, ymm0, ymm0 - INSTR ymm8, ymm1, ymm1 - INSTR ymm9, ymm2, ymm2 - INSTR ymm10, ymm0, ymm0 - INSTR ymm11, ymm1, ymm1 - INSTR ymm12, ymm2, ymm2 - INSTR ymm13, ymm0, ymm0 - INSTR ymm14, ymm1, ymm1 - INSTR ymm15, ymm2, ymm2 - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulpd-ymm_ymm_ymm.S b/testcases/TaxCalc/vmulpd-ymm_ymm_ymm.S deleted file mode 100644 index 830c26d..0000000 --- a/testcases/TaxCalc/vmulpd-ymm_ymm_ymm.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vmulpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulsd-xmm_xmm_mem-TP.S b/testcases/TaxCalc/vmulsd-xmm_xmm_mem-TP.S deleted file mode 100644 index 5a0359f..0000000 --- a/testcases/TaxCalc/vmulsd-xmm_xmm_mem-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmulsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, [rip+PI] - INSTR xmm4, xmm1, [rip+PI] - INSTR xmm5, xmm2, [rip+PI] - INSTR xmm6, xmm0, [rip+PI] - INSTR xmm7, xmm1, [rip+PI] - INSTR xmm8, xmm2, [rip+PI] - INSTR xmm9, xmm0, [rip+PI] - INSTR xmm10, xmm1, [rip+PI] - INSTR xmm11, xmm2, [rip+PI] - INSTR xmm12, xmm0, [rip+PI] - INSTR xmm13, xmm1, [rip+PI] - INSTR xmm14, xmm2, [rip+PI] - INSTR xmm15, xmm0, [rip+PI] - INSTR xmm3, xmm1, [rip+PI] - INSTR xmm4, xmm2, [rip+PI] - INSTR xmm5, xmm0, [rip+PI] - INSTR xmm6, xmm1, [rip+PI] - INSTR xmm7, xmm2, [rip+PI] - INSTR xmm8, xmm0, [rip+PI] - INSTR xmm9, xmm1, [rip+PI] - INSTR xmm10, xmm2, [rip+PI] - INSTR xmm11, xmm0, [rip+PI] - INSTR xmm12, xmm1, [rip+PI] - INSTR xmm13, xmm2, [rip+PI] - INSTR xmm14, xmm0, [rip+PI] - INSTR xmm15, xmm1, [rip+PI] - INSTR xmm3, xmm2, [rip+PI] - INSTR xmm4, xmm0, [rip+PI] - INSTR xmm5, xmm1, [rip+PI] - INSTR xmm6, xmm2, [rip+PI] - INSTR xmm7, xmm0, [rip+PI] - INSTR xmm8, xmm1, [rip+PI] - INSTR xmm9, xmm2, [rip+PI] - INSTR xmm10, xmm0, [rip+PI] - INSTR xmm11, xmm1, [rip+PI] - INSTR xmm12, xmm2, [rip+PI] - INSTR xmm13, xmm0, [rip+PI] - INSTR xmm14, xmm1, [rip+PI] - INSTR xmm15, xmm2, [rip+PI] - INSTR xmm3, xmm0, [rip+PI] - INSTR xmm4, xmm1, [rip+PI] - INSTR xmm5, xmm2, [rip+PI] - INSTR xmm6, xmm0, [rip+PI] - INSTR xmm7, xmm1, [rip+PI] - INSTR xmm8, xmm2, [rip+PI] - INSTR xmm9, xmm0, [rip+PI] - INSTR xmm10, xmm1, [rip+PI] - INSTR xmm11, xmm2, [rip+PI] - INSTR xmm12, xmm0, [rip+PI] - INSTR xmm13, xmm1, [rip+PI] - INSTR xmm14, xmm2, [rip+PI] - INSTR xmm15, xmm0, [rip+PI] - INSTR xmm3, xmm1, [rip+PI] - INSTR xmm4, xmm2, [rip+PI] - INSTR xmm5, xmm0, [rip+PI] - INSTR xmm6, xmm1, [rip+PI] - INSTR xmm7, xmm2, [rip+PI] - INSTR xmm8, xmm0, [rip+PI] - INSTR xmm9, xmm1, [rip+PI] - INSTR xmm10, xmm2, [rip+PI] - INSTR xmm11, xmm0, [rip+PI] - INSTR xmm12, xmm1, [rip+PI] - INSTR xmm13, xmm2, [rip+PI] - INSTR xmm14, xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulsd-xmm_xmm_mem.S b/testcases/TaxCalc/vmulsd-xmm_xmm_mem.S deleted file mode 100644 index 4b70252..0000000 --- a/testcases/TaxCalc/vmulsd-xmm_xmm_mem.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmulsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - INSTR xmm0, xmm1, [rip+PI] - INSTR xmm1, xmm0, [rip+PI] - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulsd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vmulsd-xmm_xmm_xmm-TP.S deleted file mode 100644 index c2dc870..0000000 --- a/testcases/TaxCalc/vmulsd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmulsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vmulsd-xmm_xmm_xmm.S b/testcases/TaxCalc/vmulsd-xmm_xmm_xmm.S deleted file mode 100644 index 97d4bac..0000000 --- a/testcases/TaxCalc/vmulsd-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vmulsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vsubpd-ymm_ymm_ymm-TP.S b/testcases/TaxCalc/vsubpd-ymm_ymm_ymm-TP.S deleted file mode 100644 index 2eca166..0000000 --- a/testcases/TaxCalc/vsubpd-ymm_ymm_ymm-TP.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vsubpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - INSTR ymm15, ymm1, ymm1 - INSTR ymm3, ymm2, ymm2 - INSTR ymm4, ymm0, ymm0 - INSTR ymm5, ymm1, ymm1 - INSTR ymm6, ymm2, ymm2 - INSTR ymm7, ymm0, ymm0 - INSTR ymm8, ymm1, ymm1 - INSTR ymm9, ymm2, ymm2 - INSTR ymm10, ymm0, ymm0 - INSTR ymm11, ymm1, ymm1 - INSTR ymm12, ymm2, ymm2 - INSTR ymm13, ymm0, ymm0 - INSTR ymm14, ymm1, ymm1 - INSTR ymm15, ymm2, ymm2 - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vsubpd-ymm_ymm_ymm.S b/testcases/TaxCalc/vsubpd-ymm_ymm_ymm.S deleted file mode 100644 index 96d3fe9..0000000 --- a/testcases/TaxCalc/vsubpd-ymm_ymm_ymm.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vsubpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vsubsd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vsubsd-xmm_xmm_xmm-TP.S deleted file mode 100644 index ceb9507..0000000 --- a/testcases/TaxCalc/vsubsd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vsubsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vsubsd-xmm_xmm_xmm.S b/testcases/TaxCalc/vsubsd-xmm_xmm_xmm.S deleted file mode 100644 index b7429a4..0000000 --- a/testcases/TaxCalc/vsubsd-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vsubsd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm-TP.S deleted file mode 100644 index 1d99838..0000000 --- a/testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vunpckhpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm.S b/testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm.S deleted file mode 100644 index 8807655..0000000 --- a/testcases/TaxCalc/vunpckhpd-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vunpckhpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vxorpd-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vxorpd-xmm_xmm_xmm-TP.S deleted file mode 100644 index a7a81f7..0000000 --- a/testcases/TaxCalc/vxorpd-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,172 +0,0 @@ -#define INSTR vxorpd -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vxorpd-xmm_xmm_xmm.S b/testcases/TaxCalc/vxorpd-xmm_xmm_xmm.S deleted file mode 100644 index 4c56abc..0000000 --- a/testcases/TaxCalc/vxorpd-xmm_xmm_xmm.S +++ /dev/null @@ -1,172 +0,0 @@ -#define INSTR vxorpd -#define NINST 128 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vxorpd-ymm_ymm_ymm-TP.S b/testcases/TaxCalc/vxorpd-ymm_ymm_ymm-TP.S deleted file mode 100644 index 3a7e7fe..0000000 --- a/testcases/TaxCalc/vxorpd-ymm_ymm_ymm-TP.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vxorpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - INSTR ymm15, ymm1, ymm1 - INSTR ymm3, ymm2, ymm2 - INSTR ymm4, ymm0, ymm0 - INSTR ymm5, ymm1, ymm1 - INSTR ymm6, ymm2, ymm2 - INSTR ymm7, ymm0, ymm0 - INSTR ymm8, ymm1, ymm1 - INSTR ymm9, ymm2, ymm2 - INSTR ymm10, ymm0, ymm0 - INSTR ymm11, ymm1, ymm1 - INSTR ymm12, ymm2, ymm2 - INSTR ymm13, ymm0, ymm0 - INSTR ymm14, ymm1, ymm1 - INSTR ymm15, ymm2, ymm2 - INSTR ymm3, ymm0, ymm0 - INSTR ymm4, ymm1, ymm1 - INSTR ymm5, ymm2, ymm2 - INSTR ymm6, ymm0, ymm0 - INSTR ymm7, ymm1, ymm1 - INSTR ymm8, ymm2, ymm2 - INSTR ymm9, ymm0, ymm0 - INSTR ymm10, ymm1, ymm1 - INSTR ymm11, ymm2, ymm2 - INSTR ymm12, ymm0, ymm0 - INSTR ymm13, ymm1, ymm1 - INSTR ymm14, ymm2, ymm2 - INSTR ymm15, ymm0, ymm0 - INSTR ymm3, ymm1, ymm1 - INSTR ymm4, ymm2, ymm2 - INSTR ymm5, ymm0, ymm0 - INSTR ymm6, ymm1, ymm1 - INSTR ymm7, ymm2, ymm2 - INSTR ymm8, ymm0, ymm0 - INSTR ymm9, ymm1, ymm1 - INSTR ymm10, ymm2, ymm2 - INSTR ymm11, ymm0, ymm0 - INSTR ymm12, ymm1, ymm1 - INSTR ymm13, ymm2, ymm2 - INSTR ymm14, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vxorpd-ymm_ymm_ymm.S b/testcases/TaxCalc/vxorpd-ymm_ymm_ymm.S deleted file mode 100644 index 8ab0f92..0000000 --- a/testcases/TaxCalc/vxorpd-ymm_ymm_ymm.S +++ /dev/null @@ -1,110 +0,0 @@ -#define INSTR vxorpd -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # expand from SSE to AVX - vinsertf128 ymm0, ymm0, xmm0, 0x1 - # copy DP 1.0 - vmovaps ymm0, ymm0 - vmovaps ymm1, ymm0 - # Create DP 2.0 - vaddpd ymm1, ymm1, ymm1 - # Create DP 0.5 - vdivpd ymm2, ymm0, ymm1 -loop: - inc i - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - INSTR ymm0, ymm1, ymm0 - INSTR ymm1, ymm0, ymm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vxorps-xmm_xmm_xmm-TP.S b/testcases/TaxCalc/vxorps-xmm_xmm_xmm-TP.S deleted file mode 100644 index 77475af..0000000 --- a/testcases/TaxCalc/vxorps-xmm_xmm_xmm-TP.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vxorps -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - INSTR xmm15, xmm1, xmm1 - INSTR xmm3, xmm2, xmm2 - INSTR xmm4, xmm0, xmm0 - INSTR xmm5, xmm1, xmm1 - INSTR xmm6, xmm2, xmm2 - INSTR xmm7, xmm0, xmm0 - INSTR xmm8, xmm1, xmm1 - INSTR xmm9, xmm2, xmm2 - INSTR xmm10, xmm0, xmm0 - INSTR xmm11, xmm1, xmm1 - INSTR xmm12, xmm2, xmm2 - INSTR xmm13, xmm0, xmm0 - INSTR xmm14, xmm1, xmm1 - INSTR xmm15, xmm2, xmm2 - INSTR xmm3, xmm0, xmm0 - INSTR xmm4, xmm1, xmm1 - INSTR xmm5, xmm2, xmm2 - INSTR xmm6, xmm0, xmm0 - INSTR xmm7, xmm1, xmm1 - INSTR xmm8, xmm2, xmm2 - INSTR xmm9, xmm0, xmm0 - INSTR xmm10, xmm1, xmm1 - INSTR xmm11, xmm2, xmm2 - INSTR xmm12, xmm0, xmm0 - INSTR xmm13, xmm1, xmm1 - INSTR xmm14, xmm2, xmm2 - INSTR xmm15, xmm0, xmm0 - INSTR xmm3, xmm1, xmm1 - INSTR xmm4, xmm2, xmm2 - INSTR xmm5, xmm0, xmm0 - INSTR xmm6, xmm1, xmm1 - INSTR xmm7, xmm2, xmm2 - INSTR xmm8, xmm0, xmm0 - INSTR xmm9, xmm1, xmm1 - INSTR xmm10, xmm2, xmm2 - INSTR xmm11, xmm0, xmm0 - INSTR xmm12, xmm1, xmm1 - INSTR xmm13, xmm2, xmm2 - INSTR xmm14, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/vxorps-xmm_xmm_xmm.S b/testcases/TaxCalc/vxorps-xmm_xmm_xmm.S deleted file mode 100644 index f1a1a8c..0000000 --- a/testcases/TaxCalc/vxorps-xmm_xmm_xmm.S +++ /dev/null @@ -1,108 +0,0 @@ -#define INSTR vxorps -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - # copy DP 1.0 - vmovaps xmm0, xmm0 - vmovaps xmm1, xmm0 - # Create DP 2.0 - vaddpd xmm1, xmm1, xmm1 - # Create DP 0.5 - vdivpd xmm2, xmm0, xmm1 -loop: - inc i - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - INSTR xmm0, xmm1, xmm0 - INSTR xmm1, xmm0, xmm0 - cmp i, N - jl loop -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/xor-r32_r32-TP.S b/testcases/TaxCalc/xor-r32_r32-TP.S deleted file mode 100644 index bf5757b..0000000 --- a/testcases/TaxCalc/xor-r32_r32-TP.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR xor -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - INSTR edx, ebx - INSTR r9d, ecx - INSTR r10d, eax - INSTR r11d, ebx - INSTR r12d, ecx - INSTR r13d, eax - INSTR r14d, ebx - INSTR r15d, ecx - INSTR edx, eax - INSTR r9d, ebx - INSTR r10d, ecx - INSTR r11d, eax - INSTR r12d, ebx - INSTR r13d, ecx - INSTR r14d, eax - INSTR r15d, ebx - INSTR edx, ecx - INSTR r9d, eax - INSTR r10d, ebx - INSTR r11d, ecx - INSTR r12d, eax - INSTR r13d, ebx - INSTR r14d, ecx - INSTR r15d, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file diff --git a/testcases/TaxCalc/xor-r32_r32.S b/testcases/TaxCalc/xor-r32_r32.S deleted file mode 100644 index 652a935..0000000 --- a/testcases/TaxCalc/xor-r32_r32.S +++ /dev/null @@ -1,143 +0,0 @@ -#define INSTR xor -#define NINST 64 -#define N edi -#define i r8d - - -.intel_syntax noprefix -.globl ninst -.data -ninst: -.long NINST -.align 32 -PI: -.long 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9, 0xf01b866e, 0x400921f9 -.text -.globl latency -.type latency, @function -.align 32 -latency: - push rbp - mov rbp, rsp - xor i, i - test N, N - jle done - # create DP 1.0 - vpcmpeqw xmm0, xmm0, xmm0 # all ones - vpsllq xmm0, xmm0, 54 # logical left shift: 11111110..0 (54=64-(10-1)) - vpsrlq xmm0, xmm0, 2 # logical right shift: 1 bit for sign; leading mantissa bit is zero - push rax - push rbx - push rcx - push rdx - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - xor rax, rax - xor rbx, rbx - xor rcx, rcx - xor rdx, rdx - xor r9, r9 - xor r10, r10 - xor r11, r11 - xor r12, r12 - xor r13, r13 - xor r14, r14 - xor r15, r15 - # copy DP 1.0 - vmovq rax, xmm0 - vmovq rbx, xmm0 - # Create DP 2.0 - add rbx, rax - # Create DP 0.5 - div rax - movq rcx, rax - vmovq rax, xmm0 -loop: - inc i - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - INSTR eax, ebx - INSTR ebx, eax - cmp i, N - jl loop - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop rdx - pop rcx - pop rbx - pop rax -done: - mov rsp, rbp - pop rbp - ret -.size latency, .-latency \ No newline at end of file