From af07f9cf6fbaedebeaa0795ae7cf58c9719b27b3 Mon Sep 17 00:00:00 2001
From: Johannes Hofmann
Date: Tue, 23 May 2017 07:41:54 +0200
Subject: [PATCH] add AVX load/store templates

---
Review notes (this section is not part of the commit message):
 * every template gained a header comment and inline comments
 * the two load kernels execute vzeroupper before returning, because they
   leave ymm0-ymm5 with modified upper halves
 * every file ends with a .note.GNU-stack section
 * loop bodies, labels and the exported symbols (ninst, latency) are unchanged
 * the blob ids on the "index" lines predate these edits and were not
   recomputed

 src/AVX/vmovapd-load-avx-TP.S  | 56 ++++++++++++++++++++++++++++++++++++++
 src/AVX/vmovapd-store-avx-TP.S | 56 ++++++++++++++++++++++++++++++++++++++
 src/AVX/vmovupd-load-avx-TP.S  | 56 ++++++++++++++++++++++++++++++++++++++
 src/AVX/vmovupd-store-avx-TP.S | 56 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 224 insertions(+)
 create mode 100644 src/AVX/vmovapd-load-avx-TP.S
 create mode 100644 src/AVX/vmovapd-store-avx-TP.S
 create mode 100644 src/AVX/vmovupd-load-avx-TP.S
 create mode 100644 src/AVX/vmovupd-store-avx-TP.S

diff --git a/src/AVX/vmovapd-load-avx-TP.S b/src/AVX/vmovapd-load-avx-TP.S
new file mode 100644
index 0000000..0e05322
--- /dev/null
+++ b/src/AVX/vmovapd-load-avx-TP.S
@@ -0,0 +1,56 @@
+/*
+ * Load template (-TP variant, presumably throughput): six 256-bit INSTR
+ * (vmovapd) loads into distinct registers per loop iteration.
+ * GAS with Intel syntax; relies on cpp for the #define macros.
+ *
+ * Exports:
+ *   ninst   - 32-bit count of INSTR instances in the loop body (NINST)
+ *   latency - kernel entry point; all four templates added here export
+ *             this same name
+ *
+ * In:    edi = N, signed iteration count; the loop is skipped if N <= 0
+ * Clobb: r8d (counter), ymm0-ymm5, upper halves of all ymm regs, flags
+ */
+#define INSTR vmovapd
+#define NINST 6
+#define N edi
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+.align 32
+/* 32-byte aligned source operand: 4 x 0x400921f9f01b866e (about 3.14159) */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push rbp
+        mov rbp, rsp
+        xor i, i
+        test N, N
+        jle done                /* N <= 0: run no iterations */
+loop:
+        inc i
+        INSTR ymm0, [rip+PI]
+        INSTR ymm1, [rip+PI]
+        INSTR ymm2, [rip+PI]
+        cmp i, N                /* flags for jl; the moves below keep them */
+        INSTR ymm3, [rip+PI]
+        INSTR ymm4, [rip+PI]
+        INSTR ymm5, [rip+PI]
+        jl loop
+done:
+        vzeroupper              /* ymm0-ymm5 uppers were written; clean them */
+        mov rsp, rbp
+        pop rbp
+        ret
+.size latency, .-latency
+/* tell the linker this object does not need an executable stack */
+.section .note.GNU-stack,"",@progbits
diff --git a/src/AVX/vmovapd-store-avx-TP.S b/src/AVX/vmovapd-store-avx-TP.S
new file mode 100644
index 0000000..cc20be6
--- /dev/null
+++ b/src/AVX/vmovapd-store-avx-TP.S
@@ -0,0 +1,56 @@
+/*
+ * Store template (-TP variant, presumably throughput): six 256-bit INSTR
+ * (vmovapd) stores to the same address per loop iteration.
+ * GAS with Intel syntax; relies on cpp for the #define macros.
+ *
+ * Exports:
+ *   ninst   - 32-bit count of INSTR instances in the loop body (NINST)
+ *   latency - kernel entry point; all four templates added here export
+ *             this same name
+ *
+ * In:    edi = N, signed iteration count; the loop is skipped if N <= 0
+ * Clobb: r8d (counter), flags, the 32 bytes at PI
+ * Note:  ymm0-ymm5 are only read and are stored as found on entry.
+ */
+#define INSTR vmovapd
+#define NINST 6
+#define N edi
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+.align 32
+/* 32-byte aligned store target, initially 4 x 0x400921f9f01b866e */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push rbp
+        mov rbp, rsp
+        xor i, i
+        test N, N
+        jle done                /* N <= 0: run no iterations */
+loop:
+        inc i
+        INSTR [rip+PI], ymm0
+        INSTR [rip+PI], ymm1
+        INSTR [rip+PI], ymm2
+        cmp i, N                /* flags for jl; the moves below keep them */
+        INSTR [rip+PI], ymm3
+        INSTR [rip+PI], ymm4
+        INSTR [rip+PI], ymm5
+        jl loop
+done:
+        mov rsp, rbp
+        pop rbp
+        ret
+.size latency, .-latency
+/* tell the linker this object does not need an executable stack */
+.section .note.GNU-stack,"",@progbits
diff --git a/src/AVX/vmovupd-load-avx-TP.S b/src/AVX/vmovupd-load-avx-TP.S
new file mode 100644
index 0000000..80bff3b
--- /dev/null
+++ b/src/AVX/vmovupd-load-avx-TP.S
@@ -0,0 +1,56 @@
+/*
+ * Load template (-TP variant, presumably throughput): six 256-bit INSTR
+ * (vmovupd) loads into distinct registers per loop iteration.
+ * GAS with Intel syntax; relies on cpp for the #define macros.
+ *
+ * Exports:
+ *   ninst   - 32-bit count of INSTR instances in the loop body (NINST)
+ *   latency - kernel entry point; all four templates added here export
+ *             this same name
+ *
+ * In:    edi = N, signed iteration count; the loop is skipped if N <= 0
+ * Clobb: r8d (counter), ymm0-ymm5, upper halves of all ymm regs, flags
+ */
+#define INSTR vmovupd
+#define NINST 6
+#define N edi
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+.align 32
+/* 32-byte aligned source operand: 4 x 0x400921f9f01b866e (about 3.14159) */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push rbp
+        mov rbp, rsp
+        xor i, i
+        test N, N
+        jle done                /* N <= 0: run no iterations */
+loop:
+        inc i
+        INSTR ymm0, [rip+PI]
+        INSTR ymm1, [rip+PI]
+        INSTR ymm2, [rip+PI]
+        cmp i, N                /* flags for jl; the moves below keep them */
+        INSTR ymm3, [rip+PI]
+        INSTR ymm4, [rip+PI]
+        INSTR ymm5, [rip+PI]
+        jl loop
+done:
+        vzeroupper              /* ymm0-ymm5 uppers were written; clean them */
+        mov rsp, rbp
+        pop rbp
+        ret
+.size latency, .-latency
+/* tell the linker this object does not need an executable stack */
+.section .note.GNU-stack,"",@progbits
diff --git a/src/AVX/vmovupd-store-avx-TP.S b/src/AVX/vmovupd-store-avx-TP.S
new file mode 100644
index 0000000..0993657
--- /dev/null
+++ b/src/AVX/vmovupd-store-avx-TP.S
@@ -0,0 +1,56 @@
+/*
+ * Store template (-TP variant, presumably throughput): six 256-bit INSTR
+ * (vmovupd) stores to the same address per loop iteration.
+ * GAS with Intel syntax; relies on cpp for the #define macros.
+ *
+ * Exports:
+ *   ninst   - 32-bit count of INSTR instances in the loop body (NINST)
+ *   latency - kernel entry point; all four templates added here export
+ *             this same name
+ *
+ * In:    edi = N, signed iteration count; the loop is skipped if N <= 0
+ * Clobb: r8d (counter), flags, the 32 bytes at PI
+ * Note:  ymm0-ymm5 are only read and are stored as found on entry.
+ */
+#define INSTR vmovupd
+#define NINST 6
+#define N edi
+#define i r8d
+
+
+.intel_syntax noprefix
+.globl ninst
+.data
+ninst:
+.long NINST
+.align 32
+/* 32-byte aligned store target, initially 4 x 0x400921f9f01b866e */
+PI:
+.long 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9, 0xf01b866e,0x400921f9
+.text
+.globl latency
+.type latency, @function
+.align 32
+latency:
+        push rbp
+        mov rbp, rsp
+        xor i, i
+        test N, N
+        jle done                /* N <= 0: run no iterations */
+loop:
+        inc i
+        INSTR [rip+PI], ymm0
+        INSTR [rip+PI], ymm1
+        INSTR [rip+PI], ymm2
+        cmp i, N                /* flags for jl; the moves below keep them */
+        INSTR [rip+PI], ymm3
+        INSTR [rip+PI], ymm4
+        INSTR [rip+PI], ymm5
+        jl loop
+done:
+        mov rsp, rbp
+        pop rbp
+        ret
+.size latency, .-latency
+/* tell the linker this object does not need an executable stack */
+.section .note.GNU-stack,"",@progbits