Files
OSACA/testcases/vmovq-rxmm-TP.S
Jan Laukemann a1dc3b639b initial upload
2017-07-17 15:29:56 +02:00

98 lines
1.7 KiB
ArmAsm

# Instruction under test. The preprocessor substitutes it at every INSTR use.
#define INSTR vmovq
# Value stored in the exported ninst word below. The unrolled loop body in
# this file contains the same number of INSTR lines.
#define NINST 24
# Loop bound, read from edi (presumably the first integer argument under the
# System V AMD64 calling convention -- TODO confirm against the caller).
#define N edi
# Loop pass counter register.
#define i r8d
# Intel operand order (destination first), registers written without a prefix.
.intel_syntax noprefix
# Exported 32-bit data word holding NINST.
.globl ninst
.data
ninst:
.long NINST
.text
# Benchmark kernel: each loop pass issues NINST copies of INSTR, moving the
# low quadword of xmm0/xmm1/xmm2 into general-purpose registers. The copies
# carry no data dependences on one another: the xmm sources are never written
# inside the loop and the integer destinations are never read.
# NOTE(review): the exported symbol is named latency although the loop body
# is dependency-free. Presumably this is the name the harness looks up, so it
# is kept unchanged.
#   Input:   N = number of loop passes (signed). Nothing runs when N <= 0.
#   Output:  none. r8d holds the pass counter on return.
#   Clobber: flags, r8, and xmm0-xmm2 once the loop setup has run.
#   Saved:   rbp and every general-purpose destination register are pushed
#            before the loop and popped afterwards. No calls are made.
.globl latency
.type latency, @function
.align 32
latency:
    push rbp
    mov rbp, rsp
    xor i, i
    test N, N
    jle .Ldone

    # Build DP 1.0 (0x3FF0000000000000) in both 64-bit lanes of xmm0.
    vpcmpeqw xmm0, xmm0, xmm0 # all bits set
    vpsllq xmm0, xmm0, 54 # keep only the top ten bits set (54 = 64 - 10)
    vpsrlq xmm0, xmm0, 2 # clear sign bit and exponent MSB: exponent field = 0x3FF, mantissa = 0

    # Save every general-purpose register the loop writes.
    push rax
    push rbx
    push rcx
    push rdx
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15

    # Zero the destination registers before the measured loop.
    xor rax, rax
    xor rbx, rbx
    xor rcx, rcx
    xor rdx, rdx
    xor r9, r9
    xor r10, r10
    xor r11, r11
    xor r12, r12
    xor r13, r13
    xor r14, r14
    xor r15, r15

    # Source operands: xmm0 = 1.0 (built above), xmm1 = 2.0, xmm2 = 0.5.
    vmovaps xmm1, xmm0
    vaddpd xmm1, xmm1, xmm1 # 1.0 + 1.0
    vdivpd xmm2, xmm0, xmm1 # 1.0 / 2.0

    # Measured loop: NINST copies of INSTR per pass, sources rotating over
    # xmm0, xmm1, xmm2.
.Lloop:
    inc i
    INSTR rdx, xmm0
    INSTR r9, xmm1
    INSTR r10, xmm2
    INSTR rdx, xmm0
    INSTR r9, xmm1
    INSTR r10, xmm2
    INSTR r11, xmm0
    INSTR r12, xmm1
    INSTR r13, xmm2
    INSTR r14, xmm0
    INSTR r15, xmm1
    INSTR rax, xmm2
    INSTR rbx, xmm0
    INSTR rcx, xmm1
    INSTR rdx, xmm2
    INSTR r9, xmm0
    INSTR r10, xmm1
    INSTR r11, xmm2
    INSTR r12, xmm0
    INSTR r13, xmm1
    INSTR r14, xmm2
    INSTR r15, xmm0
    INSTR rax, xmm1
    INSTR rbx, xmm2
    cmp i, N
    jl .Lloop

    # Restore in reverse push order.
    pop r15
    pop r14
    pop r13
    pop r12
    pop r11
    pop r10
    pop r9
    pop rdx
    pop rcx
    pop rbx
    pop rax

    # The early exit for N <= 0 lands here, after the pops, because it
    # branches away before any of the pushes above.
.Ldone:
    mov rsp, rbp
    pop rbp
    ret
.size latency, .-latency

# Mark the object as not needing an executable stack. Without this note the
# linker assumes an executable stack for hand-written assembly objects.
.pushsection .note.GNU-stack,"",@progbits
.popsection