Add support for the Intel syntax supported by MSVC and ICC

This commit is contained in:
pleroy
2025-02-02 14:02:16 +01:00
parent 785a365c63
commit 1a7c1588f6
30 changed files with 2744 additions and 499 deletions

View File

@@ -0,0 +1,227 @@
# Produced with ICC 2021.10.0 with -O3 -xcore-avx512, https://godbolt.org/z/87bYseh8r
# Intel-syntax listing in ICC style: `#` starts a comment, basic blocks are
# labelled `..B1.N`, and each instruction carries a trailing `#a.b` tag
# (presumably source line.column -- confirm against the godbolt link above).
#
# Entry block: set up a frame, align the stack and run the ICC start-up call.
..B1.1: # Preds ..B1.0
push rbp #5.32
mov rbp, rsp #5.32
# Round rsp down to a 128-byte boundary; the original rsp stays in rbp.
and rsp, -128 #5.32
push r15 #5.32
push rbx #5.32
# 2 pushes (16 bytes) + 112 = 128, so rsp is 128-byte aligned again.
sub rsp, 112 #5.32
# Arguments 3 and a 64-bit mask for the Intel runtime helper
# (presumably CPU-feature initialisation -- not visible in this file).
mov edi, 3 #5.32
mov rsi, 0x64199d9ffe #5.32
call __intel_new_feature_proc_init #5.32
..B1.34: # Preds ..B1.1
# Read MXCSR, OR in 32832 (0x8040 = bits 6 and 15, which the Intel SDM names
# DAZ and FTZ) and write it back. The `xor edi, edi` interleaved here is the
# zero argument for the time() call below.
vstmxcsr DWORD PTR [rsp] #5.32
xor edi, edi #11.7
or DWORD PTR [rsp], 32832 #5.32
vldmxcsr DWORD PTR [rsp] #5.32
call time #11.7
..B1.2: # Preds ..B1.34
# srand() is seeded with the low 32 bits returned by time(0).
mov edi, eax #11.1
call srand #11.1
..B1.3: # Preds ..B1.2
# malloc(1600): table that receives 200 8-byte row pointers below.
mov edi, 1600 #13.16
call malloc #13.16
..B1.35: # Preds ..B1.3
mov rsi, rax #13.16
..B1.4: # Preds ..B1.35
# rbx = pointer table (callee-saved, survives the calls), r15 = row index 0.
xor eax, eax #14.1
mov rbx, rsi #14.1
mov r15, rax #14.1
# Allocation loop: table[r15] = malloc(1600) for r15 = 0..199.
..B1.5: # Preds ..B1.6 ..B1.4
mov edi, 1600 #15.22
call malloc #15.22
..B1.6: # Preds ..B1.5
mov QWORD PTR [rbx+r15*8], rax #15.5
inc r15 #14.1
cmp r15, 200 #14.1
jb ..B1.5 # Prob 82% #14.1
# Random initialisation: for r15 = 0..197 and r14 = 0..197 store
# (rand() % 100000) / <constant .L_2il0floatpacket.0> into element r14+1 of
# row r15+1, i.e. only the interior of each row and of the table is written.
..B1.7: # Preds ..B1.6
xor eax, eax #17.1
mov rsi, rbx #
mov r15, rax #19.44
# r13 and r14 are used as loop registers below; their incoming values are
# spilled to the stack and reloaded in ..B1.12.
mov QWORD PTR [rsp], r13 #19.44[spill]
mov QWORD PTR [8+rsp], r14 #19.44[spill]
..B1.8: # Preds ..B1.11 ..B1.7
# r13 = row pointer table[r15+1]; r14 = column counter, reset per row.
mov r13, QWORD PTR [8+rbx+r15*8] #19.5
xor r14d, r14d #18.3
..B1.9: # Preds ..B1.10 ..B1.8
call rand #19.26
..B1.37: # Preds ..B1.9
mov r8d, eax #19.26
..B1.10: # Preds ..B1.37
# Signed division by 100000 without a divide: 351843721 = ceil(2^45/100000).
# edx:eax = r8d * magic; quotient = (edx >> 13) - (r8d >> 31).
mov eax, 351843721 #19.33
mov ecx, r8d #19.33
imul r8d #19.33
sar ecx, 31 #19.33
# Zero xmm0 first so the vcvtsi2sd below has no dependency on its old value.
vxorpd xmm0, xmm0, xmm0 #19.33
sar edx, 13 #19.33
sub edx, ecx #19.33
# r8d = r8d - quotient * 100000, i.e. the remainder rand() % 100000.
imul edi, edx, -100000 #19.33
add r8d, edi #19.33
vcvtsi2sd xmm0, xmm0, r8d #19.33
vdivsd xmm1, xmm0, QWORD PTR .L_2il0floatpacket.0[rip] #19.44
# Store to row[r14+1] (the +8 byte displacement skips element 0).
vmovsd QWORD PTR [8+r13+r14*8], xmm1 #19.5
inc r14 #18.3
cmp r14, 198 #18.3
jb ..B1.9 # Prob 82% #18.3
..B1.11: # Preds ..B1.10
inc r15 #17.1
cmp r15, 198 #17.1
jb ..B1.8 # Prob 91% #17.1
# Stencil sweep. For each row index rcx = 0..197, with
#   above = table[rcx], cur = table[rcx+1], below = table[rcx+2],
# every interior element is rewritten in place as
#   cur[j] = xmm0 * (cur[j-1] + below[j] + cur[j+1] + above[j]),  j = 1..198
# so each new value feeds the next one (cur[j-1] is the value just stored).
# Two code versions exist, selected by the address-distance tests in
# ..B1.13-..B1.16; each has a 4x-unrolled loop (49 trips = 196 elements)
# followed by a scalar remainder loop for the last two elements.
..B1.12: # Preds ..B1.11
# Reload the r13/r14 values spilled in ..B1.7 and set up loop invariants:
# rsi = pointer table, rcx = row counter, xmm0 = scale factor,
# dil = 0 (initial value for the byte trip counters), rdx = 196 (start index
# of the remainder loops).
mov r13, QWORD PTR [rsp] #[spill]
mov rsi, rbx #
mov r14, QWORD PTR [8+rsp] #[spill]
xor ecx, ecx #23.1
vmovsd xmm0, QWORD PTR .L_2il0floatpacket.1[rip] #10.14
xor dil, dil #10.14
mov edx, 196 #10.14
..B1.13: # Preds ..B1.27 ..B1.12
# rax = cur. r8 = cur - (cur + 8), which is always -8, so the jge below is
# never taken and the following `neg`/`jl` always branches to ..B1.22.
# NOTE(review): as written, ..B1.15-..B1.20 therefore look unreachable at
# run time; they remain part of the listing.
mov rax, QWORD PTR [8+rsi+rcx*8] #25.5
mov r8, rax #25.5
lea r9, QWORD PTR [8+rax] #25.5
sub r8, r9 #25.5
cmp r8, 1584 #24.3
jge ..B1.15 # Prob 50% #24.3
..B1.14: # Preds ..B1.13
neg r8 #26.7
cmp r8, 1584 #24.3
jl ..B1.22 # Prob 50% #24.3
..B1.15: # Preds ..B1.13 ..B1.14
# Second distance test, between cur+8 and cur+16.
lea r8, QWORD PTR [16+rax] #27.9
sub r9, r8 #27.9
cmp r9, 1584 #24.3
jge ..B1.17 # Prob 50% #24.3
..B1.16: # Preds ..B1.15
neg r9 #25.5
cmp r9, 1584 #24.3
jl ..B1.22 # Prob 50% #24.3
# ---- Version A: cur[0] is preloaded into xmm1 and each result is carried
# ---- in a register; the carried value is added last in every sum.
..B1.17: # Preds ..B1.15 ..B1.16
vmovsd xmm1, QWORD PTR [rax] #27.9
mov bl, dil #24.3
mov r9, QWORD PTR [rsi+rcx*8] #27.21
xor r11d, r11d #25.5
mov r10, QWORD PTR [16+rsi+rcx*8] #26.19
mov r8, QWORD PTR [8+rsi+rcx*8] #27.9
# r11 is a byte offset advancing by 32 (four doubles) per trip; bl counts trips.
..B1.18: # Preds ..B1.18 ..B1.17
vmovsd xmm2, QWORD PTR [8+r11+r10] #26.19
inc bl #24.3
vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #25.5
vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #25.5
vaddsd xmm1, xmm4, xmm1 #25.5
vmulsd xmm8, xmm0, xmm1 #27.21
vmovsd QWORD PTR [8+r11+r8], xmm8 #25.5
vmovsd xmm5, QWORD PTR [16+r11+r10] #26.19
vaddsd xmm6, xmm5, QWORD PTR [24+r11+r8] #26.19
vaddsd xmm7, xmm6, QWORD PTR [16+r11+r9] #27.9
vaddsd xmm9, xmm7, xmm8 #27.21
vmulsd xmm13, xmm0, xmm9 #27.21
vmovsd QWORD PTR [16+r11+r8], xmm13 #25.5
vmovsd xmm10, QWORD PTR [24+r11+r10] #26.19
vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #26.19
vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.9
vaddsd xmm14, xmm12, xmm13 #27.21
vmulsd xmm18, xmm0, xmm14 #27.21
vmovsd QWORD PTR [24+r11+r8], xmm18 #25.5
vmovsd xmm15, QWORD PTR [32+r11+r10] #26.19
vaddsd xmm16, xmm15, QWORD PTR [40+r11+r8] #26.19
vaddsd xmm17, xmm16, QWORD PTR [32+r11+r9] #27.9
vaddsd xmm19, xmm17, xmm18 #27.21
# The fourth result lands in xmm1 so it is the carried value of the next trip.
vmulsd xmm1, xmm0, xmm19 #27.21
vmovsd QWORD PTR [32+r11+r8], xmm1 #25.5
add r11, 32 #24.3
cmp bl, 49 #24.3
jb ..B1.18 # Prob 27% #24.3
..B1.19: # Preds ..B1.18
# Remainder: r11 becomes an element index starting at 196 (rdx).
mov r11, rdx #24.3
..B1.20: # Preds ..B1.20 ..B1.19
# Writes cur[r11+1] for r11 = 196, 197; here cur[r11] is reloaded from memory.
vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7
vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19
vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9
vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21
vmulsd xmm5, xmm0, xmm4 #27.21
vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5
inc r11 #24.3
cmp r11, 198 #24.3
jb ..B1.20 # Prob 66% #24.3
jmp ..B1.27 # Prob 100% #24.3
# ---- Version B: the left neighbour is loaded from memory once per trip and
# ---- then carried in a register; the carried value is added first.
..B1.22: # Preds ..B1.14 ..B1.16
mov r9, QWORD PTR [rsi+rcx*8] #27.21
mov bl, dil #24.3
mov r10, QWORD PTR [16+rsi+rcx*8] #26.19
xor r11d, r11d #25.5
mov r8, QWORD PTR [8+rsi+rcx*8] #26.7
..B1.23: # Preds ..B1.23 ..B1.22
inc bl #24.3
vmovsd xmm1, QWORD PTR [r11+r8] #26.7
vaddsd xmm2, xmm1, QWORD PTR [8+r11+r10] #26.19
vaddsd xmm3, xmm2, QWORD PTR [16+r11+r8] #27.9
vaddsd xmm4, xmm3, QWORD PTR [8+r11+r9] #27.21
vmulsd xmm5, xmm0, xmm4 #27.21
vmovsd QWORD PTR [8+r11+r8], xmm5 #25.5
vaddsd xmm6, xmm5, QWORD PTR [16+r11+r10] #26.19
vaddsd xmm7, xmm6, QWORD PTR [24+r11+r8] #27.9
vaddsd xmm8, xmm7, QWORD PTR [16+r11+r9] #27.21
vmulsd xmm9, xmm0, xmm8 #27.21
vmovsd QWORD PTR [16+r11+r8], xmm9 #25.5
vaddsd xmm10, xmm9, QWORD PTR [24+r11+r10] #26.19
vaddsd xmm11, xmm10, QWORD PTR [32+r11+r8] #27.9
vaddsd xmm12, xmm11, QWORD PTR [24+r11+r9] #27.21
vmulsd xmm13, xmm0, xmm12 #27.21
vmovsd QWORD PTR [24+r11+r8], xmm13 #25.5
vaddsd xmm14, xmm13, QWORD PTR [32+r11+r10] #26.19
vaddsd xmm15, xmm14, QWORD PTR [40+r11+r8] #27.9
vaddsd xmm16, xmm15, QWORD PTR [32+r11+r9] #27.21
vmulsd xmm17, xmm0, xmm16 #27.21
vmovsd QWORD PTR [32+r11+r8], xmm17 #25.5
add r11, 32 #24.3
cmp bl, 49 #24.3
jb ..B1.23 # Prob 27% #24.3
..B1.24: # Preds ..B1.23
mov r11, rdx #24.3
..B1.25: # Preds ..B1.25 ..B1.24
# Remainder loop, same shape as ..B1.20: writes cur[197] and cur[198].
vmovsd xmm1, QWORD PTR [r8+r11*8] #26.7
vaddsd xmm2, xmm1, QWORD PTR [8+r10+r11*8] #26.19
vaddsd xmm3, xmm2, QWORD PTR [16+r8+r11*8] #27.9
vaddsd xmm4, xmm3, QWORD PTR [8+r9+r11*8] #27.21
vmulsd xmm5, xmm0, xmm4 #27.21
vmovsd QWORD PTR [8+r8+r11*8], xmm5 #25.5
inc r11 #24.3
cmp r11, 198 #24.3
jb ..B1.25 # Prob 66% #24.3
..B1.27: # Preds ..B1.25 ..B1.20
# Row epilogue: copy the 8 bytes at cur+1592 (element 199 of the current
# row) into element 1 of the row below (loaded before rcx is incremented),
# then advance to the next row.
mov r8, QWORD PTR [16+rsi+rcx*8] #30.3
inc rcx #23.1
mov rax, QWORD PTR [1592+rax] #30.15
mov QWORD PTR [8+r8], rax #30.3
cmp rcx, 198 #23.1
jb ..B1.13 # Prob 91% #23.1
# Result check and function exit.
..B1.28: # Preds ..B1.27
# Load element 198 of row 198 (byte offset 1584 = 198*8 in both the pointer
# table and the row) and compare it with the constant .L_2il0floatpacket.2.
mov rax, QWORD PTR [1584+rsi] #33.4
vmovsd xmm0, QWORD PTR [1584+rax] #33.4
vucomisd xmm0, QWORD PTR .L_2il0floatpacket.2[rip] #33.29
# Parity set = unordered compare (a NaN operand): treated as "not equal".
jp ..B1.29 # Prob 0% #33.29
je ..B1.30 # Prob 5% #33.29
..B1.29: # Preds ..B1.28 ..B1.30
# Epilogue: return 0, undoing the prologue in reverse order
# (drop 112 bytes, pop rbx/r15, restore the pre-alignment rsp from rbp).
xor eax, eax #34.1
add rsp, 112 #34.1
pop rbx #34.1
pop r15 #34.1
mov rsp, rbp #34.1
pop rbp #34.1
ret #34.1
..B1.30: # Preds ..B1.28
# Equal case: printf(.L_2__STRING.0, <element 0 of row 0>).
mov rax, QWORD PTR [rsi] #33.39
mov edi, offset flat: .L_2__STRING.0 #33.39
vmovsd xmm0, QWORD PTR [rax] #33.39
# eax = 1: one vector register (xmm0) carries an argument to the variadic call.
mov eax, 1 #33.39
call printf #33.39
jmp ..B1.29 # Prob 100% #33.39
# Constants. Each floating-point packet is one 64-bit IEEE-754 double emitted
# as two 32-bit words, low word first.
# 0x408f400000000000 = 1000.0 (divisor used by the random-fill loop).
.L_2il0floatpacket.0:
.long 0x00000000,0x408f4000
# 0x3ff3ae147ae147ae ~= 1.23 (scale factor loaded into xmm0 for the sweep).
.L_2il0floatpacket.1:
.long 0x7ae147ae,0x3ff3ae14
# 0x3ff3c0c1fc8f3238 ~= 1.23456 (value the final element is compared with).
.L_2il0floatpacket.2:
.long 0xfc8f3238,0x3ff3c0c1
# 681509 = 0x000A6625, i.e. the little-endian bytes 25 66 0A 00: "%f\n" plus NUL.
.L_2__STRING.0:
.long 681509

View File

@@ -0,0 +1,9 @@
; https://godbolt.org/z/o49jjojnx /std:c++latest /O1 /fp:contract /arch:AVX2
; MSVC loop body using a fused multiply-add. rax is a running pointer into one
; array and also serves as the common index for the three other arrays, which
; are addressed as rcx+rax, rdx+rax and r8+rax; rbx is the remaining trip count.
; Per trip: [r8+rax] = [rcx+rax] * [rax] + [rdx+rax].
$LL13@foo:
vmovsd xmm1, QWORD PTR [rax]
vmovsd xmm0, QWORD PTR [rcx+rax]
; 213 operand order: xmm1 = xmm0 * xmm1 + memory operand.
vfmadd213sd xmm1, xmm0, QWORD PTR [rdx+rax]
vmovsd QWORD PTR [r8+rax], xmm1
; Advance by one double using lea (an address computation, not an add).
lea rax, QWORD PTR [rax+8]
; The flags from this sub drive the loop branch.
sub rbx, 1
jne SHORT $LL13@foo

View File

@@ -0,0 +1,19 @@
; Translated from kernel_x86_memdep.s
; Hand-written loop in which stores are followed by loads of the same
; addresses, with the address registers modified in between. Memory operands
; carry no size prefix (no `QWORD PTR`), unlike the compiler listings.
; Below, rax0 and rcx0 denote the values of rax and rcx at the top of a trip.
L4:
; Store A: address rax0+8.
vmovsd [rax+8], xmm0
add rax, 8
; Store B: address rax0+8 + rcx0*8 + 8.
vmovsd [rax+rcx*8+8], xmm0
; rax was advanced by 8, so [rax] is the address written by store A.
vaddsd xmm0, xmm0, [rax]
; Subtracting -8 is another +8 on rax ...
sub rax, -8
; ... so [rax-8] is again the address of store A.
vaddsd xmm0, xmm0, [rax-8]
dec rcx
; rax is now rax0+16 and rcx is rcx0-1: this is the address of store B.
vaddsd xmm0, xmm0, [rax+rcx*8+8]
; Same address once more, with the index copied into a different register.
mov rdx, rcx
vaddsd xmm0, xmm0, [rax+rdx*8+8]
vmulsd xmm0, xmm0, xmm1
add rax, 8
cmp rsi, rax
jne L4
; Added to test LOAD dependencies
; The load address depends on rax, which the shift has just rewritten.
shl rax, 5
subsd xmm10, QWORD PTR [rax+r8]

View File

@@ -0,0 +1,124 @@
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0
include listing.inc
INCLUDELIB MSVCRTD
INCLUDELIB OLDNAMES
; One-byte flag, initialised to 1, whose address `kernel` passes to
; __CheckForDebuggerJustMyCode.
msvcjmc SEGMENT
__FAC6D534_triad@c DB 01H
msvcjmc ENDS
; Symbols exported by this listing, followed by the external ones it references.
PUBLIC kernel
PUBLIC __JustMyCode_Default
EXTRN dummy:PROC
EXTRN _RTC_InitBase:PROC
EXTRN _RTC_Shutdown:PROC
EXTRN __CheckForDebuggerJustMyCode:PROC
EXTRN _fltused:DWORD
; COMDAT pdata
; Three image-relative DWORDs for `kernel`: $LN9, $LN9+194 and its unwind
; record (presumably begin address, end address and unwind info of a Windows
; x64 function-table entry -- confirm against the x64 exception-handling docs).
pdata SEGMENT
$pdata$kernel DD imagerel $LN9
DD imagerel $LN9+194
DD imagerel $unwind$kernel
pdata ENDS
; COMDAT rtc$TMZ
; Pointer to _RTC_Shutdown (run-time-check support; the symbol is only
; declared EXTRN here).
rtc$TMZ SEGMENT
_RTC_Shutdown.rtc$TMZ DQ FLAT:_RTC_Shutdown
rtc$TMZ ENDS
; COMDAT rtc$IMZ
; Pointer to _RTC_InitBase (run-time-check support; the symbol is only
; declared EXTRN here).
rtc$IMZ SEGMENT
_RTC_InitBase.rtc$IMZ DQ FLAT:_RTC_InitBase
rtc$IMZ ENDS
; COMDAT xdata
; Unwind record referenced from pdata above, emitted as raw DWORDs.
xdata SEGMENT
$unwind$kernel DD 025052301H
DD 011e2323H
DD 070170025H
DD 05016H
xdata ENDS
; Function compile flags: /Odt
; COMDAT __JustMyCode_Default
; Empty procedure: its whole body is a single return.
_TEXT SEGMENT
__JustMyCode_Default PROC ; COMDAT
ret 0
__JustMyCode_Default ENDP
_TEXT ENDS
; Function compile flags: /Odtp /RTCsu /ZI
; COMDAT kernel
; Unoptimised build of the triad kernel. Equivalent C, as read from the code:
;   for (r = 0; r < repeat; r++) {
;     for (i = 0; i < cur_elements; i++) a[i] = b[i] + s * c[i];
;     dummy(a);
;   }
; Every variable lives in memory and is reloaded on each use.
_TEXT SEGMENT
; rbp-relative offsets. rbp is set to rsp+32 after the prologue, so a$..s$
; resolve to the four home slots written at entry (entry rsp+8..+32) and
; repeat$/cur_elements$ to the stack slots that follow them (+40, +48).
r$1 = 4
i$2 = 36
a$ = 288
b$ = 296
c$ = 304
s$ = 312
repeat$ = 320
cur_elements$ = 328
kernel PROC ; COMDAT
; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c
; Line 16
$LN9:
; Store the four register arguments (xmm3, r8, rdx, rcx) into the slots
; just above the return address.
movsd QWORD PTR [rsp+32], xmm3
mov QWORD PTR [rsp+24], r8
mov QWORD PTR [rsp+16], rdx
mov QWORD PTR [rsp+8], rcx
push rbp
push rdi
sub rsp, 296 ; 00000128H
lea rbp, QWORD PTR [rsp+32]
; Just-My-Code debugger hook; its argument is the address of the flag byte
; defined in the msvcjmc segment.
lea rcx, OFFSET FLAT:__FAC6D534_triad@c
call __CheckForDebuggerJustMyCode
; npad is not defined in this listing (presumably a padding macro from
; listing.inc -- confirm).
npad 1
; Line 17
; Outer loop: r = 0; $LN2 increments r, $LN4 tests it against repeat.
mov DWORD PTR r$1[rbp], 0
jmp SHORT $LN4@kernel
$LN2@kernel:
mov eax, DWORD PTR r$1[rbp]
inc eax
mov DWORD PTR r$1[rbp], eax
$LN4@kernel:
mov eax, DWORD PTR repeat$[rbp]
cmp DWORD PTR r$1[rbp], eax
; Signed compare: leave the outer loop once r >= repeat.
jge SHORT $LN3@kernel
; Line 18
; Inner loop: i = 0; $LN5 increments i, $LN7 tests it against cur_elements.
mov DWORD PTR i$2[rbp], 0
jmp SHORT $LN7@kernel
$LN5@kernel:
mov eax, DWORD PTR i$2[rbp]
inc eax
mov DWORD PTR i$2[rbp], eax
$LN7@kernel:
mov eax, DWORD PTR cur_elements$[rbp]
cmp DWORD PTR i$2[rbp], eax
jge SHORT $LN6@kernel
; Line 19
; a[i] = b[i] + s * c[i]; i is sign-extended to 64 bits for each index use.
movsxd rax, DWORD PTR i$2[rbp]
movsxd rcx, DWORD PTR i$2[rbp]
mov rdx, QWORD PTR c$[rbp]
movsd xmm0, QWORD PTR s$[rbp]
mulsd xmm0, QWORD PTR [rdx+rcx*8]
mov rcx, QWORD PTR b$[rbp]
movsd xmm1, QWORD PTR [rcx+rax*8]
addsd xmm1, xmm0
movaps xmm0, xmm1
movsxd rax, DWORD PTR i$2[rbp]
mov rcx, QWORD PTR a$[rbp]
movsd QWORD PTR [rcx+rax*8], xmm0
; Line 20
jmp SHORT $LN5@kernel
$LN6@kernel:
; Line 21
; dummy(a) is called once per outer iteration.
mov rcx, QWORD PTR a$[rbp]
call dummy
npad 1
; Line 22
jmp SHORT $LN2@kernel
$LN3@kernel:
; Line 23
; rbp+264 = (rsp+32)+264 = rsp+296: releases the locals so the two pops
; below see the saved rdi and rbp.
lea rsp, QWORD PTR [rbp+264]
pop rdi
pop rbp
ret 0
kernel ENDP
_TEXT ENDS
END

View File

@@ -0,0 +1,139 @@
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.41.34123.0
include listing.inc
INCLUDELIB MSVCRTD
INCLUDELIB OLDNAMES
; One-byte flags, each initialised to 1; the names look like one entry per
; included header plus one for triad.c. Only __FAC6D534_triad@c is referenced
; by the code in this listing (it is passed to __CheckForDebuggerJustMyCode).
msvcjmc SEGMENT
__68D132EB_concurrencysal@h DB 01H
__4DC47379_sal@h DB 01H
__B6ADDB23_vadefs@h DB 01H
__A2A1025A_vcruntime@h DB 01H
__0EF3BC42_intrin0@inl@h DB 01H
__5EC35D46_setjmp@h DB 01H
__368E74E0_mmintrin@h DB 01H
__735960E1_corecrt@h DB 01H
__211DB995_corecrt_malloc@h DB 01H
__7CD62D9E_malloc@h DB 01H
__22746E0E_xmmintrin@h DB 01H
__4716E7C2_emmintrin@h DB 01H
__98B78F4B_pmmintrin@h DB 01H
__286EFCC9_tmmintrin@h DB 01H
__0155E94A_smmintrin@h DB 01H
__64376086_nmmintrin@h DB 01H
__B18C9AC8_wmmintrin@h DB 01H
__7A18D7CF_zmmintrin@h DB 01H
__4D0C7505_immintrin@h DB 01H
__F7CF9440_ammintrin@h DB 01H
__78F5E131_intrin@h DB 01H
__6A584D4A_iacaMarks@h DB 01H
__FAC6D534_triad@c DB 01H
msvcjmc ENDS
; Symbols exported by this listing, followed by the external ones it references.
PUBLIC kernel
PUBLIC __JustMyCode_Default
EXTRN dummy:PROC
EXTRN __CheckForDebuggerJustMyCode:PROC
EXTRN _fltused:DWORD
; COMDAT pdata
; Three image-relative DWORDs for `kernel`: $LN18, $LN18+182 and its unwind
; record (presumably begin address, end address and unwind info of a Windows
; x64 function-table entry -- confirm against the x64 exception-handling docs).
pdata SEGMENT
$pdata$kernel DD imagerel $LN18
DD imagerel $LN18+182
DD imagerel $unwind$kernel
pdata ENDS
; COMDAT voltbl
; Two raw bytes of compiler metadata; their meaning is not visible in this file.
voltbl SEGMENT
_volmd DB 05bH
DB 079H
voltbl ENDS
; COMDAT xdata
; Unwind record referenced from pdata above, emitted as raw DWORDs.
xdata SEGMENT
$unwind$kernel DD 0c2001H
DD 026820H
DD 0b7419H
DD 0a6419H
DD 095419H
DD 083419H
DD 0e0155219H
xdata ENDS
; Function compile flags: /Odt
; COMDAT __JustMyCode_Default
; Empty procedure: its whole body is a single return.
_TEXT SEGMENT
__JustMyCode_Default PROC ; COMDAT
ret 0
__JustMyCode_Default ENDP
_TEXT ENDS
; Function compile flags: /Ogspy
; COMDAT kernel
; Optimised build of the triad kernel: `repeat` times, run an inner loop of
; `cur_elements` iterations computing a[i] = s * c[i] + b[i], then call
; dummy(a). The inner loop body is bracketed by two byte stores through gs:
; (presumably the IACA start/end markers from iacaMarks.h, which appears in
; this listing's msvcjmc segment -- confirm).
; Register roles after the prologue (argument names follow the a$/b$/c$/s$
; equates and the Microsoft x64 argument order):
;   rbp = a (from rcx)   rdi = b (from rdx)   r14 = c (from r8)
;   xmm6 = s (from xmm3) rsi = cur_elements   rbx = remaining outer trips
_TEXT SEGMENT
; rsp-relative offsets, valid after `push r14` + `sub rsp, 48` (56 bytes):
; 64..88 are the four slots above the return address, 96 and 104 the two
; stack arguments that follow them.
a$ = 64
b$ = 72
c$ = 80
s$ = 88
repeat$ = 96
cur_elements$ = 104
kernel PROC ; COMDAT
; File C:\Users\phl.bastiani\Projects\OSACA\validation\kernels\triad.c
; Line 22
$LN18:
; rax keeps the entry rsp; rbx/rbp/rsi/rdi are saved in the four slots above
; the return address instead of being pushed.
mov rax, rsp
mov QWORD PTR [rax+8], rbx
mov QWORD PTR [rax+16], rbp
mov QWORD PTR [rax+24], rsi
mov QWORD PTR [rax+32], rdi
push r14
sub rsp, 48 ; 00000030H
mov rbp, rcx
; xmm6 is saved at entry rsp-24 (= new rsp+32, where the epilogue reloads it)
; before being reused to hold s.
movaps XMMWORD PTR [rax-24], xmm6
lea rcx, OFFSET FLAT:__FAC6D534_triad@c
movaps xmm6, xmm3
mov r14, r8
mov rdi, rdx
call __CheckForDebuggerJustMyCode
mov eax, DWORD PTR repeat$[rsp]
movsxd rsi, DWORD PTR cur_elements$[rsp]
; Skip everything when repeat <= 0 (signed).
test eax, eax
jle SHORT $LN3@kernel
; Writing ebx zero-extends into rbx, which is counted down with `sub rbx, 1`.
mov ebx, eax
$LL4@kernel:
; Line 24
; Skip the inner loop when cur_elements <= 0.
test rsi, rsi
jle SHORT $LN6@kernel
; Single running pointer: rax walks b, while c and a are addressed as
; rax + (c - b) and rax + (a - b).
mov rcx, r14
mov rdx, rbp
sub rcx, rdi
mov rax, rdi
sub rdx, rdi
mov r8, rsi
$LL7@kernel:
; Line 26
; First marker store (byte 111 to gs:111), at the top of every iteration.
mov BYTE PTR gs:111, 111 ; 0000006fH
; Line 28
; xmm0 = s; xmm0 *= c[i]; xmm0 += b[i]; a[i] = xmm0.
movaps xmm0, xmm6
mulsd xmm0, QWORD PTR [rax+rcx]
addsd xmm0, QWORD PTR [rax]
movsd QWORD PTR [rdx+rax], xmm0
add rax, 8
; Line 30
; Second marker store (byte 222 to gs:222), before the loop-control pair.
mov BYTE PTR gs:222, 222 ; 000000deH
sub r8, 1
jne SHORT $LL7@kernel
$LN6@kernel:
; Line 33
; dummy(a) is called once per outer iteration.
mov rcx, rbp
call dummy
sub rbx, 1
jne SHORT $LL4@kernel
$LN3@kernel:
; Line 35
; Restore the registers saved in the prologue: rsp+64..+88 are the same
; slots as entry rsp+8..+32.
mov rbx, QWORD PTR [rsp+64]
mov rbp, QWORD PTR [rsp+72]
mov rsi, QWORD PTR [rsp+80]
mov rdi, QWORD PTR [rsp+88]
movaps xmm6, XMMWORD PTR [rsp+32]
add rsp, 48 ; 00000030H
pop r14
ret 0
kernel ENDP
_TEXT ENDS
END