mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
initial support for SPR
This commit is contained in:
30
README.rst
30
README.rst
@@ -100,7 +100,7 @@ The usage of OSACA can be listed as:
|
||||
shows the program’s version number.
|
||||
--arch ARCH
|
||||
needs to be replaced with the target architecture abbreviation.
|
||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server) for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2``, and ``ZEN3`` for AMD Zen architectures.
|
||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server), ``SPR`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2``, and ``ZEN3`` for AMD Zen architectures.
|
||||
Furthermore, ``TX2`` for Marvell's ARM-based ThunderX2, ``N1`` for ARM's Neoverse N1, ``A72`` for ARM Cortex-A72, ``TSV110`` for the HiSilicon TaiShan v110, ``A64FX`` for Fujitsu's HPC ARM architecture, ``M1`` for the Apple M1-Firestorm performance core, and ``V2`` for the Neoverse V2 (used in NVIDIA's Grace CPU) are available.
|
||||
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
||||
--fixed
|
||||
@@ -150,21 +150,23 @@ Supported microarchitectures
|
||||
| | | Ivy Bridge | ``IVB`` |
|
||||
| | +----------------+------------+
|
||||
| | | Haswell | ``HSW`` |
|
||||
| | Intel +----------------+------------+
|
||||
| | +----------------+------------+
|
||||
| | | Broadwell | ``BDW`` |
|
||||
| +----------------+------------+
|
||||
| | Skylake-X | ``SKX`` |
|
||||
| +----------------+------------+
|
||||
| | Cascadelake-X | ``CSX`` |
|
||||
| +----------------+------------+
|
||||
| | Icelake client | ``ICL`` |
|
||||
| +----------------+------------+
|
||||
| | Icelake server | ``ICX`` |
|
||||
| | +----------------+------------+
|
||||
| | | Skylake-X | ``SKX`` |
|
||||
| | Intel +----------------+------------+
|
||||
| | | Cascadelake-X | ``CSX`` |
|
||||
| | +----------------+------------+
|
||||
| | | Icelake client | ``ICL`` |
|
||||
| | +----------------+------------+
|
||||
| | | Icelake server | ``ICX`` |
|
||||
| | +----------------+------------+
|
||||
| | | Sapphire Rapids| ``SPR`` |
|
||||
+----------+----------------+------------+
|
||||
| | | Naples / Zen 1 | ``ZEN1`` |
|
||||
| +----------------+------------+
|
||||
| | +----------------+------------+
|
||||
| | AMD | Rome / Zen 2 | ``ZEN2`` |
|
||||
| +----------------+------------+
|
||||
| | +----------------+------------+
|
||||
| | | Milan / Zen 3 | ``ZEN3`` |
|
||||
+----------+----------------+------------+
|
||||
|
||||
@@ -174,9 +176,9 @@ Supported microarchitectures
|
||||
| Designer | Model/microarch | OSACA flag |
|
||||
+===========+===================+=============+
|
||||
| | | Cortex-A72 | ``A72`` |
|
||||
| +-------------------+-------------+
|
||||
| | +-------------------+-------------+
|
||||
| | ARM | Neoverse N1 | ``N1`` |
|
||||
| +-------------------+-------------+
|
||||
| | +-------------------+-------------+
|
||||
| | | Neoverse V2 | ``V2`` |
|
||||
+-----------+-------------------+-------------+
|
||||
| Marvell | ThunderX2 | ``TX2`` |
|
||||
|
||||
@@ -9,7 +9,7 @@ class MOVEntryBuilder:
|
||||
port_occupancy = defaultdict(Fraction)
|
||||
for uops, ports in port_pressure:
|
||||
for p in ports:
|
||||
port_occupancy[p] += Fraction(uops, len(ports))
|
||||
port_occupancy[p] += Fraction(int(uops*100), len(ports)*100)
|
||||
return float(max(list(port_occupancy.values()) + [0]))
|
||||
|
||||
@staticmethod
|
||||
@@ -71,7 +71,7 @@ class MOVEntryBuilder:
|
||||
ports = ports.split(",")
|
||||
if len(ports) == 1:
|
||||
ports = ports[0]
|
||||
port_pressure.append([int(cycles), ports])
|
||||
port_pressure.append([float(cycles), ports])
|
||||
return port_pressure
|
||||
|
||||
def process_item(self, instruction_form, resources):
|
||||
@@ -115,6 +115,43 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
)
|
||||
|
||||
|
||||
class MOVEntryBuilderIntelPort11(MOVEntryBuilder):
    """MOV entry builder for Intel Sapphire Rapids (SPR)."""

    def build_description(self, instruction_name, operand_types, port_pressure=None, latency=0):
        """
        Build the description of one MOV instruction form for SPR.

        The memory part of the instruction (if any) is added on top of the given register-only
        ``port_pressure`` and ``latency`` before delegating to
        ``MOVEntryBuilder.build_description``.

        :param instruction_name: mnemonic of the instruction
        :param operand_types: list of operand type strings (e.g. ``["mem", "zmm"]``)
        :param port_pressure: list of ``[uops, ports]`` pairs of the register-only part;
            ``None`` or an empty list means no pressure. The argument is never modified.
        :param latency: latency of the register-only part
        :returns: whatever ``MOVEntryBuilder.build_description`` returns
        """
        # Work on a private copy: the previous in-place ``+=`` on a ``[]`` default leaked
        # entries from one call into the next and modified the caller's list.
        port_pressure = list(port_pressure) if port_pressure else []
        load, store, _ = self.classify(operand_types)
        is_zmm = "zmm" in operand_types

        if load:
            # 512-bit loads are charged 1.5 uops on the load ports, everything else 1 uop.
            # NOTE(review): port "10" is also used by the register-register entries of
            # spr_mov_instructions -- confirm whether the third load port should be "11".
            port_pressure.append([1.5 if is_zmm else 1, ["2", "3", "10"]])
            return MOVEntryBuilder.build_description(
                self, instruction_name, operand_types, port_pressure, latency + 5, "with load"
            )

        if store:
            # 512-bit stores are charged twice the uops of narrower stores.
            # Ports are given as strings of single-digit port names here
            # (presumably store-address ports 7/8 and store-data ports 4/9 -- TODO confirm).
            uops = 2 if is_zmm else 1
            port_pressure += [[uops, "78"], [uops, "49"]]
            return MOVEntryBuilder.build_description(
                self,
                instruction_name,
                list(operand_types),
                port_pressure,
                latency,
                "with store",
            )

        # Register only:
        return MOVEntryBuilder.build_description(
            self, instruction_name, operand_types, port_pressure, latency
        )
|
||||
|
||||
|
||||
class MOVEntryBuilderIntelPort9(MOVEntryBuilder):
|
||||
# for ICX
|
||||
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
|
||||
@@ -946,6 +983,433 @@ icx_mov_instructions = [
|
||||
# TODO with masking!
|
||||
]
|
||||
|
||||
p11 = MOVEntryBuilderIntelPort11()
|
||||
|
||||
# MOV instruction forms for Intel Sapphire Rapids (SPR).
#
# Each entry is ``(instruction form, (port pressure, latency))``. The instruction form is
# the mnemonic followed by its operand types, the port pressure is a string such as
# "1*p0,1,5,6,10" (empty if there is none) and the latency is a number.
#
# Commented-out entries are forms that are not enabled for SPR; they presumably still carry
# port assignments of an older micro-architecture -- verify them before enabling.
spr_mov_instructions = [
    # https://www.felixcloutier.com/x86/mov
    ("mov gpr gpr", ("1*p0,1,5,6,10", 1)),
    ("mov gpr mem", ("", 0)),
    ("mov mem gpr", ("", 0)),
    ("mov imd gpr", ("1*p0,1,5,6,10", 1)),
    ("mov imd mem", ("", 0)),
    ("movabs imd gpr", ("1*p0,1,5,6,10", 1)),  # AT&T version
    # https://www.felixcloutier.com/x86/movapd
    ("movapd xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movapd xmm mem", ("", 0)),
    ("movapd mem xmm", ("", 0)),
    ("vmovapd xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovapd xmm mem", ("", 0)),
    ("vmovapd mem xmm", ("", 0)),
    ("vmovapd ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovapd ymm mem", ("", 0)),
    ("vmovapd mem ymm", ("", 0)),
    ("vmovapd zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovapd zmm mem", ("", 0)),
    ("vmovapd mem zmm", ("", 0)),
    # https://www.felixcloutier.com/x86/movaps
    ("movaps xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movaps xmm mem", ("", 0)),
    ("movaps mem xmm", ("", 0)),
    ("vmovaps xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovaps xmm mem", ("", 0)),
    ("vmovaps mem xmm", ("", 0)),
    ("vmovaps ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovaps ymm mem", ("", 0)),
    ("vmovaps mem ymm", ("", 0)),
    ("vmovaps zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovaps zmm mem", ("", 0)),
    ("vmovaps mem zmm", ("", 0)),
    ## https://www.felixcloutier.com/x86/movd:movq
    # ("movd gpr mm", ("1*p5", 1)),
    # ("movd mem mm", ("", 0)),
    # ("movq gpr mm", ("1*p5", 1)),
    # ("movq mem mm", ("", 0)),
    # ("movd mm gpr", ("1*p0", 1)),
    # ("movd mm mem", ("", 0)),
    # ("movq mm gpr", ("1*p0", 1)),
    # ("movq mm mem", ("", 0)),
    # ("movd gpr xmm", ("1*p5", 1)),
    # ("movd mem xmm", ("", 0)),
    # ("movq gpr xmm", ("1*p5", 1)),
    # ("movq mem xmm", ("", 0)),
    # ("movd xmm gpr", ("1*p0", 1)),
    # ("movd xmm mem", ("", 0)),
    # ("movq xmm gpr", ("1*p0", 1)),
    # ("movq xmm mem", ("", 0)),
    # ("vmovd gpr xmm", ("1*p5", 1)),
    # ("vmovd mem xmm", ("", 0)),
    # ("vmovq gpr xmm", ("1*p5", 1)),
    # ("vmovq mem xmm", ("", 0)),
    # ("vmovd xmm gpr", ("1*p0", 1)),
    # ("vmovd xmm mem", ("", 0)),
    # ("vmovq xmm gpr", ("1*p0", 1)),
    # ("vmovq xmm mem", ("", 0)),
    ## https://www.felixcloutier.com/x86/movddup
    # ("movddup xmm xmm", ("1*p5", 1)),
    # ("movddup mem xmm", ("", 0)),
    # ("vmovddup xmm xmm", ("1*p5", 1)),
    # ("vmovddup mem xmm", ("", 0)),
    # ("vmovddup ymm ymm", ("1*p5", 1)),
    # ("vmovddup mem ymm", ("", 0)),
    # ("vmovddup zmm zmm", ("1*p5", 1)),
    # ("vmovddup mem zmm", ("", 0)),
    # https://www.felixcloutier.com/x86/movdq2q
    # ("movdq2q xmm mm", ("1*p015+1*p5", 1)),
    # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
    ("movdqa xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movdqa mem xmm", ("", 0)),
    ("movdqa xmm mem", ("", 0)),
    ("vmovdqa xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa mem xmm", ("", 0)),
    ("vmovdqa xmm mem", ("", 0)),
    ("vmovdqa ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa mem ymm", ("", 0)),
    ("vmovdqa ymm mem", ("", 0)),
    ("vmovdqa32 xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa32 mem xmm", ("", 0)),
    ("vmovdqa32 xmm mem", ("", 0)),
    ("vmovdqa32 ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa32 mem ymm", ("", 0)),
    ("vmovdqa32 ymm mem", ("", 0)),
    ("vmovdqa32 zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa32 mem zmm", ("", 0)),
    ("vmovdqa32 zmm mem", ("", 0)),
    ("vmovdqa64 xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa64 mem xmm", ("", 0)),
    ("vmovdqa64 xmm mem", ("", 0)),
    ("vmovdqa64 ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa64 mem ymm", ("", 0)),
    ("vmovdqa64 ymm mem", ("", 0)),
    ("vmovdqa64 zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqa64 mem zmm", ("", 0)),
    ("vmovdqa64 zmm mem", ("", 0)),
    # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
    ("movdqu xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movdqu mem xmm", ("", 0)),
    ("movdqu xmm mem", ("", 0)),
    ("vmovdqu xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu mem xmm", ("", 0)),
    ("vmovdqu xmm mem", ("", 0)),
    ("vmovdqu ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu mem ymm", ("", 0)),
    ("vmovdqu ymm mem", ("", 0)),
    ("vmovdqu8 xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu8 mem xmm", ("", 0)),
    ("vmovdqu8 xmm mem", ("", 0)),
    ("vmovdqu8 ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu8 mem ymm", ("", 0)),
    ("vmovdqu8 ymm mem", ("", 0)),
    ("vmovdqu8 zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu8 mem zmm", ("", 0)),
    ("vmovdqu8 zmm mem", ("", 0)),
    ("vmovdqu16 xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu16 mem xmm", ("", 0)),
    ("vmovdqu16 xmm mem", ("", 0)),
    ("vmovdqu16 ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu16 mem ymm", ("", 0)),
    ("vmovdqu16 ymm mem", ("", 0)),
    ("vmovdqu16 zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu16 mem zmm", ("", 0)),
    ("vmovdqu16 zmm mem", ("", 0)),
    ("vmovdqu32 xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu32 mem xmm", ("", 0)),
    ("vmovdqu32 xmm mem", ("", 0)),
    ("vmovdqu32 ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu32 mem ymm", ("", 0)),
    ("vmovdqu32 ymm mem", ("", 0)),
    ("vmovdqu32 zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu32 mem zmm", ("", 0)),
    ("vmovdqu32 zmm mem", ("", 0)),
    ("vmovdqu64 xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu64 mem xmm", ("", 0)),
    ("vmovdqu64 xmm mem", ("", 0)),
    ("vmovdqu64 ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu64 mem ymm", ("", 0)),
    ("vmovdqu64 ymm mem", ("", 0)),
    ("vmovdqu64 zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovdqu64 mem zmm", ("", 0)),
    ("vmovdqu64 zmm mem", ("", 0)),
    ## https://www.felixcloutier.com/x86/movhlps
    # ("movhlps xmm xmm", ("1*p5", 1)),
    # ("vmovhlps xmm xmm xmm", ("1*p5", 1)),
    ## https://www.felixcloutier.com/x86/movhpd
    # ("movhpd mem xmm", ("1*p5", 1)),
    # ("vmovhpd mem xmm xmm", ("1*p5", 1)),
    # ("movhpd xmm mem", ("", 0)),
    # ("vmovhpd mem xmm", ("", 0)),
    ## https://www.felixcloutier.com/x86/movhps
    # ("movhps mem xmm", ("1*p5", 1)),
    # ("vmovhps mem xmm xmm", ("1*p5", 1)),
    # ("movhps xmm mem", ("", 0)),
    # ("vmovhps mem xmm", ("", 0)),
    ## https://www.felixcloutier.com/x86/movlhps
    # ("movlhps xmm xmm", ("1*p5", 1)),
    # ("vmovlhps xmm xmm xmm", ("1*p5", 1)),
    ## https://www.felixcloutier.com/x86/movlpd
    # ("movlpd mem xmm", ("1*p5", 1)),
    # ("vmovlpd mem xmm xmm", ("1*p5", 1)),
    # ("movlpd xmm mem", ("", 0)),
    # ("vmovlpd mem xmm", ("1*p5", 1)),
    ## https://www.felixcloutier.com/x86/movlps
    # ("movlps mem xmm", ("1*p5", 1)),
    # ("vmovlps mem xmm xmm", ("1*p5", 1)),
    # ("movlps xmm mem", ("", 0)),
    # ("vmovlps mem xmm", ("1*p5", 1)),
    ## https://www.felixcloutier.com/x86/movmskpd
    # ("movmskpd xmm gpr", ("1*p0", 1)),
    # ("vmovmskpd xmm gpr", ("1*p0", 1)),
    # ("vmovmskpd ymm gpr", ("1*p0", 1)),
    ## https://www.felixcloutier.com/x86/movmskps
    # ("movmskps xmm gpr", ("1*p0", 1)),
    # ("vmovmskps xmm gpr", ("1*p0", 1)),
    # ("vmovmskps ymm gpr", ("1*p0", 1)),
    # https://www.felixcloutier.com/x86/movntdq
    ("movntdq xmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntdq xmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntdq ymm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntdq zmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntdqa
    ("movntdqa mem xmm", ("", 0)),  # TODO NT-load: what latency to use?
    ("vmovntdqa mem xmm", ("", 0)),  # TODO NT-load: what latency to use?
    ("vmovntdqa mem ymm", ("", 0)),  # TODO NT-load: what latency to use?
    ("vmovntdqa mem zmm", ("", 0)),  # TODO NT-load: what latency to use?
    # https://www.felixcloutier.com/x86/movnti
    ("movnti gpr mem", ("", 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntpd
    ("movntpd xmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntpd xmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntpd ymm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntpd zmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntps
    ("movntps xmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntps xmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntps ymm mem", ("", 0)),  # TODO NT-store: what latency to use?
    ("vmovntps zmm mem", ("", 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movntq
    ("movntq mm mem", ("", 0)),  # TODO NT-store: what latency to use?
    # https://www.felixcloutier.com/x86/movq
    ("movq mm mm", ("", 0)),
    ("movq mem mm", ("", 0)),
    ("movq mm mem", ("", 0)),
    ("movq xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movq mem xmm", ("", 0)),
    ("movq xmm mem", ("", 0)),
    ("vmovq xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovq mem xmm", ("", 0)),
    ("vmovq xmm mem", ("", 0)),
    # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
    # TODO combined load-store is currently not supported
    # ('movs mem mem', ()),
    # https://www.felixcloutier.com/x86/movsd
    ("movsd xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movsd mem xmm", ("", 0)),
    ("movsd xmm mem", ("", 0)),
    ("vmovsd xmm xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovsd mem xmm", ("", 0)),
    ("vmovsd xmm mem", ("", 0)),
    ## https://www.felixcloutier.com/x86/movshdup
    # ("movshdup xmm xmm", ("1*p15", 1)),
    # ("movshdup mem xmm", ("", 0)),
    # ("vmovshdup xmm xmm", ("1*p15", 1)),
    # ("vmovshdup mem xmm", ("", 0)),
    # ("vmovshdup ymm ymm", ("1*p15", 1)),
    # ("vmovshdup mem ymm", ("", 0)),
    # ("vmovshdup zmm zmm", ("1*p5", 1)),
    # ("vmovshdup mem zmm", ("", 0)),
    ## https://www.felixcloutier.com/x86/movsldup
    # ("movsldup xmm xmm", ("1*p15", 1)),
    # ("movsldup mem xmm", ("", 0)),
    # ("vmovsldup xmm xmm", ("1*p15", 1)),
    # ("vmovsldup mem xmm", ("", 0)),
    # ("vmovsldup ymm ymm", ("1*p15", 1)),
    # ("vmovsldup mem ymm", ("", 0)),
    # ("vmovsldup zmm zmm", ("1*p5", 1)),
    # ("vmovsldup mem zmm", ("", 0)),
    # https://www.felixcloutier.com/x86/movss
    ("movss xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movss mem xmm", ("", 0)),
    ("vmovss xmm xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovss mem xmm", ("", 0)),
    ("vmovss xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovss xmm mem", ("", 0)),
    # Store form; this slot previously repeated the "movss mem xmm" load entry.
    ("movss xmm mem", ("", 0)),
    # https://www.felixcloutier.com/x86/movsx:movsxd
    ("movsx gpr gpr", ("1*p0,1,5,6,10", 1)),
    ("movsx mem gpr", ("", 0)),
    # NOTE(review): unlike "movsx gpr gpr", this register-only form has no port pressure
    # and zero latency -- confirm that this is intended.
    ("movsxd gpr gpr", ("", 0)),
    ("movsxd mem gpr", ("", 0)),
    ("movsb gpr gpr", ("1*p0,1,5,6,10", 1)),  # AT&T version
    ("movsb mem gpr", ("", 0)),  # AT&T version
    ("movsw gpr gpr", ("1*p0,1,5,6,10", 1)),  # AT&T version
    ("movsw mem gpr", ("", 0)),  # AT&T version
    ("movsl gpr gpr", ("1*p0,1,5,6,10", 1)),  # AT&T version
    ("movsl mem gpr", ("", 0)),  # AT&T version
    ("movsq gpr gpr", ("1*p0,1,5,6,10", 1)),  # AT&T version
    ("movsq mem gpr", ("", 0)),  # AT&T version
    # https://www.felixcloutier.com/x86/movupd
    ("movupd xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movupd mem xmm", ("", 0)),
    ("movupd xmm mem", ("", 0)),
    ("vmovupd xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovupd mem xmm", ("", 0)),
    ("vmovupd xmm mem", ("", 0)),
    ("vmovupd ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovupd mem ymm", ("", 0)),
    ("vmovupd ymm mem", ("", 0)),
    ("vmovupd zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovupd mem zmm", ("", 0)),
    ("vmovupd zmm mem", ("", 0)),
    # https://www.felixcloutier.com/x86/movups
    ("movups xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("movups mem xmm", ("", 0)),
    ("movups xmm mem", ("", 0)),
    ("vmovups xmm xmm", ("1*p0,1,5,6,10", 1)),
    ("vmovups mem xmm", ("", 0)),
    ("vmovups xmm mem", ("", 0)),
    ("vmovups ymm ymm", ("1*p0,1,5,6,10", 1)),
    ("vmovups mem ymm", ("", 0)),
    ("vmovups ymm mem", ("", 0)),
    ("vmovups zmm zmm", ("1*p0,1,5,6,10", 1)),
    ("vmovups mem zmm", ("", 0)),
    ("vmovups zmm mem", ("", 0)),
    ## https://www.felixcloutier.com/x86/movzx
    # ("movzx gpr gpr", ("1*p0,1,5,6,10", 1)),
    # ("movzx mem gpr", ("", 0)),
    # ("movzb gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
    # ("movzb mem gpr", ("", 0)), # AT&T version
    # ("movzw gpr gpr", ("1*p0,1,5,6,106", 1)), # AT&T version
    # ("movzw mem gpr", ("", 0)), # AT&T version
    # ("movzl gpr gpr", ("1*p0156", 1)), # AT&T version
    # ("movzl mem gpr", ("", 0)), # AT&T version
    # ("movzq gpr gpr", ("1*p0156", 1)), # AT&T version
    # ("movzq mem gpr", ("", 0)), # AT&T version
    ## https://www.felixcloutier.com/x86/cmovcc
    # ("cmova gpr gpr", ("2*p06", 1)),
    # ("cmova mem gpr", ("", 0)),
    # ("cmovae gpr gpr", ("1*p06", 1)),
    # ("cmovae mem gpr", ("", 0)),
    # ("cmovb gpr gpr", ("2*p06", 1)),
    # ("cmovb mem gpr", ("", 0)),
    # ("cmovbe gpr gpr", ("2*p06", 1)),
    # ("cmovbe mem gpr", ("", 0)),
    # ("cmovc gpr gpr", ("1*p06", 1)),
    # ("cmovc mem gpr", ("", 0)),
    # ("cmove gpr gpr", ("1*p06", 1)),
    # ("cmove mem gpr", ("", 0)),
    # ("cmovg gpr gpr", ("1*p06", 1)),
    # ("cmovg mem gpr", ("", 0)),
    # ("cmovge gpr gpr", ("1*p06", 1)),
    # ("cmovge mem gpr", ("", 0)),
    # ("cmovl gpr gpr", ("1*p06", 1)),
    # ("cmovl mem gpr", ("", 0)),
    # ("cmovle gpr gpr", ("1*p06", 1)),
    # ("cmovle mem gpr", ("", 0)),
    # ("cmovna gpr gpr", ("2*p06", 1)),
    # ("cmovna mem gpr", ("", 0)),
    # ("cmovnae gpr gpr", ("1*p06", 1)),
    # ("cmovnae mem gpr", ("", 0)),
    # ("cmovnb gpr gpr", ("1*p06", 1)),
    # ("cmovnb mem gpr", ("", 0)),
    # ("cmovnbe gpr gpr", ("2*p06", 1)),
    # ("cmovnbe mem gpr", ("", 0)),
    # ("cmovnc gpr gpr", ("1*p06", 1)),
    # ("cmovnc mem gpr", ("", 0)),
    # ("cmovne gpr gpr", ("1*p06", 1)),
    # ("cmovne mem gpr", ("", 0)),
    # ("cmovng gpr gpr", ("1*p06", 1)),
    # ("cmovng mem gpr", ("", 0)),
    # ("cmovnge gpr gpr", ("1*p06", 1)),
    # ("cmovnge mem gpr", ("", 0)),
    # ("cmovnl gpr gpr", ("1*p06", 1)),
    # ("cmovnl mem gpr", ("", 0)),
    # ("cmovno gpr gpr", ("1*p06", 1)),
    # ("cmovno mem gpr", ("", 0)),
    # ("cmovnp gpr gpr", ("1*p06", 1)),
    # ("cmovnp mem gpr", ("", 0)),
    # ("cmovns gpr gpr", ("1*p06", 1)),
    # ("cmovns mem gpr", ("", 0)),
    # ("cmovnz gpr gpr", ("1*p06", 1)),
    # ("cmovnz mem gpr", ("", 0)),
    # ("cmovo gpr gpr", ("1*p06", 1)),
    # ("cmovo mem gpr", ("", 0)),
    # ("cmovp gpr gpr", ("1*p06", 1)),
    # ("cmovp mem gpr", ("", 0)),
    # ("cmovpe gpr gpr", ("1*p06", 1)),
    # ("cmovpe mem gpr", ("", 0)),
    # ("cmovpo gpr gpr", ("1*p06", 1)),
    # ("cmovpo mem gpr", ("", 0)),
    # ("cmovs gpr gpr", ("1*p06", 1)),
    # ("cmovs mem gpr", ("", 0)),
    # ("cmovz gpr gpr", ("1*p06", 1)),
    # ("cmovz mem gpr", ("", 0)),
    ## https://www.felixcloutier.com/x86/pmovmskb
    # ("pmovmskb mm gpr", ("1*p0", 1)),
    # ("pmovmskb xmm gpr", ("1*p0", 1)),
    # ("vpmovmskb xmm gpr", ("1*p0", 1)),
    ## https://www.felixcloutier.com/x86/pmovsx
    # ("pmovsxbw xmm xmm", ("1*p15", 1)),
    # ("pmovsxbw mem xmm", ("1*p15", 1)),
    # ("pmovsxbd xmm xmm", ("1*p15", 1)),
    # ("pmovsxbd mem xmm", ("1*p15", 1)),
    # ("pmovsxbq xmm xmm", ("1*p15", 1)),
    # ("pmovsxbq mem xmm", ("1*p15", 1)),
    # ("vpmovsxbw xmm xmm", ("1*p15", 1)),
    # ("vpmovsxbw mem xmm", ("1*p15", 1)),
    # ("vpmovsxbd xmm xmm", ("1*p15", 1)),
    # ("vpmovsxbd mem xmm", ("1*p15", 1)),
    # ("vpmovsxbq xmm xmm", ("1*p15", 1)),
    # ("vpmovsxbq mem xmm", ("1*p15", 1)),
    # ("vpmovsxbw xmm ymm", ("1*p5", 1)),
    # ("vpmovsxbw mem ymm", ("1*p5", 1)),
    # ("vpmovsxbd xmm ymm", ("1*p5", 1)),
    # ("vpmovsxbd mem ymm", ("1*p5", 1)),
    # ("vpmovsxbq xmm ymm", ("1*p5", 1)),
    # ("vpmovsxbq mem ymm", ("1*p5", 1)),
    # ("vpmovsxbw ymm zmm", ("1*p5", 3)),
    # ("vpmovsxbw mem zmm", ("1*p5", 1)),
    ## https://www.felixcloutier.com/x86/pmovzx
    # ("pmovzxbw xmm xmm", ("1*p15", 1)),
    # ("pmovzxbw mem xmm", ("1*p15", 1)),
    # ("vpmovzxbw xmm xmm", ("1*p15", 1)),
    # ("vpmovzxbw mem xmm", ("1*p15", 1)),
    # ("vpmovzxbw xmm ymm", ("1*p5", 1)),
    # ("vpmovzxbw mem ymm", ("1*p5", 1)),
    # ("vpmovzxbw ymm zmm", ("1*p5", 1)),
    # ("vpmovzxbw mem zmm", ("1*p5", 1)),
    ##################################################################
    ## https://www.felixcloutier.com/x86/movbe
    # ("movbe gpr mem", ("1*p15", 6)),
    # ("movbe mem gpr", ("1*p15", 6)),
    ################################################
    # https://www.felixcloutier.com/x86/movapd
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movaps
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movddup
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movq2dq
    # ("movq2dq mm xmm", ("1*p0+1*p015", 1)),
    # https://www.felixcloutier.com/x86/movsd
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movshdup
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movsldup
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movss
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movupd
    # TODO with masking!
    # https://www.felixcloutier.com/x86/movups
    # TODO with masking!
    # https://www.felixcloutier.com/x86/pmovsx
    # TODO with masking!
]
|
||||
|
||||
|
||||
|
||||
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
|
||||
# for HSW, BDW, SKX and CSX
|
||||
@@ -1612,6 +2076,7 @@ def get_description(arch, rhs_comment=None):
|
||||
"skx": "\n".join([p7.process_item(*item) for item in skx_mov_instructions]),
|
||||
"csx": "\n".join([p7.process_item(*item) for item in csx_mov_instructions]),
|
||||
"icx": "\n".join([p9.process_item(*item) for item in icx_mov_instructions]),
|
||||
"spr": "\n".join([p11.process_item(*item) for item in spr_mov_instructions]),
|
||||
"zen3": "\n".join([z3.process_item(*item) for item in zen3_mov_instructions]),
|
||||
}
|
||||
|
||||
@@ -1634,7 +2099,7 @@ if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: {} (snb|ivb|hsw|bdw|skx|csx|icx|zen3)".format(sys.argv[0]))
|
||||
print("Usage: {} (snb|ivb|hsw|bdw|skx|csx|icx|spr|zen3)".format(sys.argv[0]))
|
||||
sys.exit(0)
|
||||
|
||||
try:
|
||||
|
||||
5464
osaca/data/spr.yml
Normal file
5464
osaca/data/spr.yml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@ SUPPORTED_ARCHS = [
|
||||
"CSX",
|
||||
"ICL",
|
||||
"ICX",
|
||||
"SPR",
|
||||
"ZEN1",
|
||||
"ZEN2",
|
||||
"ZEN3",
|
||||
@@ -102,8 +103,8 @@ def create_parser(parser=None):
|
||||
parser.add_argument(
|
||||
"--arch",
|
||||
type=str,
|
||||
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, ZEN1, ZEN2, ZEN3, TX2, N1, "
|
||||
"A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a default uarch for "
|
||||
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, SPR, ZEN1, ZEN2, ZEN3, "
|
||||
"TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a default uarch for "
|
||||
"x86/AArch64.",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
||||
@@ -303,6 +303,7 @@ class MachineModel(object):
|
||||
"cfl": "x86",
|
||||
"icl": "x86",
|
||||
"icx": "x86",
|
||||
"spr": "x86",
|
||||
}
|
||||
arch = arch.lower()
|
||||
if arch in arch_dict:
|
||||
|
||||
Reference in New Issue
Block a user