mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-04 18:20:09 +01:00
Merge pull request #105 from RRZE-HPC/feat/spr
SPR and Neoverse V2 support
This commit is contained in:
92
README.rst
92
README.rst
@@ -100,8 +100,8 @@ The usage of OSACA can be listed as:
|
|||||||
shows the program’s version number.
|
shows the program’s version number.
|
||||||
--arch ARCH
|
--arch ARCH
|
||||||
needs to be replaced with the target architecture abbreviation.
|
needs to be replaced with the target architecture abbreviation.
|
||||||
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server) for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2``, and ``ZEN3`` for AMD Zen architectures.
|
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX``, ``ICL`` (Client), ``ICX`` (Server), and ``SPR`` for the latest Intel microarchitectures starting from Intel Sandy Bridge, and ``ZEN1``, ``ZEN2``, and ``ZEN3`` for AMD Zen architectures.
|
||||||
Furthermore, ``TX2`` for Marvell`s ARM-based ThunderX2 , ``N1`` for ARM's Neoverse, ``A72`` for ARM Cortex-A72, ``TSV110`` for the HiSilicon TaiShan v110, ``A64FX`` for Fujitsu's HPC ARM architecture, and ``M1`` for the Apple M1-Firestorm performance core are available.
|
Furthermore, ``TX2`` for Marvell's ARM-based ThunderX2, ``N1`` for ARM's Neoverse N1, ``A72`` for ARM Cortex-A72, ``TSV110`` for the HiSilicon TaiShan v110, ``A64FX`` for Fujitsu's HPC ARM architecture, ``M1`` for the Apple M1-Firestorm performance core, and ``V2`` for the Neoverse V2 (used in NVIDIA's Grace CPU) are available.
|
||||||
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
|
||||||
--fixed
|
--fixed
|
||||||
Run the throughput analysis with fixed port utilization for all suitable ports per instruction.
|
Run the throughput analysis with fixed port utilization for all suitable ports per instruction.
|
||||||
@@ -142,51 +142,57 @@ Supported microarchitectures
|
|||||||
-----------------------------
|
-----------------------------
|
||||||
**x86 CPUs**
|
**x86 CPUs**
|
||||||
|
|
||||||
+---------+----------------+------------+
|
+----------+-----------------+------------+
|
||||||
|Designer | Model/microarch| OSACA flag |
|
| Designer | Model/microarch | OSACA flag |
|
||||||
+=========+================+============+
|
+==========+=================+============+
|
||||||
| | | Sandy Bridge | ``SNB`` |
|
| Intel | Sandy Bridge | ``SNB`` |
|
||||||
| | +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | | Ivy Bridge | ``IVB`` |
|
| Intel | Ivy Bridge | ``IVB`` |
|
||||||
| | +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | | Haswell | ``HSW`` |
|
| Intel | Haswell | ``HSW`` |
|
||||||
| | Intel +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | | Broadwell | ``BDW`` |
|
| Intel | Broadwell | ``BDW`` |
|
||||||
| +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | Skylake-X | ``SKX`` |
|
| Intel | Skylake-X | ``SKX`` |
|
||||||
| +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | Cascadelake-X | ``CSX`` |
|
| Intel | Cascadelake-X | ``CSX`` |
|
||||||
| +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | Icelake client | ``ICL`` |
|
| Intel | Icelake client | ``ICL`` |
|
||||||
| +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | Icelake server | ``ICX`` |
|
| Intel | Icelake server | ``ICX`` |
|
||||||
+---------+----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | | Naples / Zen 1 | ``ZEN1`` |
|
| Intel | Sapphire Rapids | ``SPR`` |
|
||||||
| +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | AMD | Rome / Zen 2 | ``ZEN2`` |
|
| AMD | Naples / Zen 1 | ``ZEN1`` |
|
||||||
| +----------------+------------+
|
+----------+-----------------+------------+
|
||||||
| | | Milan / Zen 3 | ``ZEN3`` |
|
| AMD | Rome / Zen 2 | ``ZEN2`` |
|
||||||
+---------+----------------+------------+
|
+----------+-----------------+------------+
|
||||||
|
| AMD | Milan / Zen 3 | ``ZEN3`` |
|
||||||
|
+----------+-----------------+------------+
|
||||||
|
|
||||||
**ARM AArch64 CPUs**
|
**ARM AArch64 CPUs**
|
||||||
|
|
||||||
+---------+----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
|Designer | Model/microarch| OSACA flag |
|
| Designer | Model/microarch | OSACA flag |
|
||||||
+=========+================+============+
|
+===========+===================+=============+
|
||||||
| | | Cortex-A72 | ``A72`` |
|
| ARM | Cortex-A72 | ``A72`` |
|
||||||
| +----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
| | ARM | Neoverse N1 | ``N1`` |
|
| ARM | Neoverse N1 | ``N1`` |
|
||||||
+---------+----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
| Marvell | ThunderX2 | ``TX2`` |
|
| ARM | Neoverse V2 | ``V2`` |
|
||||||
+---------+----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
| Fujitsu | FX700/A64FX | ``A64FX`` |
|
| Marvell | ThunderX2 | ``TX2`` |
|
||||||
+---------+----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
|HiSilicon| TaiShan v110 | ``TSV110``|
|
| Fujitsu | FX700/A64FX | ``A64FX`` |
|
||||||
+---------+----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
| Apple | M1-Firestorm | ``M1`` |
|
| HiSilicon | TaiShan v110 | ``TSV110`` |
|
||||||
+---------+----------------+------------+
|
+-----------+-------------------+-------------+
|
||||||
|
| Apple | M1-Firestorm | ``M1`` |
|
||||||
|
+-----------+-------------------+-------------+
|
||||||
|
| NVIDIA | Neoverse V2/Grace | ``V2`` |
|
||||||
|
+-----------+-------------------+-------------+
|
||||||
|
|
||||||
______________________
|
----
|
||||||
|
|
||||||
Hereinafter OSACA's scope of function will be described.
|
Hereinafter OSACA's scope of function will be described.
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ class MOVEntryBuilder:
|
|||||||
port_occupancy = defaultdict(Fraction)
|
port_occupancy = defaultdict(Fraction)
|
||||||
for uops, ports in port_pressure:
|
for uops, ports in port_pressure:
|
||||||
for p in ports:
|
for p in ports:
|
||||||
port_occupancy[p] += Fraction(uops, len(ports))
|
port_occupancy[p] += Fraction(int(uops * 100), len(ports) * 100)
|
||||||
return float(max(list(port_occupancy.values()) + [0]))
|
return float(max(list(port_occupancy.values()) + [0]))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -71,7 +71,7 @@ class MOVEntryBuilder:
|
|||||||
ports = ports.split(",")
|
ports = ports.split(",")
|
||||||
if len(ports) == 1:
|
if len(ports) == 1:
|
||||||
ports = ports[0]
|
ports = ports[0]
|
||||||
port_pressure.append([int(cycles), ports])
|
port_pressure.append([float(cycles), ports])
|
||||||
return port_pressure
|
return port_pressure
|
||||||
|
|
||||||
def process_item(self, instruction_form, resources):
|
def process_item(self, instruction_form, resources):
|
||||||
@@ -115,6 +115,43 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MOVEntryBuilderIntelPort11(MOVEntryBuilder):
    """MOV entry builder for SPR.

    Extends the port pressure taken from the instruction table with the
    memory-side port usage of loads and stores, then delegates to
    ``MOVEntryBuilder.build_description`` to render the entry.
    """

    def build_description(self, instruction_name, operand_types, port_pressure=None, latency=0):
        """Build the description of one MOV instruction form.

        :param instruction_name: mnemonic of the instruction
        :param operand_types: operand type names (e.g. ``"mem"``, ``"zmm"``) of the form
        :param port_pressure: ``[cycles, ports]`` pairs of the register-only part;
            the argument itself is never modified
        :param latency: latency of the register-only part; 5 cycles are added for loads
        :returns: whatever ``MOVEntryBuilder.build_description`` returns for the
            extended port pressure
        """
        # Work on a private copy: extending the argument in place would leak the
        # added entries into the caller's list (or into a shared default) and
        # make them accumulate across calls.
        port_pressure = list(port_pressure) if port_pressure else []
        load, store, _vec = self.classify(operand_types)

        if load:
            # zmm operands are charged more cycles on the load ports than narrower ones.
            cycles = 1.5 if "zmm" in operand_types else 1
            port_pressure.append([cycles, ["2", "3", "10"]])
            return MOVEntryBuilder.build_description(
                self,
                instruction_name,
                operand_types,
                port_pressure,
                latency + 5,
                "with load",
            )

        if store:
            # NOTE(review): "78" and "49" are presumably the store-address and
            # store-data port groups -- confirm against the SPR port model.
            cycles = 2 if "zmm" in operand_types else 1
            port_pressure += [[cycles, "78"], [cycles, "49"]]
            # Stores add no extra latency; operands are passed on unchanged (as a copy).
            return MOVEntryBuilder.build_description(
                self,
                instruction_name,
                list(operand_types),
                port_pressure,
                latency,
                "with store",
            )

        # Register only:
        return MOVEntryBuilder.build_description(
            self, instruction_name, operand_types, port_pressure, latency
        )
|
||||||
|
|
||||||
|
|
||||||
class MOVEntryBuilderIntelPort9(MOVEntryBuilder):
|
class MOVEntryBuilderIntelPort9(MOVEntryBuilder):
|
||||||
# for ICX
|
# for ICX
|
||||||
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
|
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
|
||||||
@@ -946,6 +983,432 @@ icx_mov_instructions = [
|
|||||||
# TODO with masking!
|
# TODO with masking!
|
||||||
]
|
]
|
||||||
|
|
||||||
|
p11 = MOVEntryBuilderIntelPort11()
|
||||||
|
|
||||||
|
spr_mov_instructions = [
|
||||||
|
# https://www.felixcloutier.com/x86/mov
|
||||||
|
("mov gpr gpr", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("mov gpr mem", ("", 0)),
|
||||||
|
("mov mem gpr", ("", 0)),
|
||||||
|
("mov imd gpr", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("mov imd mem", ("", 0)),
|
||||||
|
("movabs imd gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
# https://www.felixcloutier.com/x86/movapd
|
||||||
|
("movapd xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movapd xmm mem", ("", 0)),
|
||||||
|
("movapd mem xmm", ("", 0)),
|
||||||
|
("vmovapd xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovapd xmm mem", ("", 0)),
|
||||||
|
("vmovapd mem xmm", ("", 0)),
|
||||||
|
("vmovapd ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovapd ymm mem", ("", 0)),
|
||||||
|
("vmovapd mem ymm", ("", 0)),
|
||||||
|
("vmovapd zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovapd zmm mem", ("", 0)),
|
||||||
|
("vmovapd mem zmm", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movaps
|
||||||
|
("movaps xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movaps xmm mem", ("", 0)),
|
||||||
|
("movaps mem xmm", ("", 0)),
|
||||||
|
("vmovaps xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovaps xmm mem", ("", 0)),
|
||||||
|
("vmovaps mem xmm", ("", 0)),
|
||||||
|
("vmovaps ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovaps ymm mem", ("", 0)),
|
||||||
|
("vmovaps mem ymm", ("", 0)),
|
||||||
|
("vmovaps zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovaps zmm mem", ("", 0)),
|
||||||
|
("vmovaps mem zmm", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movd:movq
|
||||||
|
# ("movd gpr mm", ("1*p5", 1)),
|
||||||
|
# ("movd mem mm", ("", 0)),
|
||||||
|
# ("movq gpr mm", ("1*p5", 1)),
|
||||||
|
# ("movq mem mm", ("", 0)),
|
||||||
|
# ("movd mm gpr", ("1*p0", 1)),
|
||||||
|
# ("movd mm mem", ("", 0)),
|
||||||
|
# ("movq mm gpr", ("1*p0", 1)),
|
||||||
|
# ("movq mm mem", ("", 0)),
|
||||||
|
# ("movd gpr xmm", ("1*p5", 1)),
|
||||||
|
# ("movd mem xmm", ("", 0)),
|
||||||
|
# ("movq gpr xmm", ("1*p5", 1)),
|
||||||
|
# ("movq mem xmm", ("", 0)),
|
||||||
|
# ("movd xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("movd xmm mem", ("", 0)),
|
||||||
|
# ("movq xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("movq xmm mem", ("", 0)),
|
||||||
|
# ("vmovd gpr xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovd mem xmm", ("", 0)),
|
||||||
|
# ("vmovq gpr xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovq mem xmm", ("", 0)),
|
||||||
|
# ("vmovd xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vmovd xmm mem", ("", 0)),
|
||||||
|
# ("vmovq xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vmovq xmm mem", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movddup
|
||||||
|
# ("movddup xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("movddup mem xmm", ("", 0)),
|
||||||
|
# ("vmovddup xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovddup mem xmm", ("", 0)),
|
||||||
|
# ("vmovddup ymm ymm", ("1*p5", 1)),
|
||||||
|
# ("vmovddup mem ymm", ("", 0)),
|
||||||
|
# ("vmovddup zmm zmm", ("1*p5", 1)),
|
||||||
|
# ("vmovddup mem zmm", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movdq2q
|
||||||
|
# ("movdq2q xmm mm", ("1*p015+1*p5", 1)),
|
||||||
|
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
|
||||||
|
("movdqa xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movdqa mem xmm", ("", 0)),
|
||||||
|
("movdqa xmm mem", ("", 0)),
|
||||||
|
("vmovdqa xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa mem xmm", ("", 0)),
|
||||||
|
("vmovdqa xmm mem", ("", 0)),
|
||||||
|
("vmovdqa ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa mem ymm", ("", 0)),
|
||||||
|
("vmovdqa ymm mem", ("", 0)),
|
||||||
|
("vmovdqa32 xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa32 mem xmm", ("", 0)),
|
||||||
|
("vmovdqa32 xmm mem", ("", 0)),
|
||||||
|
("vmovdqa32 ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa32 mem ymm", ("", 0)),
|
||||||
|
("vmovdqa32 ymm mem", ("", 0)),
|
||||||
|
("vmovdqa32 zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa32 mem zmm", ("", 0)),
|
||||||
|
("vmovdqa32 zmm mem", ("", 0)),
|
||||||
|
("vmovdqa64 xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa64 mem xmm", ("", 0)),
|
||||||
|
("vmovdqa64 xmm mem", ("", 0)),
|
||||||
|
("vmovdqa64 ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa64 mem ymm", ("", 0)),
|
||||||
|
("vmovdqa64 ymm mem", ("", 0)),
|
||||||
|
("vmovdqa64 zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqa64 mem zmm", ("", 0)),
|
||||||
|
("vmovdqa64 zmm mem", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
|
||||||
|
("movdqu xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movdqu mem xmm", ("", 0)),
|
||||||
|
("movdqu xmm mem", ("", 0)),
|
||||||
|
("vmovdqu xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu mem xmm", ("", 0)),
|
||||||
|
("vmovdqu xmm mem", ("", 0)),
|
||||||
|
("vmovdqu ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu mem ymm", ("", 0)),
|
||||||
|
("vmovdqu ymm mem", ("", 0)),
|
||||||
|
("vmovdqu8 xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu8 mem xmm", ("", 0)),
|
||||||
|
("vmovdqu8 xmm mem", ("", 0)),
|
||||||
|
("vmovdqu8 ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu8 mem ymm", ("", 0)),
|
||||||
|
("vmovdqu8 ymm mem", ("", 0)),
|
||||||
|
("vmovdqu8 zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu8 mem zmm", ("", 0)),
|
||||||
|
("vmovdqu8 zmm mem", ("", 0)),
|
||||||
|
("vmovdqu16 xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu16 mem xmm", ("", 0)),
|
||||||
|
("vmovdqu16 xmm mem", ("", 0)),
|
||||||
|
("vmovdqu16 ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu16 mem ymm", ("", 0)),
|
||||||
|
("vmovdqu16 ymm mem", ("", 0)),
|
||||||
|
("vmovdqu16 zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu16 mem zmm", ("", 0)),
|
||||||
|
("vmovdqu16 zmm mem", ("", 0)),
|
||||||
|
("vmovdqu32 xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu32 mem xmm", ("", 0)),
|
||||||
|
("vmovdqu32 xmm mem", ("", 0)),
|
||||||
|
("vmovdqu32 ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu32 mem ymm", ("", 0)),
|
||||||
|
("vmovdqu32 ymm mem", ("", 0)),
|
||||||
|
("vmovdqu32 zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu32 mem zmm", ("", 0)),
|
||||||
|
("vmovdqu32 zmm mem", ("", 0)),
|
||||||
|
("vmovdqu64 xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu64 mem xmm", ("", 0)),
|
||||||
|
("vmovdqu64 xmm mem", ("", 0)),
|
||||||
|
("vmovdqu64 ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu64 mem ymm", ("", 0)),
|
||||||
|
("vmovdqu64 ymm mem", ("", 0)),
|
||||||
|
("vmovdqu64 zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovdqu64 mem zmm", ("", 0)),
|
||||||
|
("vmovdqu64 zmm mem", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movhlps
|
||||||
|
# ("movhlps xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovhlps xmm xmm xmm", ("1*p5", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movhpd
|
||||||
|
# ("movhpd mem xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovhpd mem xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("movhpd xmm mem", ("", 0)),
|
||||||
|
# ("vmovhpd mem xmm", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movhps
|
||||||
|
# ("movhps mem xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovhps mem xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("movhps xmm mem", ("", 0)),
|
||||||
|
# ("vmovhps mem xmm", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movlhps
|
||||||
|
# ("movlhps xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovlhps xmm xmm xmm", ("1*p5", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movlpd
|
||||||
|
# ("movlpd mem xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovlpd mem xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("movlpd xmm mem", ("", 0)),
|
||||||
|
# ("vmovlpd mem xmm", ("1*p5", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movlps
|
||||||
|
# ("movlps mem xmm", ("1*p5", 1)),
|
||||||
|
# ("vmovlps mem xmm xmm", ("1*p5", 1)),
|
||||||
|
# ("movlps xmm mem", ("", 0)),
|
||||||
|
# ("vmovlps mem xmm", ("1*p5", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movmskpd
|
||||||
|
# ("movmskpd xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vmovmskpd xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vmovmskpd ymm gpr", ("1*p0", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movmskps
|
||||||
|
# ("movmskps xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vmovmskps xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vmovmskps ymm gpr", ("1*p0", 1)),
|
||||||
|
# https://www.felixcloutier.com/x86/movntdq
|
||||||
|
("movntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntdq ymm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntdq zmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
# https://www.felixcloutier.com/x86/movntdqa
|
||||||
|
("movntdqa mem xmm", ("", 0)), # TODO NT-load: what latency to use?
|
||||||
|
("vmovntdqa mem xmm", ("", 0)), # TODO NT-load: what latency to use?
|
||||||
|
("vmovntdqa mem ymm", ("", 0)), # TODO NT-load: what latency to use?
|
||||||
|
("vmovntdqa mem zmm", ("", 0)), # TODO NT-load: what latency to use?
|
||||||
|
# https://www.felixcloutier.com/x86/movnti
|
||||||
|
("movnti gpr mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
# https://www.felixcloutier.com/x86/movntpd
|
||||||
|
("movntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntpd ymm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntpd zmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
# https://www.felixcloutier.com/x86/movntps
|
||||||
|
("movntps xmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntps xmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntps ymm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
("vmovntps zmm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
# https://www.felixcloutier.com/x86/movntq
|
||||||
|
("movntq mm mem", ("", 0)), # TODO NT-store: what latency to use?
|
||||||
|
# https://www.felixcloutier.com/x86/movq
|
||||||
|
("movq mm mm", ("", 0)),
|
||||||
|
("movq mem mm", ("", 0)),
|
||||||
|
("movq mm mem", ("", 0)),
|
||||||
|
("movq xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movq mem xmm", ("", 0)),
|
||||||
|
("movq xmm mem", ("", 0)),
|
||||||
|
("vmovq xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovq mem xmm", ("", 0)),
|
||||||
|
("vmovq xmm mem", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
|
||||||
|
# TODO combined load-store is currently not supported
|
||||||
|
# ('movs mem mem', ()),
|
||||||
|
# https://www.felixcloutier.com/x86/movsd
|
||||||
|
("movsd xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movsd mem xmm", ("", 0)),
|
||||||
|
("movsd xmm mem", ("", 0)),
|
||||||
|
("vmovsd xmm xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovsd mem xmm", ("", 0)),
|
||||||
|
("vmovsd xmm mem", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movshdup
|
||||||
|
# ("movshdup xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("movshdup mem xmm", ("", 0)),
|
||||||
|
# ("vmovshdup xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("vmovshdup mem xmm", ("", 0)),
|
||||||
|
# ("vmovshdup ymm ymm", ("1*p15", 1)),
|
||||||
|
# ("vmovshdup mem ymm", ("", 0)),
|
||||||
|
# ("vmovshdup zmm zmm", ("1*p5", 1)),
|
||||||
|
# ("vmovshdup mem zmm", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movsldup
|
||||||
|
# ("movsldup xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("movsldup mem xmm", ("", 0)),
|
||||||
|
# ("vmovsldup xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("vmovsldup mem xmm", ("", 0)),
|
||||||
|
# ("vmovsldup ymm ymm", ("1*p15", 1)),
|
||||||
|
# ("vmovsldup mem ymm", ("", 0)),
|
||||||
|
# ("vmovsldup zmm zmm", ("1*p5", 1)),
|
||||||
|
# ("vmovsldup mem zmm", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movss
|
||||||
|
("movss xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movss mem xmm", ("", 0)),
|
||||||
|
("vmovss xmm xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovss mem xmm", ("", 0)),
|
||||||
|
("vmovss xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovss xmm mem", ("", 0)),
|
||||||
|
("movss mem xmm", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movsx:movsxd
|
||||||
|
("movsx gpr gpr", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movsx mem gpr", ("", 0)),
|
||||||
|
("movsxd gpr gpr", ("", 0)),
|
||||||
|
("movsxd mem gpr", ("", 0)),
|
||||||
|
("movsb gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
("movsb mem gpr", ("", 0)), # AT&T version
|
||||||
|
("movsw gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
("movsw mem gpr", ("", 0)), # AT&T version
|
||||||
|
("movsl gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
("movsl mem gpr", ("", 0)), # AT&T version
|
||||||
|
("movsq gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
("movsq mem gpr", ("", 0)), # AT&T version
|
||||||
|
# https://www.felixcloutier.com/x86/movupd
|
||||||
|
("movupd xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movupd mem xmm", ("", 0)),
|
||||||
|
("movupd xmm mem", ("", 0)),
|
||||||
|
("vmovupd xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovupd mem xmm", ("", 0)),
|
||||||
|
("vmovupd xmm mem", ("", 0)),
|
||||||
|
("vmovupd ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovupd mem ymm", ("", 0)),
|
||||||
|
("vmovupd ymm mem", ("", 0)),
|
||||||
|
("vmovupd zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovupd mem zmm", ("", 0)),
|
||||||
|
("vmovupd zmm mem", ("", 0)),
|
||||||
|
# https://www.felixcloutier.com/x86/movups
|
||||||
|
("movups xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("movups mem xmm", ("", 0)),
|
||||||
|
("movups xmm mem", ("", 0)),
|
||||||
|
("vmovups xmm xmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovups mem xmm", ("", 0)),
|
||||||
|
("vmovups xmm mem", ("", 0)),
|
||||||
|
("vmovups ymm ymm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovups mem ymm", ("", 0)),
|
||||||
|
("vmovups ymm mem", ("", 0)),
|
||||||
|
("vmovups zmm zmm", ("1*p0,1,5,6,10", 1)),
|
||||||
|
("vmovups mem zmm", ("", 0)),
|
||||||
|
("vmovups zmm mem", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/movzx
|
||||||
|
# ("movzx gpr gpr", ("1*p0,1,5,6,10", 1)),
|
||||||
|
# ("movzx mem gpr", ("", 0)),
|
||||||
|
# ("movzb gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
# ("movzb mem gpr", ("", 0)), # AT&T version
|
||||||
|
# ("movzw gpr gpr", ("1*p0,1,5,6,10", 1)), # AT&T version
|
||||||
|
# ("movzw mem gpr", ("", 0)), # AT&T version
|
||||||
|
# ("movzl gpr gpr", ("1*p0156", 1)), # AT&T version
|
||||||
|
# ("movzl mem gpr", ("", 0)), # AT&T version
|
||||||
|
# ("movzq gpr gpr", ("1*p0156", 1)), # AT&T version
|
||||||
|
# ("movzq mem gpr", ("", 0)), # AT&T version
|
||||||
|
# # https://www.felixcloutier.com/x86/cmovcc
|
||||||
|
# ("cmova gpr gpr", ("2*p06", 1)),
|
||||||
|
# ("cmova mem gpr", ("", 0)),
|
||||||
|
# ("cmovae gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovae mem gpr", ("", 0)),
|
||||||
|
# ("cmovb gpr gpr", ("2*p06", 1)),
|
||||||
|
# ("cmovb mem gpr", ("", 0)),
|
||||||
|
# ("cmovbe gpr gpr", ("2*p06", 1)),
|
||||||
|
# ("cmovbe mem gpr", ("", 0)),
|
||||||
|
# ("cmovc gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovc mem gpr", ("", 0)),
|
||||||
|
# ("cmove gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmove mem gpr", ("", 0)),
|
||||||
|
# ("cmovg gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovg mem gpr", ("", 0)),
|
||||||
|
# ("cmovge gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovge mem gpr", ("", 0)),
|
||||||
|
# ("cmovl gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovl mem gpr", ("", 0)),
|
||||||
|
# ("cmovle gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovle mem gpr", ("", 0)),
|
||||||
|
# ("cmovna gpr gpr", ("2*p06", 1)),
|
||||||
|
# ("cmovna mem gpr", ("", 0)),
|
||||||
|
# ("cmovnae gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnae mem gpr", ("", 0)),
|
||||||
|
# ("cmovnb gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnb mem gpr", ("", 0)),
|
||||||
|
# ("cmovnbe gpr gpr", ("2*p06", 1)),
|
||||||
|
# ("cmovnbe mem gpr", ("", 0)),
|
||||||
|
# ("cmovnc gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnc mem gpr", ("", 0)),
|
||||||
|
# ("cmovne gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovne mem gpr", ("", 0)),
|
||||||
|
# ("cmovng gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovng mem gpr", ("", 0)),
|
||||||
|
# ("cmovnge gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnge mem gpr", ("", 0)),
|
||||||
|
# ("cmovnl gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnl mem gpr", ("", 0)),
|
||||||
|
# ("cmovno gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovno mem gpr", ("", 0)),
|
||||||
|
# ("cmovnp gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnp mem gpr", ("", 0)),
|
||||||
|
# ("cmovns gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovns mem gpr", ("", 0)),
|
||||||
|
# ("cmovnz gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovnz mem gpr", ("", 0)),
|
||||||
|
# ("cmovo gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovo mem gpr", ("", 0)),
|
||||||
|
# ("cmovp gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovp mem gpr", ("", 0)),
|
||||||
|
# ("cmovpe gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovpe mem gpr", ("", 0)),
|
||||||
|
# ("cmovpo gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovpo mem gpr", ("", 0)),
|
||||||
|
# ("cmovs gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovs mem gpr", ("", 0)),
|
||||||
|
# ("cmovz gpr gpr", ("1*p06", 1)),
|
||||||
|
# ("cmovz mem gpr", ("", 0)),
|
||||||
|
# # https://www.felixcloutier.com/x86/pmovmskb
|
||||||
|
# ("pmovmskb mm gpr", ("1*p0", 1)),
|
||||||
|
# ("pmovmskb xmm gpr", ("1*p0", 1)),
|
||||||
|
# ("vpmovmskb xmm gpr", ("1*p0", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/pmovsx
|
||||||
|
# ("pmovsxbw xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("pmovsxbw mem xmm", ("1*p15", 1)),
|
||||||
|
# ("pmovsxbd xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("pmovsxbd mem xmm", ("1*p15", 1)),
|
||||||
|
# ("pmovsxbq xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("pmovsxbq mem xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbw xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbw mem xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbd xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbd mem xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbq xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbq mem xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovsxbw xmm ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovsxbw mem ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovsxbd xmm ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovsxbd mem ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovsxbq xmm ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovsxbq mem ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovsxbw ymm zmm", ("1*p5", 3)),
|
||||||
|
# ("vpmovsxbw mem zmm", ("1*p5", 1)),
|
||||||
|
# # https://www.felixcloutier.com/x86/pmovzx
|
||||||
|
# ("pmovzxbw xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("pmovzxbw mem xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovzxbw xmm xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovzxbw mem xmm", ("1*p15", 1)),
|
||||||
|
# ("vpmovzxbw xmm ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovzxbw mem ymm", ("1*p5", 1)),
|
||||||
|
# ("vpmovzxbw ymm zmm", ("1*p5", 1)),
|
||||||
|
# ("vpmovzxbw mem zmm", ("1*p5", 1)),
|
||||||
|
##################################################################
|
||||||
|
# # https://www.felixcloutier.com/x86/movbe
|
||||||
|
# ("movbe gpr mem", ("1*p15", 6)),
|
||||||
|
# ("movbe mem gpr", ("1*p15", 6)),
|
||||||
|
################################################
|
||||||
|
# https://www.felixcloutier.com/x86/movapd
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movaps
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movddup
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movq2dq
|
||||||
|
# ("movq2dq mm xmm", ("1*p0+1*p015", 1)),
|
||||||
|
# https://www.felixcloutier.com/x86/movsd
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movshdup
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movsldup
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movss
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movupd
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/movups
|
||||||
|
# TODO with masking!
|
||||||
|
# https://www.felixcloutier.com/x86/pmovsx
|
||||||
|
# TODO with masking!
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
|
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
|
||||||
# for HSW, BDW, SKX and CSX
|
# for HSW, BDW, SKX and CSX
|
||||||
@@ -1612,6 +2075,7 @@ def get_description(arch, rhs_comment=None):
|
|||||||
"skx": "\n".join([p7.process_item(*item) for item in skx_mov_instructions]),
|
"skx": "\n".join([p7.process_item(*item) for item in skx_mov_instructions]),
|
||||||
"csx": "\n".join([p7.process_item(*item) for item in csx_mov_instructions]),
|
"csx": "\n".join([p7.process_item(*item) for item in csx_mov_instructions]),
|
||||||
"icx": "\n".join([p9.process_item(*item) for item in icx_mov_instructions]),
|
"icx": "\n".join([p9.process_item(*item) for item in icx_mov_instructions]),
|
||||||
|
"spr": "\n".join([p11.process_item(*item) for item in spr_mov_instructions]),
|
||||||
"zen3": "\n".join([z3.process_item(*item) for item in zen3_mov_instructions]),
|
"zen3": "\n".join([z3.process_item(*item) for item in zen3_mov_instructions]),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1634,7 +2098,7 @@ if __name__ == "__main__":
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
if len(sys.argv) != 2:
|
if len(sys.argv) != 2:
|
||||||
print("Usage: {} (snb|ivb|hsw|bdw|skx|csx|icx|zen3)".format(sys.argv[0]))
|
print("Usage: {} (snb|ivb|hsw|bdw|skx|csx|icx|spr|zen3)".format(sys.argv[0]))
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ port_model_scheme: |
|
|||||||
| 36 | | 36 | | 36 | | 36 | | 48 | | 24 | | 26 | | 16 | | 12 | | 28 | | 28 |
|
| 36 | | 36 | | 36 | | 36 | | 48 | | 24 | | 26 | | 16 | | 12 | | 28 | | 28 |
|
||||||
+------+ +------+ +------+ +-------------+ +-----------------------------+ +------+ +------+ +------+ +------+ +-------------+ +------+
|
+------+ +------+ +------+ +-------------+ +-----------------------------+ +------+ +------+ +------+ +------+ +-------------+ +------+
|
||||||
0 |FP0 1 |FP1 2 |FP2 3 |FP3 4 |D0 5 |D1 6 |D2 7 |D3 8 |INT0 9 |INT1 10 |INT2 11 |INT3 12 |INT4 13 |INT5
|
0 |FP0 1 |FP1 2 |FP2 3 |FP3 4 |D0 5 |D1 6 |D2 7 |D3 8 |INT0 9 |INT1 10 |INT2 11 |INT3 12 |INT4 13 |INT5
|
||||||
\/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/
|
\/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/
|
||||||
+------+ +------+ +------+ +------+ +----+ +-----+ +-----+ +-----+ +-----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+
|
+------+ +------+ +------+ +------+ +----+ +-----+ +-----+ +-----+ +-----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+
|
||||||
| ALU | | ALU | | ALU | | ALU | | DV | | LD | | ST | | LD | | LD | | ALU | | ALU | | ALU | | ALU | | ALU | | DV | | ALU |
|
| ALU | | ALU | | ALU | | ALU | | DV | | LD | | ST | | LD | | LD | | ALU | | ALU | | ALU | | ALU | | ALU | | DV | | ALU |
|
||||||
+------+ +------+ +------+ +------+ +----+ +-----+ +-----+ +-----+ +-----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+
|
+------+ +------+ +------+ +------+ +----+ +-----+ +-----+ +-----+ +-----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+
|
||||||
@@ -37,15 +37,15 @@ port_model_scheme: |
|
|||||||
+------+ +------+ +------+ +------+ +------+ +------+
|
+------+ +------+ +------+ +------+ +------+ +------+
|
||||||
| FCSEL| | FCSEL| | FLAGS| | FLAGS| |MOV FP| silly | FMA |
|
| FCSEL| | FCSEL| | FLAGS| | FLAGS| |MOV FP| silly | FMA |
|
||||||
+------+ +------+ +------+ +------+ +------+ +------+
|
+------+ +------+ +------+ +------+ +------+ +------+
|
||||||
+------+ +------+
|
+------+ +------+
|
||||||
| 2INT | | 2INT |
|
| 2INT | | 2INT |
|
||||||
+------+ +------+
|
+------+ +------+
|
||||||
+------+
|
+------+
|
||||||
| RCP |
|
| RCP |
|
||||||
+------+
|
+------+
|
||||||
+------+
|
+------+
|
||||||
| SHA |
|
| SHA |
|
||||||
+------+
|
+------+
|
||||||
instruction_forms:
|
instruction_forms:
|
||||||
- name: [adc, adcs]
|
- name: [adc, adcs]
|
||||||
operands:
|
operands:
|
||||||
@@ -105,7 +105,7 @@ instruction_forms:
|
|||||||
- name: adds
|
- name: adds
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: '*'
|
prefix: '*'
|
||||||
- class: register
|
- class: register
|
||||||
prefix: '*'
|
prefix: '*'
|
||||||
- class: register
|
- class: register
|
||||||
@@ -116,7 +116,7 @@ instruction_forms:
|
|||||||
- name: adds
|
- name: adds
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: '*'
|
prefix: '*'
|
||||||
- class: register
|
- class: register
|
||||||
prefix: '*'
|
prefix: '*'
|
||||||
- class: immediate
|
- class: immediate
|
||||||
@@ -127,7 +127,7 @@ instruction_forms:
|
|||||||
- name: adr
|
- name: adr
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: '*'
|
prefix: '*'
|
||||||
- class: identifier
|
- class: identifier
|
||||||
throughput: 0.5
|
throughput: 0.5
|
||||||
latency: ~ # 1*p89
|
latency: ~ # 1*p89
|
||||||
@@ -1521,7 +1521,7 @@ instruction_forms:
|
|||||||
throughput: 0.16666666
|
throughput: 0.16666666
|
||||||
latency: ~ # 1*p89,10,11,12,13
|
latency: ~ # 1*p89,10,11,12,13
|
||||||
port_pressure: [[1, ['8', '9', '10', '11', '12', '13']]]
|
port_pressure: [[1, ['8', '9', '10', '11', '12', '13']]]
|
||||||
- name: [orn, orr]
|
- name: [orn, orr]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: x
|
prefix: x
|
||||||
@@ -1532,7 +1532,7 @@ instruction_forms:
|
|||||||
throughput: 0.2
|
throughput: 0.2
|
||||||
latency: 1.0 # 1*p89,10,12,13
|
latency: 1.0 # 1*p89,10,12,13
|
||||||
port_pressure: [[1, ['8', '9', '10', '12', '13']]]
|
port_pressure: [[1, ['8', '9', '10', '12', '13']]]
|
||||||
- name: [orn, orr]
|
- name: [orn, orr]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: x
|
prefix: x
|
||||||
@@ -1543,7 +1543,7 @@ instruction_forms:
|
|||||||
throughput: 0.16666666
|
throughput: 0.16666666
|
||||||
latency: 1.0 # 1*p89,10,11,12,13
|
latency: 1.0 # 1*p89,10,11,12,13
|
||||||
port_pressure: [[1, ['8', '9', '10', '11', '12', '13']]]
|
port_pressure: [[1, ['8', '9', '10', '11', '12', '13']]]
|
||||||
- name: [orn, orr]
|
- name: [orn, orr]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: w
|
prefix: w
|
||||||
@@ -1554,7 +1554,7 @@ instruction_forms:
|
|||||||
throughput: 0.2
|
throughput: 0.2
|
||||||
latency: 1.0 # 1*p89,10,12,13
|
latency: 1.0 # 1*p89,10,12,13
|
||||||
port_pressure: [[1, ['8', '9', '10', '12', '13']]]
|
port_pressure: [[1, ['8', '9', '10', '12', '13']]]
|
||||||
- name: [orn, orr]
|
- name: [orn, orr]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: w
|
prefix: w
|
||||||
@@ -1596,8 +1596,8 @@ instruction_forms:
|
|||||||
latency: ~
|
latency: ~
|
||||||
port_pressure: []
|
port_pressure: []
|
||||||
- name: ret
|
- name: ret
|
||||||
operands:
|
operands:
|
||||||
- class: identifier
|
- class: identifier
|
||||||
throughput: 0.0
|
throughput: 0.0
|
||||||
latency: ~
|
latency: ~
|
||||||
port_pressure: []
|
port_pressure: []
|
||||||
@@ -1650,7 +1650,7 @@ instruction_forms:
|
|||||||
- name: [scvtf, ucvtf]
|
- name: [scvtf, ucvtf]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: s
|
prefix: s
|
||||||
- class: register
|
- class: register
|
||||||
prefix: w
|
prefix: w
|
||||||
throughput: 0.33333333
|
throughput: 0.33333333
|
||||||
@@ -1659,7 +1659,7 @@ instruction_forms:
|
|||||||
- name: [scvtf, ucvtf]
|
- name: [scvtf, ucvtf]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: d
|
prefix: d
|
||||||
- class: register
|
- class: register
|
||||||
prefix: x
|
prefix: x
|
||||||
throughput: 0.33333333
|
throughput: 0.33333333
|
||||||
@@ -1668,7 +1668,7 @@ instruction_forms:
|
|||||||
- name: [scvtf, ucvtf]
|
- name: [scvtf, ucvtf]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: d
|
prefix: d
|
||||||
- class: register
|
- class: register
|
||||||
prefix: x
|
prefix: x
|
||||||
- class: immediate
|
- class: immediate
|
||||||
@@ -1679,7 +1679,7 @@ instruction_forms:
|
|||||||
- name: [scvtf, ucvtf]
|
- name: [scvtf, ucvtf]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
prefix: s
|
prefix: s
|
||||||
- class: register
|
- class: register
|
||||||
prefix: w
|
prefix: w
|
||||||
- class: immediate
|
- class: immediate
|
||||||
@@ -2831,9 +2831,9 @@ instruction_forms:
|
|||||||
prefix: "*"
|
prefix: "*"
|
||||||
- class: register
|
- class: register
|
||||||
prefix: "*"
|
prefix: "*"
|
||||||
- class: immediate
|
- class: immediate
|
||||||
imd: int
|
imd: int
|
||||||
- class: immediate
|
- class: immediate
|
||||||
imd: int
|
imd: int
|
||||||
throughput: 0.16666666
|
throughput: 0.16666666
|
||||||
latency: 1.0 # 1*p89,10,11,12,13
|
latency: 1.0 # 1*p89,10,11,12,13
|
||||||
@@ -2912,7 +2912,7 @@ instruction_forms:
|
|||||||
prefix: s
|
prefix: s
|
||||||
- class: immediate
|
- class: immediate
|
||||||
imd: int
|
imd: int
|
||||||
- class: condition
|
- class: condition
|
||||||
ccode: "*"
|
ccode: "*"
|
||||||
throughput: 1.0
|
throughput: 1.0
|
||||||
latency: 1.0 # 1*p3
|
latency: 1.0 # 1*p3
|
||||||
@@ -3617,7 +3617,7 @@ instruction_forms:
|
|||||||
width: '*'
|
width: '*'
|
||||||
throughput: 0.25
|
throughput: 0.25
|
||||||
latency: 2.0 # 1*p0123
|
latency: 2.0 # 1*p0123
|
||||||
port_pressure: [[1, '0123']]
|
port_pressure: [[1, '0123']]
|
||||||
- name: [fmla, fmls]
|
- name: [fmla, fmls]
|
||||||
operands:
|
operands:
|
||||||
- class: register
|
- class: register
|
||||||
|
|||||||
5737
osaca/data/spr.yml
Normal file
5737
osaca/data/spr.yml
Normal file
File diff suppressed because it is too large
Load Diff
4777
osaca/data/v2.yml
Normal file
4777
osaca/data/v2.yml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@ SUPPORTED_ARCHS = [
|
|||||||
"CSX",
|
"CSX",
|
||||||
"ICL",
|
"ICL",
|
||||||
"ICX",
|
"ICX",
|
||||||
|
"SPR",
|
||||||
"ZEN1",
|
"ZEN1",
|
||||||
"ZEN2",
|
"ZEN2",
|
||||||
"ZEN3",
|
"ZEN3",
|
||||||
@@ -39,6 +40,7 @@ SUPPORTED_ARCHS = [
|
|||||||
"TSV110",
|
"TSV110",
|
||||||
"A72",
|
"A72",
|
||||||
"M1",
|
"M1",
|
||||||
|
"V2",
|
||||||
]
|
]
|
||||||
DEFAULT_ARCHS = {
|
DEFAULT_ARCHS = {
|
||||||
"aarch64": "A64FX",
|
"aarch64": "A64FX",
|
||||||
@@ -101,8 +103,9 @@ def create_parser(parser=None):
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--arch",
|
"--arch",
|
||||||
type=str,
|
type=str,
|
||||||
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, ZEN1, ZEN2, ZEN3, TX2, N1, "
|
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ICX, SPR, ZEN1, ZEN2, ZEN3, "
|
||||||
"A64FX, TSV110, A72, M1). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
|
"TX2, N1, A64FX, TSV110, A72, M1, V2). If no architecture is given, OSACA assumes a default uarch for "
|
||||||
|
"x86/AArch64.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--fixed",
|
"--fixed",
|
||||||
|
|||||||
@@ -438,6 +438,7 @@ class MachineModel(object):
|
|||||||
"tx2": "aarch64",
|
"tx2": "aarch64",
|
||||||
"n1": "aarch64",
|
"n1": "aarch64",
|
||||||
"m1": "aarch64",
|
"m1": "aarch64",
|
||||||
|
"v2": "aarch64",
|
||||||
"zen1": "x86",
|
"zen1": "x86",
|
||||||
"zen+": "x86",
|
"zen+": "x86",
|
||||||
"zen2": "x86",
|
"zen2": "x86",
|
||||||
@@ -458,6 +459,7 @@ class MachineModel(object):
|
|||||||
"cfl": "x86",
|
"cfl": "x86",
|
||||||
"icl": "x86",
|
"icl": "x86",
|
||||||
"icx": "x86",
|
"icx": "x86",
|
||||||
|
"spr": "x86",
|
||||||
}
|
}
|
||||||
arch = arch.lower()
|
arch = arch.lower()
|
||||||
if arch in arch_dict:
|
if arch in arch_dict:
|
||||||
|
|||||||
Reference in New Issue
Block a user