From 4f8e37d9fdc09ffcce691821d08874dfd63e45cb Mon Sep 17 00:00:00 2001 From: JanLJL Date: Tue, 27 Sep 2022 18:04:59 +0200 Subject: [PATCH] bugfixes and more features --- osaca/data/create_db_entry.py | 54 +++- osaca/data/generate_mov_entries.py | 413 +++++++++++++++++++++++++++-- 2 files changed, 437 insertions(+), 30 deletions(-) diff --git a/osaca/data/create_db_entry.py b/osaca/data/create_db_entry.py index 38c85f0..d656247 100755 --- a/osaca/data/create_db_entry.py +++ b/osaca/data/create_db_entry.py @@ -16,8 +16,11 @@ class EntryBuilder: def classify(operands_types): load = "mem" in operands_types[:-1] store = "mem" in operands_types[-1:] + vec = False + if any([vecr in operands_types for vecr in ["mm", "xmm", "ymm", "zmm"]]): + vec = True assert not (load and store), "Can not process a combined load-store instruction." - return load, store + return load, store, vec def build_description( self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None @@ -26,7 +29,7 @@ class EntryBuilder: comment = " # " + comment else: comment = "" - description = "- name: {}{}\n operands:\n".format(instruction_name, comment) + description = "- name: {}{}\n operands: {}\n".format(instruction_name, comment, "[]" if len(operand_types) == 0 else "") for ot in operand_types: if ot == "imd": @@ -64,14 +67,18 @@ class EntryBuilder: def parse_port_pressure(self, port_pressure_str): """ Example: - 1*p45+2*p0 -> [[1, '45'], [2, '0']] + 1*p45+2*p0+2*p10,11 -> [[1, '45'], [2, '0'], [2, ['10', '11']]] """ port_pressure = [] if port_pressure_str: for p in port_pressure_str.split("+"): cycles, ports = p.split("*p") - if ports.startswith("(") and ports.endswith(")"): - ports = ports[1:-1].split(",") + ports = ports.split(",") + if len(ports) == 1: + ports = ports[0] + else: + ports = list(filter(lambda p: len(p) > 0, ports)) + port_pressure.append([int(cycles), ports]) return port_pressure @@ -92,23 +99,42 @@ class EntryBuilder: return self.build_description(instruction_name, operand_types, port_pressure, latency) -class EntryBuilderIntelPort9(EntryBuilder): - # for ICX - +class ArchEntryBuilder(EntryBuilder): def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): - load, store = self.classify(operand_types) + # Intel ICX + # LD_pressure = [[1, "23"], [1, ["2D", "3D"]]] + # LD_pressure_vec = LD_pressure + # ST_pressure = [[1, "79"], [1, "48"]] + # ST_pressure_vec = ST_pressure + # LD_lat = 5 + # ST_lat = 0 + # Zen3 + LD_pressure = [[1, ["11", "12", "13"]]] + LD_pressure_vec = [[1, ["11", "12"]]] + ST_pressure = [[1, ["12", "13"]]] + ST_pressure_vec = [[1, ["4"]], [1, ["13"]]] + LD_lat = 4 + ST_lat = 0 + + load, store, vec = self.classify(operand_types) if load: - port_pressure += [[1, "23"], [1, ["2D", "3D"]]] - latency += 5 + if vec: + port_pressure += LD_pressure_vec + else: + port_pressure += LD_pressure + latency += LD_lat comment = "with load" return EntryBuilder.build_description( self, instruction_name, operand_types, port_pressure, latency, comment ) if store: - port_pressure = port_pressure + [[1, "79"], [1, "48"]] + if vec: + port_pressure = port_pressure + ST_pressure_vec + else: + port_pressure = port_pressure + ST_pressure operands = ["mem" if o == "mem" else o for o in operand_types] - latency += 0 + latency += ST_lat return EntryBuilder.build_description( self, instruction_name, @@ -125,7 +151,7 @@ class EntryBuilderIntelPort9(EntryBuilder): def get_description(instruction_form, port_pressure, latency, rhs_comment=None): - entry = EntryBuilderIntelPort9().process_item(instruction_form, (port_pressure, latency)) + entry = ArchEntryBuilder().process_item(instruction_form, (port_pressure, latency)) if rhs_comment is not None: max_length = max([len(line) for line in entry.split("\n")]) diff --git a/osaca/data/generate_mov_entries.py b/osaca/data/generate_mov_entries.py index 8fd4fba..3efe00f 100755 --- a/osaca/data/generate_mov_entries.py +++ b/osaca/data/generate_mov_entries.py @@ -16,8 +16,11 @@ class MOVEntryBuilder: def classify(operands_types): load = "mem" in operands_types[:-1] store = "mem" in operands_types[-1:] + vec = False + if any([vecr in operands_types for vecr in ["mm", "xmm", "ymm", "zmm"]]): + vec = True assert not (load and store), "Can not process a combined load-store instruction." - return load, store + return load, store, vec def build_description( self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None @@ -65,6 +68,10 @@ class MOVEntryBuilder: if port_pressure_str: for p in port_pressure_str.split("+"): cycles, ports = p.split("*p") + ports = ports.split(",") + if len(ports) == 1: + ports = ports[0] + port_pressure.append([int(cycles), ports]) return port_pressure @@ -84,7 +91,7 @@ class MOVEntryBuilder: class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): # for SNB and IVB def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): - load, store = self.classify(operand_types) + load, store, vec = self.classify(operand_types) comment = None if load: @@ -111,9 +118,8 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): class MOVEntryBuilderIntelPort9(MOVEntryBuilder): # for ICX - def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): - load, store = self.classify(operand_types) + load, store, vec = self.classify(operand_types) if load: port_pressure += [[1, "23"], [1, ["2D", "3D"]]] @@ -141,6 +147,381 @@ class MOVEntryBuilderIntelPort9(MOVEntryBuilder): ) +class MOVEntryBuilderAMDZen3(MOVEntryBuilder): + # for Zen 3 + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): + load, store, vec = self.classify(operand_types) + + if load and vec: + port_pressure += [[1, ["11", "12"]]] + latency += 4 + comment = "with load" + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + elif load: + port_pressure += [[1, ["11", "12", "13"]]] + latency += 4 + comment = "with load" + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + if store and vec: + port_pressure = port_pressure + [[1, ["4"]], [1, ["13"]]] + operands = ["mem" if o == "mem" else o for o in operand_types] + latency += 0 + return MOVEntryBuilder.build_description( + self, + instruction_name, + operands, + port_pressure, + latency, + "with store", + ) + elif store: + port_pressure = port_pressure + [[1, ["12", "13"]]] + operands = ["mem" if o == "mem" else o for o in operand_types] + latency += 0 + return MOVEntryBuilder.build_description( + self, + instruction_name, + operands, + port_pressure, + latency, + "with store", + ) + # Register only: + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency + ) + + +############################################################################# + +z3 = MOVEntryBuilderAMDZen3() + +zen3_mov_instructions = [ + # https://www.felixcloutier.com/x86/mov + ("mov gpr gpr", ("1*p6789", 1)), + ("mov gpr mem", ("", 0)), + ("mov mem gpr", ("", 0)), + ("mov imd gpr", ("1*p6789", 1)), + ("mov imd mem", ("", 0)), + ("movabs imd gpr", ("1*p6789", 1)), # AT&T version, port util to be verified + # https://www.felixcloutier.com/x86/movapd + ("movapd xmm xmm", ("1*p0123", 1)), + ("movapd xmm mem", ("", 0)), + ("movapd mem xmm", ("", 0)), + ("vmovapd xmm xmm", ("1*p0123", 1)), + ("vmovapd xmm mem", ("", 0)), + ("vmovapd mem xmm", ("", 0)), + ("vmovapd ymm ymm", ("1*p0123", 1)), + ("vmovapd ymm mem", ("", 0)), + ("vmovapd mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movaps + ("movaps xmm xmm", ("1*p0123", 1)), + ("movaps xmm mem", ("", 0)), + ("movaps mem xmm", ("", 0)), + ("vmovaps xmm xmm", ("1*p0123", 1)), + ("vmovaps xmm mem", ("", 0)), + ("vmovaps mem xmm", ("", 0)), + ("vmovaps ymm ymm", ("1*p0123", 1)), + ("vmovaps ymm mem", ("", 0)), + ("vmovaps mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movd:movq + ("movd gpr mm", ("1*p0123", 1)), + ("movd mem mm", ("", 0)), + ("movq gpr mm", ("1*p0123", 1)), + ("movq mem mm", ("", 0)), + ("movd mm gpr", ("1*p0123", 1)), + ("movd mm mem", ("", 0)), + ("movq mm gpr", ("1*p0123", 1)), + ("movq mm mem", ("", 0)), + ("movd gpr xmm", ("1*p0123", 1)), + ("movd mem xmm", ("", 0)), + ("movq gpr xmm", ("1*p0123", 1)), + ("movq mem xmm", ("", 0)), + ("movd xmm gpr", ("1*p0123", 1)), + ("movd xmm mem", ("", 0)), + ("movq xmm gpr", ("1*p0123", 1)), + ("movq xmm mem", ("", 0)), + ("vmovd gpr xmm", ("1*p0123", 1)), + ("vmovd mem xmm", ("", 0)), + ("vmovq gpr xmm", ("1*p0123", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovd xmm gpr", ("1*p0123", 1)), + ("vmovd xmm mem", ("", 0)), + ("vmovq xmm gpr", ("1*p0123", 1)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movddup + ("movddup xmm xmm", ("1*p12", 1)), + ("movddup mem xmm", ("", 0)), + ("vmovddup xmm xmm", ("1*p12", 1)), + ("vmovddup mem xmm", ("", 0)), + ("vmovddup ymm ymm", ("1*p12", 1)), + ("vmovddup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movdq2q + ("movdq2q xmm mm", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ("movdqa xmm xmm", ("1*p0123", 1)), + ("movdqa mem xmm", ("", 0)), + ("movdqa xmm mem", ("", 0)), + ("vmovdqa xmm xmm", ("1*p0123", 1)), + ("vmovdqa mem xmm", ("", 0)), + ("vmovdqa xmm mem", ("", 0)), + ("vmovdqa ymm ymm", ("1*p0123", 1)), + ("vmovdqa mem ymm", ("", 0)), + ("vmovdqa ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ("movdqu xmm xmm", ("1*p0123", 1)), + ("movdqu mem xmm", ("", 0)), + ("movdqu xmm mem", ("", 0)), + ("vmovdqu xmm xmm", ("1*p0123", 1)), + ("vmovdqu mem xmm", ("", 0)), + ("vmovdqu xmm mem", ("", 0)), + ("vmovdqu ymm ymm", ("1*p0123", 1)), + ("vmovdqu mem ymm", ("", 0)), + ("vmovdqu ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movhlps + ("movhlps xmm xmm", ("1*p12", 1)), + ("vmovhlps xmm xmm xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movhpd + ("movhpd mem xmm", ("1*p12", 1)), + ("vmovhpd mem xmm xmm", ("1*p12", 1)), + ("movhpd xmm mem", ("", 0)), + ("vmovhpd mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movhps + ("movhps mem xmm", ("1*p12", 1)), + ("vmovhps mem xmm xmm", ("1*p12", 1)), + ("movhps xmm mem", ("", 0)), + ("vmovhps mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movlhps + ("movlhps xmm xmm", ("1*p12", 1)), + ("vmovlhps xmm xmm xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movlpd + ("movlpd mem xmm", ("1*p12", 1)), + ("vmovlpd mem xmm xmm", ("1*p12", 1)), + ("movlpd xmm mem", ("1*p12", 0)), + ("vmovlpd mem xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movlps + ("movlps mem xmm", ("1*p12", 1)), + ("vmovlps mem xmm xmm", ("1*p12", 1)), + ("movlps xmm mem", ("1*p12", 0)), + ("vmovlps mem xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movmskpd + ("movmskpd xmm gpr", ("1*p0123", 1)), + ("vmovmskpd xmm gpr", ("1*p0123", 1)), + ("vmovmskpd ymm gpr", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/movmskps + ("movmskps xmm gpr", ("1*p0123", 1)), + ("vmovmskps xmm gpr", ("1*p0123", 1)), + ("vmovmskps ymm gpr", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/movntdq + ("movntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntdqa + ("movntdqa mem xmm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem xmm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem ymm", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movnti + ("movnti gpr mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntpd + ("movntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ("movntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntq + ("movntq mm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movq + ("movq mm mm", ("", 0)), + ("movq mem mm", ("", 0)), + ("movq mm mem", ("", 0)), + ("movq xmm xmm", ("1*p0123", 1)), + ("movq mem xmm", ("", 0)), + ("movq xmm mem", ("", 0)), + ("vmovq xmm xmm", ("1*p0123", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq + # TODO combined load-store is currently not supported + # ('movs mem mem', ()), + # https://www.felixcloutier.com/x86/movsd + ("movsd xmm xmm", ("1*p0123", 1)), + ("movsd mem xmm", ("", 0)), + ("movsd xmm mem", ("", 0)), + ("vmovsd xmm xmm xmm", ("1*p0123", 1)), + ("vmovsd mem xmm", ("", 0)), + ("vmovsd xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movshdup + ("movshdup xmm xmm", ("1*p12", 1)), + ("movshdup mem xmm", ("", 0)), + ("vmovshdup xmm xmm", ("1*p12", 1)), + ("vmovshdup mem xmm", ("", 0)), + ("vmovshdup ymm ymm", ("1*p12", 1)), + ("vmovshdup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movsldup + ("movsldup xmm xmm", ("1*p12", 1)), + ("movsldup mem xmm", ("", 0)), + ("vmovsldup xmm xmm", ("1*p12", 1)), + ("vmovsldup mem xmm", ("", 0)), + ("vmovsldup ymm ymm", ("1*p12", 1)), + ("vmovsldup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movss + ("movss xmm xmm", ("1*p0123", 1)), + ("movss mem xmm", ("", 0)), + ("vmovss xmm xmm xmm", ("1*p0123", 1)), + ("vmovss mem xmm", ("", 0)), + ("vmovss xmm xmm", ("1*p0123", 1)), + ("vmovss xmm mem", ("", 0)), + ("movss mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ("movsx gpr gpr", ("1*p6789", 1)), + ("movsx mem gpr", ("", 0)), + ("movsxd gpr gpr", ("", 0)), + ("movsxd mem gpr", ("", 0)), + ("movsb gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsb mem gpr", ("", 0)), # AT&T version + ("movsw gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsw mem gpr", ("", 0)), # AT&T version + ("movsl gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsl mem gpr", ("", 0)), # AT&T version + ("movsq gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/movupd + ("movupd xmm xmm", ("1*p0123", 1)), + ("movupd mem xmm", ("", 0)), + ("movupd xmm mem", ("", 0)), + ("vmovupd xmm xmm", ("1*p0123", 1)), + ("vmovupd mem xmm", ("", 0)), + ("vmovupd xmm mem", ("", 0)), + ("vmovupd ymm ymm", ("1*p0123", 1)), + ("vmovupd mem ymm", ("", 0)), + ("vmovupd ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movups + ("movups xmm xmm", ("1*p0123", 1)), + ("movups mem xmm", ("", 0)), + ("movups xmm mem", ("", 0)), + ("vmovups xmm xmm", ("1*p0123", 1)), + ("vmovups mem xmm", ("", 0)), + ("vmovups xmm mem", ("", 0)), + ("vmovups ymm ymm", ("1*p0123", 1)), + ("vmovups mem ymm", ("", 0)), + ("vmovups ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movzx + ("movzx gpr gpr", ("1*p6789", 1)), + ("movzx mem gpr", ("", 0)), + ("movzb gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzb mem gpr", ("", 0)), # AT&T version + ("movzw gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzw mem gpr", ("", 0)), # AT&T version + ("movzl gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzl mem gpr", ("", 0)), # AT&T version + ("movzq gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ("cmova gpr gpr", ("1*p69", 1)), + ("cmova mem gpr", ("", 0)), + ("cmovae gpr gpr", ("1*p69", 1)), + ("cmovae mem gpr", ("", 0)), + ("cmovb gpr gpr", ("1*p69", 1)), + ("cmovb mem gpr", ("", 0)), + ("cmovbe gpr gpr", ("1*p69", 1)), + ("cmovbe mem gpr", ("", 0)), + ("cmovc gpr gpr", ("1*p69", 1)), + ("cmovc mem gpr", ("", 0)), + ("cmove gpr gpr", ("1*p69", 1)), + ("cmove mem gpr", ("", 0)), + ("cmovg gpr gpr", ("1*p69", 1)), + ("cmovg mem gpr", ("", 0)), + ("cmovge gpr gpr", ("1*p69", 1)), + ("cmovge mem gpr", ("", 0)), + ("cmovl gpr gpr", ("1*p69", 1)), + ("cmovl mem gpr", ("", 0)), + ("cmovle gpr gpr", ("1*p69", 1)), + ("cmovle mem gpr", ("", 0)), + ("cmovna gpr gpr", ("1*p69", 1)), + ("cmovna mem gpr", ("", 0)), + ("cmovnae gpr gpr", ("1*p69", 1)), + ("cmovnae mem gpr", ("", 0)), + ("cmovnb gpr gpr", ("1*p69", 1)), + ("cmovnb mem gpr", ("", 0)), + ("cmovnbe gpr gpr", ("1*p69", 1)), + ("cmovnbe mem gpr", ("", 0)), + ("cmovnc gpr gpr", ("1*p69", 1)), + ("cmovnc mem gpr", ("", 0)), + ("cmovne gpr gpr", ("1*p69", 1)), + ("cmovne mem gpr", ("", 0)), + ("cmovng gpr gpr", ("1*p69", 1)), + ("cmovng mem gpr", ("", 0)), + ("cmovnge gpr gpr", ("1*p69", 1)), + ("cmovnge mem gpr", ("", 0)), + ("cmovnl gpr gpr", ("1*p69", 1)), + ("cmovnl mem gpr", ("", 0)), + ("cmovno gpr gpr", ("1*p69", 1)), + ("cmovno mem gpr", ("", 0)), + ("cmovnp gpr gpr", ("1*p69", 1)), + ("cmovnp mem gpr", ("", 0)), + ("cmovns gpr gpr", ("1*p69", 1)), + ("cmovns mem gpr", ("", 0)), + ("cmovnz gpr gpr", ("1*p69", 1)), + ("cmovnz mem gpr", ("", 0)), + ("cmovo gpr gpr", ("1*p69", 1)), + ("cmovo mem gpr", ("", 0)), + ("cmovp gpr gpr", ("1*p69", 1)), + ("cmovp mem gpr", ("", 0)), + ("cmovpe gpr gpr", ("1*p69", 1)), + ("cmovpe mem gpr", ("", 0)), + ("cmovpo gpr gpr", ("1*p69", 1)), + ("cmovpo mem gpr", ("", 0)), + ("cmovs gpr gpr", ("1*p69", 1)), + ("cmovs mem gpr", ("", 0)), + ("cmovz gpr gpr", ("1*p69", 1)), + ("cmovz mem gpr", ("", 0)), + # https://www.felixcloutier.com/x86/pmovmskb + ("pmovmskb mm gpr", ("1*p0123", 1)), + ("pmovmskb xmm gpr", ("1*p0123", 1)), + ("vpmovmskb xmm gpr", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/pmovsx + ("pmovsxbw xmm xmm", ("1*p12", 1)), + ("pmovsxbw mem xmm", ("1*p12", 1)), + ("pmovsxbd xmm xmm", ("1*p12", 1)), + ("pmovsxbd mem xmm", ("1*p12", 1)), + ("pmovsxbq xmm xmm", ("1*p12", 1)), + ("pmovsxbq mem xmm", ("1*p12", 1)), + ("vpmovsxbw xmm xmm", ("1*p12", 1)), + ("vpmovsxbw mem xmm", ("1*p12", 1)), + ("vpmovsxbd xmm xmm", ("1*p12", 1)), + ("vpmovsxbd mem xmm", ("1*p12", 1)), + ("vpmovsxbq xmm xmm", ("1*p12", 1)), + ("vpmovsxbq mem xmm", ("1*p12", 1)), + ("vpmovsxbw xmm ymm", ("1*p0123", 1)), + ("vpmovsxbw mem ymm", ("1*p12", 1)), + ("vpmovsxbd xmm ymm", ("1*p0123", 1)), + ("vpmovsxbd mem ymm", ("1*p12", 1)), + ("vpmovsxbq xmm ymm", ("1*p0123", 1)), + ("vpmovsxbq mem ymm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/pmovzx + ("pmovzxbw xmm xmm", ("1*p12", 1)), + ("pmovzxbw mem xmm", ("1*p12", 1)), + ("vpmovzxbw xmm xmm", ("1*p12", 1)), + ("vpmovzxbw mem xmm", ("1*p12", 1)), + ("vpmovzxbw xmm ymm", ("1*p0123", 1)), + ("vpmovzxbw mem ymm", ("1*p12", 1)), + ################################################################# + # https://www.felixcloutier.com/x86/movbe + ("movbe gpr mem", ("1*p67", 5)), + ("movbe mem gpr", ("1*p67", 5)), + ################################################ + # https://www.felixcloutier.com/x86/movq2dq + ("movq2dq mm xmm", ("2*p0123", 1)), +] + + + p9 = MOVEntryBuilderIntelPort9() icx_mov_instructions = [ @@ -517,11 +898,11 @@ icx_mov_instructions = [ ("vpmovsxbd mem xmm", ("1*p15", 1)), ("vpmovsxbq xmm xmm", ("1*p15", 1)), ("vpmovsxbq mem xmm", ("1*p15", 1)), - ("vpmovsxbw ymm ymm", ("1*p5", 1)), + ("vpmovsxbw xmm ymm", ("1*p5", 1)), ("vpmovsxbw mem ymm", ("1*p5", 1)), - ("vpmovsxbd ymm ymm", ("1*p5", 1)), + ("vpmovsxbd xmm ymm", ("1*p5", 1)), ("vpmovsxbd mem ymm", ("1*p5", 1)), - ("vpmovsxbq ymm ymm", ("1*p5", 1)), + ("vpmovsxbq xmm ymm", ("1*p5", 1)), ("vpmovsxbq mem ymm", ("1*p5", 1)), ("vpmovsxbw ymm zmm", ("1*p5", 3)), ("vpmovsxbw mem zmm", ("1*p5", 1)), @@ -572,7 +953,7 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): # for HSW, BDW, SKX and CSX def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): - load, store = self.classify(operand_types) + load, store, vec = self.classify(operand_types) if load: port_pressure += [[1, "23"], [1, ["2D", "3D"]]] @@ -794,7 +1175,6 @@ snb_mov_instructions = [ ("movss mem xmm", ("", 0)), ("vmovss xmm xmm xmm", ("1*p5", 1)), ("vmovss mem xmm", ("", 0)), - ("vmovss xmm xmm", ("1*p5", 1)), ("vmovss xmm mem", ("", 0)), ("movss mem xmm", ("", 0)), # https://www.felixcloutier.com/x86/movsx:movsxd @@ -919,11 +1299,11 @@ snb_mov_instructions = [ ("vpmovsxbd mem xmm", ("1*p15", 1)), ("vpmovsxbq xmm xmm", ("1*p15", 1)), ("vpmovsxbq mem xmm", ("1*p15", 1)), - ("vpmovsxbw ymm ymm", ("1*p15", 1)), + ("vpmovsxbw xmm ymm", ("1*p15", 1)), ("vpmovsxbw mem ymm", ("1*p15", 1)), - ("vpmovsxbd ymm ymm", ("1*p15", 1)), + ("vpmovsxbd xmm ymm", ("1*p15", 1)), ("vpmovsxbd mem ymm", ("1*p15", 1)), - ("vpmovsxbq ymm ymm", ("1*p15", 1)), + ("vpmovsxbq xmm ymm", ("1*p15", 1)), ("vpmovsxbq mem ymm", ("1*p15", 1)), # https://www.felixcloutier.com/x86/pmovzx ("pmovzxbw xmm xmm", ("1*p15", 1)), @@ -1079,11 +1459,11 @@ hsw_mov_instructions = list( ("vpmovsxbd mem xmm", ("1*p5", 1)), ("vpmovsxbq xmm xmm", ("1*p5", 1)), ("vpmovsxbq mem xmm", ("1*p5", 1)), - ("vpmovsxbw ymm ymm", ("1*p5", 1)), + ("vpmovsxbw xmm ymm", ("1*p5", 1)), ("vpmovsxbw mem ymm", ("1*p5", 1)), - ("vpmovsxbd ymm ymm", ("1*p5", 1)), + ("vpmovsxbd xmm ymm", ("1*p5", 1)), ("vpmovsxbd mem ymm", ("1*p5", 1)), - ("vpmovsxbq ymm ymm", ("1*p5", 1)), + ("vpmovsxbq xmm ymm", ("1*p5", 1)), ("vpmovsxbq mem ymm", ("1*p5", 1)), # https://www.felixcloutier.com/x86/pmovzx ("pmovzxbw xmm xmm", ("1*p5", 1)), @@ -1234,6 +1614,7 @@ def get_description(arch, rhs_comment=None): "skx": "\n".join([p7.process_item(*item) for item in skx_mov_instructions]), "csx": "\n".join([p7.process_item(*item) for item in csx_mov_instructions]), "icx": "\n".join([p9.process_item(*item) for item in icx_mov_instructions]), + "zen3": "\n".join([z3.process_item(*item) for item in zen3_mov_instructions]), } description = descriptions[arch] @@ -1255,7 +1636,7 @@ if __name__ == "__main__": import sys if len(sys.argv) != 2: - print("Usage: {} (snb|ivb|hsw|bdw|skx|csx|icx)".format(sys.argv[0])) + print("Usage: {} (snb|ivb|hsw|bdw|skx|csx|icx|zen3)".format(sys.argv[0])) sys.exit(0) try: