diff --git a/osaca/data/generate_mov_entries.py b/osaca/data/generate_mov_entries.py index 09a4088..958f7c1 100755 --- a/osaca/data/generate_mov_entries.py +++ b/osaca/data/generate_mov_entries.py @@ -9,8 +9,8 @@ class MOVEntryBuilder: port_occupancy = defaultdict(Fraction) for uops, ports in port_pressure: for p in ports: - port_occupancy[p] += Fraction(uops, len(ports)) - return float(max(list(port_occupancy.values())+[0])) + port_occupancy[p] += Fraction(uops, len(ports)) + return float(max(list(port_occupancy.values()) + [0])) @staticmethod def classify(operands_types): @@ -18,10 +18,10 @@ class MOVEntryBuilder: store = 'mem' in operands_types[-1:] assert not (load and store), "Can not process a combined load-store instruction." return load, store - + def build_description( - self, instruction_name, operand_types, - port_pressure=[], latency=0, comment=None): + self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None + ): if comment: comment = " # " + comment else: @@ -32,10 +32,7 @@ class MOVEntryBuilder: if ot == 'imd': description += ' - class: immediate\n imd: int\n' elif ot.startswith('mem'): - description += ( - ' - class: memory\n' - ' base: "*"\n' - ' offset: "*"\n') + description += ' - class: memory\n' ' base: "*"\n' ' offset: "*"\n' if ot == 'mem_simple': description += ' index: ~\n' elif ot == 'mem_complex': @@ -45,18 +42,20 @@ class MOVEntryBuilder: description += ' scale: "*"\n' else: description += ' - class: register\n name: {}\n'.format(ot) - + description += ( ' latency: {latency}\n' ' port_pressure: {port_pressure!r}\n' ' throughput: {throughput}\n' - ' uops: {uops}\n').format( - latency=latency, - port_pressure=port_pressure, - throughput=self.compute_throughput(port_pressure), - uops=sum([i for i,p in port_pressure])) + ' uops: {uops}\n' + ).format( + latency=latency, + port_pressure=port_pressure, + throughput=self.compute_throughput(port_pressure), + uops=sum([i for i, p in port_pressure]), + ) return description - + def parse_port_pressure(self, port_pressure_str): """ Example: @@ -68,7 +67,7 @@ class MOVEntryBuilder: cycles, ports = p.split('*p') port_pressure.append([int(cycles), ports]) return port_pressure - + def process_item(self, instruction_form, resources): """ Example: @@ -84,9 +83,7 @@ class MOVEntryBuilder: class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): # for SNB and IVB - def build_description( - self, instruction_name, operand_types, - port_pressure=[], latency=0): + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): load, store = self.classify(operand_types) comment = None @@ -100,15 +97,14 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): comment = "with store" return MOVEntryBuilder.build_description( - self, instruction_name, operand_types, port_pressure, latency, comment) + self, instruction_name, operand_types, port_pressure, latency, comment + ) class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): # for HSW, BDW, SKX and CSX - def build_description( - self, instruction_name, operand_types, - port_pressure=[], latency=0): + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): load, store = self.classify(operand_types) if load: @@ -116,7 +112,8 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): latency += 4 comment = "with load" return MOVEntryBuilder.build_description( - self, instruction_name, operand_types, port_pressure, latency, comment) + self, instruction_name, operand_types, port_pressure, latency, comment + ) if store: port_pressure_simple = port_pressure + [[1, '237'], [1, '4']] operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types] @@ -125,16 +122,28 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder): latency += 0 return ( MOVEntryBuilder.build_description( - self, instruction_name, operands_simple, port_pressure_simple, latency, - "with store, simple AGU") + - '\n' + - MOVEntryBuilder.build_description( - self, instruction_name, operands_complex, port_pressure_complex, latency, - "with store, complex AGU")) - + self, + instruction_name, + operands_simple, + port_pressure_simple, + latency, + "with store, simple AGU", + ) + + '\n' + + MOVEntryBuilder.build_description( + self, + instruction_name, + operands_complex, + port_pressure_complex, + latency, + "with store, complex AGU", + ) + ) + # Register only: return MOVEntryBuilder.build_description( - self, instruction_name, operand_types, port_pressure, latency) + self, instruction_name, operand_types, port_pressure, latency + ) np7 = MOVEntryBuilderIntelNoPort7AGU() @@ -149,7 +158,6 @@ snb_mov_instructions = [ ('mov imd gpr', ('1*p015', 1)), ('mov imd mem', ('', 0)), ('movabs imd gpr', ('1*p015', 1)), # AT&T version - # https://www.felixcloutier.com/x86/movapd ('movapd xmm xmm', ('1*p5', 1)), ('movapd xmm mem', ('', 0)), @@ -160,7 +168,6 @@ snb_mov_instructions = [ ('vmovapd ymm ymm', ('1*p5', 1)), ('vmovapd ymm mem', ('', 0)), ('vmovapd mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movaps ('movaps xmm xmm', ('1*p5', 1)), ('movaps xmm mem', ('', 0)), @@ -171,7 +178,6 @@ snb_mov_instructions = [ ('vmovaps ymm ymm', ('1*p5', 1)), ('movaps ymm mem', ('', 0)), ('movaps mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movd:movq ('movd gpr mm', ('1*p5', 1)), ('movd mem mm', ('', 0)), @@ -197,7 +203,6 @@ snb_mov_instructions = [ ('vmovd xmm mem', ('', 0)), ('vmovq xmm gpr', ('1*p0', 1)), ('vmovq xmm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movddup ('movddup xmm xmm', ('1*p5', 1)), ('movddup mem xmm', ('', 0)), @@ -205,10 +210,8 @@ snb_mov_instructions = [ ('vmovddup mem xmm', ('', 0)), ('vmovddup ymm ymm', ('1*p5', 1)), ('vmovddup mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movdq2q ('movdq2q xmm mm', ('1*p015+1*p5', 1)), - # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 ('movdqa xmm xmm', ('1*p015', 1)), ('movdqa mem xmm', ('', 0)), @@ -219,7 +222,6 @@ snb_mov_instructions = [ ('vmovdqa ymm ymm', ('1*p05', 1)), ('vmovdqa mem ymm', ('', 0)), ('vmovdqa ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 ('movdqu xmm xmm', ('1*p015', 1)), ('movdqu mem xmm', ('', 0)), @@ -230,75 +232,60 @@ snb_mov_instructions = [ ('vmovdqu ymm ymm', ('1*p05', 1)), ('vmovdqu mem ymm', ('', 0)), ('vmovdqu ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movhlps ('movhlps xmm xmm', ('1*p5', 1)), ('vmovhlps xmm xmm xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movhpd ('movhpd mem xmm', ('1*p5', 1)), ('vmovhpd mem xmm xmm', ('1*p5', 1)), ('movhpd xmm mem', ('', 0)), ('vmovhpd mem xmm', ('', 0)), - # https://www.felixcloutier.com/x86/movhps ('movhps mem xmm', ('1*p5', 1)), ('vmovhps mem xmm xmm', ('1*p5', 1)), ('movhps xmm mem', ('', 0)), ('vmovhps mem xmm', ('', 0)), - # https://www.felixcloutier.com/x86/movlhps ('movlhps xmm xmm', ('1*p5', 1)), ('vmovlhps xmm xmm xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movlpd ('movlpd mem xmm', ('1*p5', 1)), ('vmovlpd mem xmm xmm', ('1*p5', 1)), ('movlpd xmm mem', ('', 0)), ('vmovlpd mem xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movlps ('movlps mem xmm', ('1*p5', 1)), ('vmovlps mem xmm xmm', ('1*p5', 1)), ('movlps xmm mem', ('', 0)), ('vmovlps mem xmm', ('1*p5', 1)), - # https://www.felixcloutier.com/x86/movmskpd ('movmskpd xmm gpr', ('1*p0', 2)), ('vmovmskpd xmm gpr', ('1*p0', 2)), ('vmovmskpd ymm gpr', ('1*p0', 2)), - # https://www.felixcloutier.com/x86/movmskps ('movmskps xmm gpr', ('1*p0', 1)), ('vmovmskps xmm gpr', ('1*p0', 1)), ('vmovmskps ymm gpr', ('1*p0', 1)), - # https://www.felixcloutier.com/x86/movntdq ('movntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntdq ymm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntdqa ('movntdqa mem xmm', ('', 0)), ('vmovntdqa mem xmm', ('', 0)), ('vmovntdqa mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movnti ('movnti gpr mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntpd ('movntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntpd ymm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntps ('movntps xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntps xmm mem', ('', 0)), # TODO NT-store: what latency to use? ('vmovntps ymm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movntq ('movntq mm mem', ('', 0)), # TODO NT-store: what latency to use? - # https://www.felixcloutier.com/x86/movq ('movq mm mm', ('', 0)), ('movq mem mm', ('', 0)), @@ -309,14 +296,11 @@ snb_mov_instructions = [ ('vmovq xmm xmm', ('1*p015', 1)), ('vmovq mem xmm', ('', 0)), ('vmovq xmm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movq2dq ('movq2dq mm xmm', ('1*p015', 1)), - # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq # TODO combined load-store is currently not supported # ('movs mem mem', ()), - # https://www.felixcloutier.com/x86/movsd ('movsd xmm xmm', ('1*p5', 1)), ('movsd mem xmm', ('', 0)), @@ -324,7 +308,6 @@ snb_mov_instructions = [ ('vmovsd xmm xmm xmm', ('1*p5', 1)), ('vmovsd mem xmm', ('', 0)), ('vmovsd xmm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movshdup ('movshdup xmm xmm', ('1*p5', 1)), ('movshdup mem xmm', ('', 0)), @@ -332,7 +315,6 @@ snb_mov_instructions = [ ('vmovshdup mem xmm', ('', 0)), ('vmovshdup ymm ymm', ('1*p5', 1)), ('vmovshdup mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movsldup ('movsldup xmm xmm', ('1*p5', 1)), ('movsldup mem xmm', ('', 0)), @@ -340,7 +322,6 @@ snb_mov_instructions = [ ('vmovsldup mem xmm', ('', 0)), ('vmovsldup ymm ymm', ('1*p5', 1)), ('vmovsldup mem ymm', ('', 0)), - # https://www.felixcloutier.com/x86/movss ('movss xmm xmm', ('1*p5', 1)), ('movss mem xmm', ('', 0)), @@ -349,7 +330,6 @@ snb_mov_instructions = [ ('vmovss xmm xmm', ('1*p5', 1)), ('vmovss xmm mem', ('', 0)), ('movss mem xmm', ('', 0)), - # https://www.felixcloutier.com/x86/movsx:movsxd ('movsx gpr gpr', ('1*p015', 1)), ('movsx mem gpr', ('', 0)), @@ -363,7 +343,6 @@ snb_mov_instructions = [ ('movsl mem gpr', ('', 0)), # AT&T version ('movsq gpr gpr', ('1*p015', 1)), # AT&T version ('movsq mem gpr', ('', 0)), # AT&T version - # https://www.felixcloutier.com/x86/movupd ('movupd xmm xmm', ('1*p5', 1)), ('movupd mem xmm', ('', 0)), @@ -374,7 +353,6 @@ snb_mov_instructions = [ ('vmovupd ymm ymm', ('1*p5', 1)), ('vmovupd mem ymm', ('', 0)), ('vmovupd ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movups ('movups xmm xmm', ('1*p5', 1)), ('movups mem xmm', ('', 0)), @@ -385,7 +363,6 @@ snb_mov_instructions = [ ('vmovups ymm ymm', ('1*p5', 1)), ('vmovups mem ymm', ('', 0)), ('vmovups ymm mem', ('', 0)), - # https://www.felixcloutier.com/x86/movzx ('movzx gpr gpr', ('1*p015', 1)), ('movzx mem gpr', ('', 0)), @@ -397,7 +374,6 @@ snb_mov_instructions = [ ('movzl mem gpr', ('', 0)), # AT&T version ('movzq gpr gpr', ('1*p015', 1)), # AT&T version ('movzq mem gpr', ('', 0)), # AT&T version - # https://www.felixcloutier.com/x86/cmovcc ('cmova gpr gpr', ('1*p015+2*p05', 2)), ('cmova mem gpr', ('1*p015+2*p05', 2)), @@ -459,12 +435,10 @@ snb_mov_instructions = [ ('cmovs mem gpr', ('1*p015+1*p05', 2)), ('cmovz gpr gpr', ('1*p015+1*p05', 2)), ('cmovz mem gpr', ('1*p015+1*p05', 2)), - # https://www.felixcloutier.com/x86/pmovmskb ('pmovmskb mm gpr', ('1*p0', 2)), ('pmovmskb xmm gpr', ('1*p0', 2)), ('vpmovmskb xmm gpr', ('1*p0', 2)), - # https://www.felixcloutier.com/x86/pmovsx ('pmovsxbw xmm xmm', ('1*p15', 1)), ('pmovsxbw mem xmm', ('1*p15', 1)), @@ -484,7 +458,6 @@ snb_mov_instructions = [ ('vpmovsxbd mem ymm', ('1*p15', 1)), ('vpmovsxbq ymm ymm', ('1*p15', 1)), ('vpmovsxbq mem ymm', ('1*p15', 1)), - # https://www.felixcloutier.com/x86/pmovzx ('pmovzxbw xmm xmm', ('1*p15', 1)), ('pmovzxbw mem xmm', ('1*p15', 1)), @@ -494,307 +467,294 @@ snb_mov_instructions = [ ('vpmovzxbw mem ymm', ('1*p15', 1)), ] -ivb_mov_instructions = list(OrderedDict(snb_mov_instructions + [ - # https://www.felixcloutier.com/x86/mov - ('mov gpr gpr', ('', 0)), - ('mov imd gpr', ('', 0)), +ivb_mov_instructions = list( + OrderedDict( + snb_mov_instructions + + [ + # https://www.felixcloutier.com/x86/mov + ('mov gpr gpr', ('', 0)), + ('mov imd gpr', ('', 0)), + # https://www.felixcloutier.com/x86/movapd + ('movapd xmm xmm', ('', 0)), + ('vmovapd xmm xmm', ('', 0)), + ('vmovapd ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movaps + ('movaps xmm xmm', ('', 0)), + ('vmovaps xmm xmm', ('', 0)), + ('vmovaps ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ('movdqa xmm xmm', ('', 0)), + ('vmovdqa xmm xmm', ('', 0)), + ('vmovdqa ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ('movdqu xmm xmm', ('', 0)), + ('vmovdqu xmm xmm', ('', 0)), + ('vmovdqu ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movupd + ('movupd xmm xmm', ('', 0)), + ('vmovupd xmm xmm', ('', 0)), + ('vmovupd ymm ymm', ('', 0)), + # https://www.felixcloutier.com/x86/movupd + ('movups xmm xmm', ('', 0)), + ('vmovups xmm xmm', ('', 0)), + ('vmovups ymm ymm', ('', 0)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movapd - ('movapd xmm xmm', ('', 0)), - ('vmovapd xmm xmm', ('', 0)), - ('vmovapd ymm ymm', ('', 0)), +hsw_mov_instructions = list( + OrderedDict( + ivb_mov_instructions + + [ + # https://www.felixcloutier.com/x86/mov + ('mov imd gpr', ('1*p0156', 1)), + ('mov gpr gpr', ('1*p0156', 1)), + ('movabs imd gpr', ('1*p0156', 1)), # AT&T version + # https://www.felixcloutier.com/x86/movbe + ('movbe gpr mem', ('1*p15', 6)), + ('movbe mem gpr', ('1*p15', 6)), + # https://www.felixcloutier.com/x86/movmskpd + ('movmskpd xmm gpr', ('1*p0', 3)), + ('vmovmskpd xmm gpr', ('1*p0', 3)), + ('vmovmskpd ymm gpr', ('1*p0', 3)), + # https://www.felixcloutier.com/x86/movmskps + ('movmskps xmm gpr', ('1*p0', 3)), + ('vmovmskps xmm gpr', ('1*p0', 3)), + ('vmovmskps ymm gpr', ('1*p0', 3)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ('movsx gpr gpr', ('1*p0156', 1)), + ('movsb gpr gpr', ('1*p0156', 1)), # AT&T version + ('movsw gpr gpr', ('1*p0156', 1)), # AT&T version + ('movsl gpr gpr', ('1*p0156', 1)), # AT&T version + ('movsq gpr gpr', ('1*p0156', 1)), # AT&T version + # https://www.felixcloutier.com/x86/movzx + ('movzx gpr gpr', ('1*p0156', 1)), + ('movzb gpr gpr', ('1*p0156', 1)), # AT&T version + ('movzw gpr gpr', ('1*p0156', 1)), # AT&T version + ('movzl gpr gpr', ('1*p0156', 1)), # AT&T version + ('movzq gpr gpr', ('1*p0156', 1)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ('cmova gpr gpr', ('1*p0156+2*p06', 2)), + ('cmova mem gpr', ('1*p0156+2*p06', 2)), + ('cmovae gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovae mem gpr', ('1*p0156+2*p06', 2)), + ('cmovb gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovb mem gpr', ('1*p0156+1*p06', 2)), + ('cmovbe gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovbe mem gpr', ('1*p0156+2*p06', 2)), + ('cmovc gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovc mem gpr', ('1*p0156+1*p06', 2)), + ('cmove gpr gpr', ('1*p0156+1*p06', 2)), + ('cmove mem gpr', ('1*p0156+1*p06', 2)), + ('cmovg gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovg mem gpr', ('1*p0156+1*p06', 2)), + ('cmovge gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovge mem gpr', ('1*p0156+1*p06', 2)), + ('cmovl gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovl mem gpr', ('1*p0156+1*p06', 2)), + ('cmovle gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovle mem gpr', ('1*p0156+1*p06', 2)), + ('cmovna gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovna mem gpr', ('1*p0156+2*p06', 2)), + ('cmovnae gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnae mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)), + ('cmovnbe mem gpr', ('1*p0156+2*p06', 2)), + ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnc gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnc mem gpr', ('1*p0156+1*p06', 2)), + ('cmovne gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovne mem gpr', ('1*p0156+1*p06', 2)), + ('cmovng gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovng mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnge gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnge mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnl gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnl mem gpr', ('1*p0156+1*p06', 2)), + ('cmovno gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovno mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnp gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnp mem gpr', ('1*p0156+1*p06', 2)), + ('cmovns gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovns mem gpr', ('1*p0156+1*p06', 2)), + ('cmovnz gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovnz mem gpr', ('1*p0156+1*p06', 2)), + ('cmovo gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovo mem gpr', ('1*p0156+1*p06', 2)), + ('cmovp gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovp mem gpr', ('1*p0156+1*p06', 2)), + ('cmovpe gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovpe mem gpr', ('1*p0156+1*p06', 2)), + ('cmovpo gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovpo mem gpr', ('1*p0156+1*p06', 2)), + ('cmovs gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovs mem gpr', ('1*p0156+1*p06', 2)), + ('cmovz gpr gpr', ('1*p0156+1*p06', 2)), + ('cmovz mem gpr', ('1*p0156+1*p06', 2)), + # https://www.felixcloutier.com/x86/pmovmskb + ('pmovmskb mm gpr', ('1*p0', 3)), + ('pmovmskb xmm gpr', ('1*p0', 3)), + ('vpmovmskb xmm gpr', ('1*p0', 3)), + ('vpmovmskb ymm gpr', ('1*p0', 3)), + # https://www.felixcloutier.com/x86/pmovsx + ('pmovsxbw xmm xmm', ('1*p5', 1)), + ('pmovsxbw mem xmm', ('1*p5', 1)), + ('pmovsxbd xmm xmm', ('1*p5', 1)), + ('pmovsxbd mem xmm', ('1*p5', 1)), + ('pmovsxbq xmm xmm', ('1*p5', 1)), + ('pmovsxbq mem xmm', ('1*p5', 1)), + ('vpmovsxbw xmm xmm', ('1*p5', 1)), + ('vpmovsxbw mem xmm', ('1*p5', 1)), + ('vpmovsxbd xmm xmm', ('1*p5', 1)), + ('vpmovsxbd mem xmm', ('1*p5', 1)), + ('vpmovsxbq xmm xmm', ('1*p5', 1)), + ('vpmovsxbq mem xmm', ('1*p5', 1)), + ('vpmovsxbw ymm ymm', ('1*p5', 1)), + ('vpmovsxbw mem ymm', ('1*p5', 1)), + ('vpmovsxbd ymm ymm', ('1*p5', 1)), + ('vpmovsxbd mem ymm', ('1*p5', 1)), + ('vpmovsxbq ymm ymm', ('1*p5', 1)), + ('vpmovsxbq mem ymm', ('1*p5', 1)), + # https://www.felixcloutier.com/x86/pmovzx + ('pmovzxbw xmm xmm', ('1*p5', 1)), + ('pmovzxbw mem xmm', ('1*p5', 1)), + ('vpmovzxbw xmm xmm', ('1*p5', 1)), + ('vpmovzxbw mem xmm', ('1*p5', 1)), + ('vpmovzxbw ymm ymm', ('1*p5', 1)), + ('vpmovzxbw mem ymm', ('1*p5', 1)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movaps - ('movaps xmm xmm', ('', 0)), - ('vmovaps xmm xmm', ('', 0)), - ('vmovaps ymm ymm', ('', 0)), +bdw_mov_instructions = list( + OrderedDict( + hsw_mov_instructions + + [ + # https://www.felixcloutier.com/x86/cmovcc + ('cmova gpr gpr', ('2*p06', 1)), + ('cmova mem gpr', ('2*p06', 1)), + ('cmovae gpr gpr', ('1*p06', 1)), + ('cmovae mem gpr', ('2*p06', 1)), + ('cmovb gpr gpr', ('2*p06', 1)), + ('cmovb mem gpr', ('1*p06', 1)), + ('cmovbe gpr gpr', ('2*p06', 1)), + ('cmovbe mem gpr', ('2*p06', 1)), + ('cmovc gpr gpr', ('1*p06', 1)), + ('cmovc mem gpr', ('1*p06', 1)), + ('cmove gpr gpr', ('1*p06', 1)), + ('cmove mem gpr', ('1*p06', 1)), + ('cmovg gpr gpr', ('1*p06', 1)), + ('cmovg mem gpr', ('1*p06', 1)), + ('cmovge gpr gpr', ('1*p06', 1)), + ('cmovge mem gpr', ('1*p06', 1)), + ('cmovl gpr gpr', ('1*p06', 1)), + ('cmovl mem gpr', ('1*p06', 1)), + ('cmovle gpr gpr', ('1*p06', 1)), + ('cmovle mem gpr', ('1*p06', 1)), + ('cmovna gpr gpr', ('2*p06', 1)), + ('cmovna mem gpr', ('2*p06', 1)), + ('cmovnae gpr gpr', ('1*p06', 1)), + ('cmovnae mem gpr', ('1*p06', 1)), + ('cmovnb gpr gpr', ('1*p06', 1)), + ('cmovnb mem gpr', ('1*p06', 1)), + ('cmovnbe gpr gpr', ('2*p06', 1)), + ('cmovnbe mem gpr', ('2*p06', 1)), + ('cmovnb gpr gpr', ('1*p06', 1)), + ('cmovnb mem gpr', ('1*p06', 1)), + ('cmovnc gpr gpr', ('1*p06', 1)), + ('cmovnc mem gpr', ('1*p06', 1)), + ('cmovne gpr gpr', ('1*p06', 1)), + ('cmovne mem gpr', ('1*p06', 1)), + ('cmovng gpr gpr', ('1*p06', 1)), + ('cmovng mem gpr', ('1*p06', 1)), + ('cmovnge gpr gpr', ('1*p06', 1)), + ('cmovnge mem gpr', ('1*p06', 1)), + ('cmovnl gpr gpr', ('1*p06', 1)), + ('cmovnl mem gpr', ('1*p06', 1)), + ('cmovno gpr gpr', ('1*p06', 1)), + ('cmovno mem gpr', ('1*p06', 1)), + ('cmovnp gpr gpr', ('1*p06', 1)), + ('cmovnp mem gpr', ('1*p06', 1)), + ('cmovns gpr gpr', ('1*p06', 1)), + ('cmovns mem gpr', ('1*p06', 1)), + ('cmovnz gpr gpr', ('1*p06', 1)), + ('cmovnz mem gpr', ('1*p06', 1)), + ('cmovo gpr gpr', ('1*p06', 1)), + ('cmovo mem gpr', ('1*p06', 1)), + ('cmovp gpr gpr', ('1*p06', 1)), + ('cmovp mem gpr', ('1*p06', 1)), + ('cmovpe gpr gpr', ('1*p06', 1)), + ('cmovpe mem gpr', ('1*p06', 1)), + ('cmovpo gpr gpr', ('1*p06', 1)), + ('cmovpo mem gpr', ('1*p06', 1)), + ('cmovs gpr gpr', ('1*p06', 1)), + ('cmovs mem gpr', ('1*p06', 1)), + ('cmovz gpr gpr', ('1*p06', 1)), + ('cmovz mem gpr', ('1*p06', 1)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 - ('movdqa xmm xmm', ('', 0)), - ('vmovdqa xmm xmm', ('', 0)), - ('vmovdqa ymm ymm', ('', 0)), +skx_mov_instructions = list( + OrderedDict( + bdw_mov_instructions + + [ + # https://www.felixcloutier.com/x86/movapd + # TODO with masking! + # TODO the following may eliminate or be bound to 1*p0156: + # ('movapd xmm xmm', ('1*p5', 1)), + # ('vmovapd xmm xmm', ('1*p5', 1)), + # ('vmovapd ymm ymm', ('1*p5', 1)), + # https://www.felixcloutier.com/x86/movaps + # TODO with masking! + # TODO the following may eliminate or be bound to 1*p0156: + # ('movaps xmm xmm', ('1*p5', 1)), + # ('vmovaps xmm xmm', ('1*p5', 1)), + # ('vmovaps ymm ymm', ('1*p5', 1)), + # https://www.felixcloutier.com/x86/movbe + ('movbe gpr mem', ('1*p15', 4)), + ('movbe mem gpr', ('1*p15', 4)), + # https://www.felixcloutier.com/x86/movddup + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movntdq + ('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntdqa + ('vmovntdqa mem zmm', ('', 0)), + # https://www.felixcloutier.com/x86/movntpd + ('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movq2dq + ('movq2dq mm xmm', ('1*p0+1*p015', 1)), + # https://www.felixcloutier.com/x86/movsd + # TODO with masking! + # https://www.felixcloutier.com/x86/movshdup + # TODO with masking! + # https://www.felixcloutier.com/x86/movsldup + # TODO with masking! + # https://www.felixcloutier.com/x86/movss + # TODO with masking! + # https://www.felixcloutier.com/x86/movupd + # TODO with masking! + # https://www.felixcloutier.com/x86/movups + # TODO with masking! + # https://www.felixcloutier.com/x86/pmovsx + # TODO with masking! + ('vpmovsxbw ymm zmm', ('1*p5', 3)), + ('vpmovsxbw mem zmm', ('1*p5', 1)), + ] + ).items() +) - # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 - ('movdqu xmm xmm', ('', 0)), - ('vmovdqu xmm xmm', ('', 0)), - ('vmovdqu ymm ymm', ('', 0)), +csx_mov_instructions = OrderedDict(skx_mov_instructions + []).items() - # https://www.felixcloutier.com/x86/movupd - ('movupd xmm xmm', ('', 0)), - ('vmovupd xmm xmm', ('', 0)), - ('vmovupd ymm ymm', ('', 0)), - - # https://www.felixcloutier.com/x86/movupd - ('movups xmm xmm', ('', 0)), - ('vmovups xmm xmm', ('', 0)), - ('vmovups ymm ymm', ('', 0)), -]).items()) - -hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [ - # https://www.felixcloutier.com/x86/mov - ('mov imd gpr', ('1*p0156', 1)), - ('mov gpr gpr', ('1*p0156', 1)), - ('movabs imd gpr', ('1*p0156', 1)), # AT&T version - - # https://www.felixcloutier.com/x86/movbe - ('movbe gpr mem', ('1*p15', 6)), - ('movbe mem gpr', ('1*p15', 6)), - - # https://www.felixcloutier.com/x86/movmskpd - ('movmskpd xmm gpr', ('1*p0', 3)), - ('vmovmskpd xmm gpr', ('1*p0', 3)), - ('vmovmskpd ymm gpr', ('1*p0', 3)), - - # https://www.felixcloutier.com/x86/movmskps - ('movmskps xmm gpr', ('1*p0', 3)), - ('vmovmskps xmm gpr', ('1*p0', 3)), - ('vmovmskps ymm gpr', ('1*p0', 3)), - - # https://www.felixcloutier.com/x86/movsx:movsxd - ('movsx gpr gpr', ('1*p0156', 1)), - ('movsb gpr gpr', ('1*p0156', 1)), # AT&T version - ('movsw gpr gpr', ('1*p0156', 1)), # AT&T version - ('movsl gpr gpr', ('1*p0156', 1)), # AT&T version - ('movsq gpr gpr', ('1*p0156', 1)), # AT&T version - - # https://www.felixcloutier.com/x86/movzx - ('movzx gpr gpr', ('1*p0156', 1)), - ('movzb gpr gpr', ('1*p0156', 1)), # AT&T version - ('movzw gpr gpr', ('1*p0156', 1)), # AT&T version - ('movzl gpr gpr', ('1*p0156', 1)), # AT&T version - ('movzq gpr gpr', ('1*p0156', 1)), # AT&T version - - # https://www.felixcloutier.com/x86/cmovcc - ('cmova gpr gpr', ('1*p0156+2*p06', 2)), - ('cmova mem gpr', ('1*p0156+2*p06', 2)), - ('cmovae gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovae mem gpr', ('1*p0156+2*p06', 2)), - ('cmovb gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovb mem gpr', ('1*p0156+1*p06', 2)), - ('cmovbe gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovbe mem gpr', ('1*p0156+2*p06', 2)), - ('cmovc gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovc mem gpr', ('1*p0156+1*p06', 2)), - ('cmove gpr gpr', ('1*p0156+1*p06', 2)), - ('cmove mem gpr', ('1*p0156+1*p06', 2)), - ('cmovg gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovg mem gpr', ('1*p0156+1*p06', 2)), - ('cmovge gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovge mem gpr', ('1*p0156+1*p06', 2)), - ('cmovl gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovl mem gpr', ('1*p0156+1*p06', 2)), - ('cmovle gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovle mem gpr', ('1*p0156+1*p06', 2)), - ('cmovna gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovna mem gpr', ('1*p0156+2*p06', 2)), - ('cmovnae gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnae mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)), - ('cmovnbe mem gpr', ('1*p0156+2*p06', 2)), - ('cmovnb gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnb mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnc gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnc mem gpr', ('1*p0156+1*p06', 2)), - ('cmovne gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovne mem gpr', ('1*p0156+1*p06', 2)), - ('cmovng gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovng mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnge gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnge mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnl gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnl mem gpr', ('1*p0156+1*p06', 2)), - ('cmovno gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovno mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnp gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnp mem gpr', ('1*p0156+1*p06', 2)), - ('cmovns gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovns mem gpr', ('1*p0156+1*p06', 2)), - ('cmovnz gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovnz mem gpr', ('1*p0156+1*p06', 2)), - ('cmovo gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovo mem gpr', ('1*p0156+1*p06', 2)), - ('cmovp gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovp mem gpr', ('1*p0156+1*p06', 2)), - ('cmovpe gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovpe mem gpr', ('1*p0156+1*p06', 2)), - ('cmovpo gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovpo mem gpr', ('1*p0156+1*p06', 2)), - ('cmovs gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovs mem gpr', ('1*p0156+1*p06', 2)), - ('cmovz gpr gpr', ('1*p0156+1*p06', 2)), - ('cmovz mem gpr', ('1*p0156+1*p06', 2)), - - # https://www.felixcloutier.com/x86/pmovmskb - ('pmovmskb mm gpr', ('1*p0', 3)), - ('pmovmskb xmm gpr', ('1*p0', 3)), - ('vpmovmskb xmm gpr', ('1*p0', 3)), - ('vpmovmskb ymm gpr', ('1*p0', 3)), - - # https://www.felixcloutier.com/x86/pmovsx - ('pmovsxbw xmm xmm', ('1*p5', 1)), - ('pmovsxbw mem xmm', ('1*p5', 1)), - ('pmovsxbd xmm xmm', ('1*p5', 1)), - ('pmovsxbd mem xmm', ('1*p5', 1)), - ('pmovsxbq xmm xmm', ('1*p5', 1)), - ('pmovsxbq mem xmm', ('1*p5', 1)), - ('vpmovsxbw xmm xmm', ('1*p5', 1)), - ('vpmovsxbw mem xmm', ('1*p5', 1)), - ('vpmovsxbd xmm xmm', ('1*p5', 1)), - ('vpmovsxbd mem xmm', ('1*p5', 1)), - ('vpmovsxbq xmm xmm', ('1*p5', 1)), - ('vpmovsxbq mem xmm', ('1*p5', 1)), - ('vpmovsxbw ymm ymm', ('1*p5', 1)), - ('vpmovsxbw mem ymm', ('1*p5', 1)), - ('vpmovsxbd ymm ymm', ('1*p5', 1)), - ('vpmovsxbd mem ymm', ('1*p5', 1)), - ('vpmovsxbq ymm ymm', ('1*p5', 1)), - ('vpmovsxbq mem ymm', ('1*p5', 1)), - - # https://www.felixcloutier.com/x86/pmovzx - ('pmovzxbw xmm xmm', ('1*p5', 1)), - ('pmovzxbw mem xmm', ('1*p5', 1)), - ('vpmovzxbw xmm xmm', ('1*p5', 1)), - ('vpmovzxbw mem xmm', ('1*p5', 1)), - ('vpmovzxbw ymm ymm', ('1*p5', 1)), - ('vpmovzxbw mem ymm', ('1*p5', 1)), -]).items()) - -bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [ - # https://www.felixcloutier.com/x86/cmovcc - ('cmova gpr gpr', ('2*p06', 1)), - ('cmova mem gpr', ('2*p06', 1)), - ('cmovae gpr gpr', ('1*p06', 1)), - ('cmovae mem gpr', ('2*p06', 1)), - ('cmovb gpr gpr', ('2*p06', 1)), - ('cmovb mem gpr', ('1*p06', 1)), - ('cmovbe gpr gpr', ('2*p06', 1)), - ('cmovbe mem gpr', ('2*p06', 1)), - ('cmovc gpr gpr', ('1*p06', 1)), - ('cmovc mem gpr', ('1*p06', 1)), - ('cmove gpr gpr', ('1*p06', 1)), - ('cmove mem gpr', ('1*p06', 1)), - ('cmovg gpr gpr', ('1*p06', 1)), - ('cmovg mem gpr', ('1*p06', 1)), - ('cmovge gpr gpr', ('1*p06', 1)), - ('cmovge mem gpr', ('1*p06', 1)), - ('cmovl gpr gpr', ('1*p06', 1)), - ('cmovl mem gpr', ('1*p06', 1)), - ('cmovle gpr gpr', ('1*p06', 1)), - ('cmovle mem gpr', ('1*p06', 1)), - ('cmovna gpr gpr', ('2*p06', 1)), - ('cmovna mem gpr', ('2*p06', 1)), - ('cmovnae gpr gpr', ('1*p06', 1)), - ('cmovnae mem gpr', ('1*p06', 1)), - ('cmovnb gpr gpr', ('1*p06', 1)), - ('cmovnb mem gpr', ('1*p06', 1)), - ('cmovnbe gpr gpr', ('2*p06', 1)), - ('cmovnbe mem gpr', ('2*p06', 1)), - ('cmovnb gpr gpr', ('1*p06', 1)), - ('cmovnb mem gpr', ('1*p06', 1)), - ('cmovnc gpr gpr', ('1*p06', 1)), - ('cmovnc mem gpr', ('1*p06', 1)), - ('cmovne gpr gpr', ('1*p06', 1)), - ('cmovne mem gpr', ('1*p06', 1)), - ('cmovng gpr gpr', ('1*p06', 1)), - ('cmovng mem gpr', ('1*p06', 1)), - ('cmovnge gpr gpr', ('1*p06', 1)), - ('cmovnge mem gpr', ('1*p06', 1)), - ('cmovnl gpr gpr', ('1*p06', 1)), - ('cmovnl mem gpr', ('1*p06', 1)), - ('cmovno gpr gpr', ('1*p06', 1)), - ('cmovno mem gpr', ('1*p06', 1)), - ('cmovnp gpr gpr', ('1*p06', 1)), - ('cmovnp mem gpr', ('1*p06', 1)), - ('cmovns gpr gpr', ('1*p06', 1)), - ('cmovns mem gpr', ('1*p06', 1)), - ('cmovnz gpr gpr', ('1*p06', 1)), - ('cmovnz mem gpr', ('1*p06', 1)), - ('cmovo gpr gpr', ('1*p06', 1)), - ('cmovo mem gpr', ('1*p06', 1)), - ('cmovp gpr gpr', ('1*p06', 1)), - ('cmovp mem gpr', ('1*p06', 1)), - ('cmovpe gpr gpr', ('1*p06', 1)), - ('cmovpe mem gpr', ('1*p06', 1)), - ('cmovpo gpr gpr', ('1*p06', 1)), - ('cmovpo mem gpr', ('1*p06', 1)), - ('cmovs gpr gpr', ('1*p06', 1)), - ('cmovs mem gpr', ('1*p06', 1)), - ('cmovz gpr gpr', ('1*p06', 1)), - ('cmovz mem gpr', ('1*p06', 1)), -]).items()) - -skx_mov_instructions = list(OrderedDict(bdw_mov_instructions + [ - # https://www.felixcloutier.com/x86/movapd - # TODO with masking! - # TODO the following may eliminate or be bound to 1*p0156: - # ('movapd xmm xmm', ('1*p5', 1)), - # ('vmovapd xmm xmm', ('1*p5', 1)), - # ('vmovapd ymm ymm', ('1*p5', 1)), - - # https://www.felixcloutier.com/x86/movaps - # TODO with masking! - # TODO the following may eliminate or be bound to 1*p0156: - # ('movaps xmm xmm', ('1*p5', 1)), - # ('vmovaps xmm xmm', ('1*p5', 1)), - # ('vmovaps ymm ymm', ('1*p5', 1)), - - # https://www.felixcloutier.com/x86/movbe - ('movbe gpr mem', ('1*p15', 4)), - ('movbe mem gpr', ('1*p15', 4)), - - # https://www.felixcloutier.com/x86/movddup - # TODO with masking! - - # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 - # TODO with masking! - - # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 - # TODO with masking! - - # https://www.felixcloutier.com/x86/movntdq - ('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use? - - # https://www.felixcloutier.com/x86/movntdqa - ('vmovntdqa mem zmm', ('', 0)), - - # https://www.felixcloutier.com/x86/movntpd - ('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use? - - # https://www.felixcloutier.com/x86/movntps - ('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use? - - # https://www.felixcloutier.com/x86/movq2dq - ('movq2dq mm xmm', ('1*p0+1*p015', 1)), - - # https://www.felixcloutier.com/x86/movsd - # TODO with masking! - - # https://www.felixcloutier.com/x86/movshdup - # TODO with masking! - - # https://www.felixcloutier.com/x86/movsldup - # TODO with masking! - - # https://www.felixcloutier.com/x86/movss - # TODO with masking! - - # https://www.felixcloutier.com/x86/movupd - # TODO with masking! - - # https://www.felixcloutier.com/x86/movups - # TODO with masking! - - # https://www.felixcloutier.com/x86/pmovsx - # TODO with masking! - ('vpmovsxbw ymm zmm', ('1*p5', 3)), - ('vpmovsxbw mem zmm', ('1*p5', 1)), -]).items()) - -csx_mov_instructions = OrderedDict(skx_mov_instructions + [ - -]).items() def get_description(arch, rhs_comment=None): descriptions = { @@ -803,7 +763,7 @@ def get_description(arch, rhs_comment=None): 'hsw': '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions]), 'bdw': '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions]), 'skx': '\n'.join([p7.process_item(*item) for item in skx_mov_instructions]), - 'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]) + 'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]), } description = descriptions[arch] @@ -813,20 +773,21 @@ def get_description(arch, rhs_comment=None): commented_description = "" for l in descriptions[arch].split('\n'): - commented_description += ("{:<"+str(max_length)+"} # {}\n").format(l, rhs_comment) + commented_description += ("{:<" + str(max_length) + "} # {}\n").format(l, rhs_comment) description = commented_description return description + if __name__ == '__main__': import sys + if len(sys.argv) != 2: print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0])) sys.exit(0) - + try: print(get_description(sys.argv[1], rhs_comment=' '.join(sys.argv))) except KeyError: print("Unknown architecture.") sys.exit(1) - diff --git a/osaca/data/isa/aarch64.yml b/osaca/data/isa/aarch64.yml index 92cb095..683bac1 100644 --- a/osaca/data/isa/aarch64.yml +++ b/osaca/data/isa/aarch64.yml @@ -18,45 +18,28 @@ instruction_forms: - name: "fmla" operands: - class: "register" - prefix: "v" - shape: "s" + prefix: "*" + shape: "*" source: true destination: true - class: "register" - prefix: "v" - shape: "s" + prefix: "*" + shape: "*" source: true destination: false - class: "register" - prefix: "v" - shape: "s" + prefix: "*" + shape: "*" source: true destination: false - - name: "fmla" + - name: ldp operands: - class: "register" - prefix: "v" - shape: "d" - source: true - destination: true - - class: "register" - prefix: "v" - shape: "d" - source: true - destination: false - - class: "register" - prefix: "v" - shape: "d" - source: true - destination: false - - name: "ldp" - operands: - - class: "register" - prefix: "d" + prefix: "*" source: false destination: true - class: "register" - prefix: "d" + prefix: "*" source: false destination: true - class: "memory" @@ -68,52 +51,14 @@ instruction_forms: post-indexed: false source: true destination: false - - name: "ldp" + - name: ldp operands: - class: "register" - prefix: "q" + prefix: "*" source: false destination: true - class: "register" - prefix: "q" - source: false - destination: true - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: false - post-indexed: false - source: true - destination: false - - name: "ldp" - operands: - - class: "register" - prefix: "q" - source: false - destination: true - - class: "register" - prefix: "q" - source: false - destination: true - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: true - post-indexed: false - source: true - destination: true - - name: "ldp" - operands: - - class: "register" - prefix: "q" - source: false - destination: true - - class: "register" - prefix: "q" + prefix: "*" source: false destination: true - class: "memory" @@ -125,14 +70,63 @@ instruction_forms: post-indexed: true source: true destination: true - - name: "stp" + - name: ldp operands: - class: "register" - prefix: "d" + prefix: "*" + source: false + destination: true + - class: "register" + prefix: "*" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: true + destination: true + - name: [ldr, ldur] + operands: + - class: "register" + prefix: "*" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: false + post-indexed: true + source: true + destination: true + - name: [ldr, ldur] + operands: + - class: "register" + prefix: "*" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: true + destination: true + - name: stp + operands: + - class: "register" + prefix: "*" source: true destination: false - class: "register" - prefix: "d" + prefix: "*" source: true destination: false - class: "memory" @@ -144,14 +138,33 @@ instruction_forms: post-indexed: false source: false destination: true - - name: "stp" + - name: stp operands: - class: "register" - prefix: "q" + prefix: "*" source: true destination: false - class: "register" - prefix: "q" + prefix: "*" + source: true + destination: false + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: false + destination: true + - name: stp + operands: + - class: "register" + prefix: "*" + source: true + destination: false + - class: "register" + prefix: "*" source: true destination: false - class: "memory" @@ -160,73 +173,13 @@ instruction_forms: index: "*" scale: "*" pre-indexed: false - post-indexed: false + post-indexed: true source: false destination: true - - name: "str" + - name: [str, stur] operands: - class: "register" - prefix: "x" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "str" - operands: - - class: "register" - prefix: "d" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "str" - operands: - - class: "register" - prefix: "q" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "stur" - operands: - - class: "register" - prefix: "q" - source: true - destination: false - - class: "memory" - base: "*" - offset: "*" - index: "*" - scale: "*" - pre-indexed: "*" - post-indexed: "*" - source: false - destination: true - - name: "stur" - operands: - - class: "register" - prefix: "d" + prefix: "*" source: true destination: false - class: "memory" diff --git a/osaca/data/model_importer.py b/osaca/data/model_importer.py index d9b7f7d..3b42eea 100755 --- a/osaca/data/model_importer.py +++ b/osaca/data/model_importer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import os.path import argparse +import os.path import sys import xml.etree.ElementTree as ET from distutils.version import StrictVersion @@ -8,8 +8,23 @@ from distutils.version import StrictVersion from osaca.parser import get_parser from osaca.semantics import MachineModel -intel_archs = ['CON', 'WOL', 'NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL', - 'CNL', 'ICL'] +intel_archs = [ + 'CON', + 'WOL', + 'NHM', + 'WSM', + 'SNB', + 'IVB', + 'HSW', + 'BDW', + 'SKL', + 'SKX', + 'KBL', + 'CFL', + 'CNL', + 'ICL', +] +amd_archs = ['ZEN1', 'ZEN+', 'ZEN2'] def port_pressure_from_tag_attributes(attrib): @@ -19,6 +34,7 @@ def port_pressure_from_tag_attributes(attrib): for p in attrib['ports'].split('+'): cycles, ports = p.split('*') ports = ports.lstrip('p') + ports = ports.lstrip('FP') port_occupation.append([int(cycles), ports]) # Also consider div on DIV pipeline @@ -88,10 +104,10 @@ def extract_paramters(instruction_tag, parser, isa): return parameters -def extract_model(tree, arch): +def extract_model(tree, arch, skip_mem=True): try: isa = MachineModel.get_isa_for_arch(arch) - except: + except Exception: print("Skipping...", file=sys.stderr) return None mm = MachineModel(isa=isa) @@ -101,6 +117,7 @@ def extract_model(tree, arch): ignore = False mnemonic = instruction_tag.attrib['asm'] + iform = instruction_tag.attrib['iform'] # skip any mnemonic which contain spaces (e.g., "REX CRC32") if ' ' in mnemonic: continue @@ -118,6 +135,26 @@ def extract_model(tree, arch): arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]') if arch_tag is None: continue + # skip any instructions without port utilization + if not any(['ports' in x.attrib for x in arch_tag.findall('measurement')]): + print("Couldn't find port utilization, skip: ", iform, file=sys.stderr) + continue + # skip if computed and measured TP don't match + if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][ + 0 + ]: + print( + "Calculated TP from port utilization doesn't match TP, skip: ", + iform, + file=sys.stderr, + ) + continue + # skip if instruction contains memory operand + if skip_mem and any( + [x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')] + ): + print("Contains memory operand, skip: ", iform, file=sys.stderr) + continue # We collect all measurement and IACA information and compare them later for measurement_tag in arch_tag.iter('measurement'): if 'TP_ports' in measurement_tag.attrib: @@ -143,10 +180,14 @@ def extract_model(tree, arch): if 'max_cycles' in l_tag.attrib ] if latencies[1:] != latencies[:-1]: - print("Contradicting latencies found, using first:", mnemonic, latencies, - file=sys.stderr) + print( + "Contradicting latencies found, using smallest:", + iform, + latencies, + file=sys.stderr, + ) if latencies: - latency = latencies[0] + latency = min(latencies) if ignore: continue @@ -160,16 +201,14 @@ def extract_model(tree, arch): # Check if all are equal if port_pressure: if port_pressure[1:] != port_pressure[:-1]: - print( - "Contradicting port occupancies, using latest IACA:", - mnemonic, file=sys.stderr) + print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr) port_pressure = port_pressure[-1] else: # print("No data available for this architecture:", mnemonic, file=sys.stderr) continue - + # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake: - if arch.upper() in intel_archs and not arch.upper() in ['ICL']: + if arch.upper() in intel_archs and not arch.upper() in ['ICL']: if any([p['class'] == 'memory' for p in parameters]): # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode @@ -183,7 +222,7 @@ def extract_model(tree, arch): # Add (1, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4) if port_23 and not port_4: port_pressure.append((1, ['2D', '3D'])) - + # Add missing ports: for ports in [pp[1] for pp in port_pressure]: for p in ports: @@ -201,7 +240,7 @@ def rhs_comment(uncommented_string, comment): commented_string = "" for l in uncommented_string.split('\n'): - commented_string += ("{:<"+str(max_length)+"} # {}\n").format(l, comment) + commented_string += ("{:<" + str(max_length) + "} # {}\n").format(l, comment) return commented_string @@ -218,21 +257,33 @@ def main(): help='architecture to extract, use IACA abbreviations (e.g., SNB). ' 'if not given, all will be extracted and saved to file in CWD.', ) + parser.add_argument( + '--mem', + dest='skip_mem', + action='store_false', + help='add instruction forms including memory addressing operands, which are ' + 'skipped by default' + ) args = parser.parse_args() basename = os.path.basename(__file__) tree = ET.parse(args.xml) - print('Available architectures:', ', '.join(architectures(tree))) + print('# Available architectures:', ', '.join(architectures(tree))) if args.arch: - model = extract_model(tree, args.arch) + print('# Chosen architecture: {}'.format(args.arch)) + model = extract_model(tree, args.arch, args.skip_mem) if model is not None: - print(rhs_comment(model.dump(), basename+" "+sys.argv[0])) + print( + rhs_comment( + model.dump(), basename + " " + args.xml.split('/')[-1] + " " + args.arch + ) + ) else: for arch in architectures(tree): print(arch, end='') - model = extract_model(tree, arch.lower()) + model = extract_model(tree, arch.lower(), args.skip_mem) if model: - model_string = rhs_comment(model.dump(), basename+" "+arch) + model_string = rhs_comment(model.dump(), basename + " " + arch) with open('{}.yml'.format(arch.lower()), 'w') as f: f.write(model_string) diff --git a/osaca/data/n1.yml b/osaca/data/n1.yml new file mode 100644 index 0000000..3738df4 --- /dev/null +++ b/osaca/data/n1.yml @@ -0,0 +1,771 @@ +osaca_version: 0.3.3 +micro_architecture: Arm Neoverse N1 +arch_code: n1 +isa: AArch64 +ROB_size: 128 # wikichip +retired_uOps_per_cycle: 8 # wikichip +scheduler_size: 120 # wikichip +hidden_loads: false +load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 5.0, q: 6.0, v: 5.0, z: 4.0} +load_throughput: +- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]} +- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]} +load_throughput_default: [[1, '67']] +store_throughput: [] +store_throughput_default: [[1, '56'], [1, '67']] +ports: ['0', '1', '2', '3', '4', '4DV', '5', '6', '7'] +port_model_scheme: | + +----------------------------------------------------------------------------+ + | 120 entries | + +----------------------------------------------------------------------------+ + 0 |BR 1 |IS0 2 |IS1 3 |IM0 4 |FP0 5 |FP1 6 |LDST 7 |LDST + \/ \/ \/ \/ \/ \/ \/ \/ + +------+ +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+ + |Branch| | INT | | INT | | INT | | FP ALU | | FP ALU | | AGU | | AGU | + +------+ | ALU | | ALU | | ALU | +--------+ +--------+ +-------+ +-------+ + +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+ + +-----+ +-----+ | FP MUL | | FP MUL | |LD DATA| |LD DATA| + | ST | | INT | +--------+ +--------+ +-------+ +-------+ + | INT | | MUL | +--------+ +---------+ + +-----+ +-----+ | FP DIV | |SIMD SHFT| + +-----+ +--------+ +---------+ + | INT | +--------+ +--------+ + | DIV | | FMA | | FMA | + +-----+ +--------+ +--------+ + +-----+ +--------+ +--------+ + |SHIFT| | ST SIMD| | ST SIMD| + +-----+ | DATA | | DATA | + +-----+ +--------+ +--------+ + | ST | + | INT | + +-----+ +instruction_forms: +- name: add + operands: + - class: register + prefix: x + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: add + operands: + - class: register + prefix: x + - class: register + prefix: x + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: adds + operands: + - class: register + prefix: x + - class: register + prefix: x + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '132']] +- name: b.ne + operands: + - class: identifier + throughput: 1.0 + latency: 0.0 + port_pressure: [[1, '0']] +- name: b.gt + operands: + - class: identifier + throughput: 1.0 + latency: 0.0 + port_pressure: [[1, '0']] +- name: bne + operands: + - class: identifier + throughput: 1.0 + latency: 0.0 + port_pressure: [[1, '0']] +- name: cmp + operands: + - class: register + prefix: w + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: cmp + operands: + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.3333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] +- name: dup + operands: + - class: register + prefix: d + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fadd + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fadd + operands: + - class: register + prefix: d + width: '*' + - class: register + prefix: d + width: '*' + - class: register + prefix: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fadd + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fdiv + operands: + - class: register + prefix: v + shape: s + width: 128 + - class: register + prefix: v + shape: s + width: 128 + - class: register + prefix: v + shape: s + width: 128 + throughput: 6.0 + latency: 8.0 # 1*p4+6*p4DV + port_pressure: [[1, '4'], [6, [4DV]]] +- name: fdiv + operands: + - class: register + prefix: v + shape: d + width: 128 + - class: register + prefix: v + shape: d + width: 128 + - class: register + prefix: v + shape: d + width: 128 + throughput: 10.0 + latency: 12.0 # 1*p4+10*p4DV + port_pressure: [[4, '0'], [10, [4DV]]] +- name: fmla + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmla + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmov + operands: + - {class: register, prefix: s} + - {class: immediate, imd: double} + latency: ~ # 1*p45 + port_pressure: [[1, '45']] + throughput: 0.5 +- name: fmul + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 3.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmul + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 3.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fmul + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: register + prefix: d + throughput: 0.5 + latency: 3.0 # 1*p45 + port_pressure: [[1, '45']] +- name: frecpe + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 2.0 + latency: 4.0 # 1*p4 + port_pressure: [[2, '4']] +- name: frecpe + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 1.0 + latency: 3.0 # 1*p4 + port_pressure: [[1, '4']] +- name: fsub + operands: + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + - class: register + prefix: v + shape: s + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: fsub + operands: + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + - class: register + prefix: v + shape: d + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: ldp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: imd + index: ~ + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 5.0 # 2*p67, from n1 opt guide + port_pressure: [[2, '67']] +- name: ldp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: imd + index: ~ + scale: 1 + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 5.0 # 2*p67+1*p123, from n1 opt guide + port_pressure: [[2, '67'], [1, '123']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 7.0 # 2*p67, from n1 opt guide + port_pressure: [[2, '67']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: ~ + index: ~ + scale: 1 + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 7.0 # 2*p67+1*p123, from n1 opt guide + port_pressure: [[2, '56'], [1, '123']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 7.0 # 2*p67 + port_pressure: [[2, '67']] +- name: ldp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: true + post-indexed: false + throughput: 1.0 + latency: 7.0 # 2*p67+1*p123 + port_pressure: [[2, '67'], [1, '123']] +- name: ldp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 5.0 # 2*p67+1*p123 + port_pressure: [[2, '67'], [1, '123']] +- name: ldur # JL: assumed from n1 opt guide + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 6.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 6.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 5.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: d + - class: memory + base: x + offset: imd + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 5.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + post-indexed: false + pre-indexed: false + throughput: 0.5 + latency: 5.0 # 1*p67 + port_pressure: [[1, '67']] +- name: ldr + operands: + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: ldr + operands: + - class: register + prefix: q + - class: register + prefix: q + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: ldr + operands: + - class: register + prefix: d + - class: register + prefix: d + throughput: 0.0 + latency: 0.0 + port_pressure: [] +- name: mov + operands: + - class: register + prefix: x + - class: register + prefix: x + throughput: 0.25 + latency: 1.0 # 1*p3456 + port_pressure: [[1, '3456']] +- name: mov + operands: + - class: register + prefix: v + shape: b + width: '*' + - class: register + prefix: v + shape: b + width: '*' + throughput: 0.5 + latency: 2.0 # 1*p45 + port_pressure: [[1, '45']] +- name: stp + operands: + - class: register + prefix: d + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p45+1*p67 + port_pressure: [[2, '45'], [1, '67']] +- name: stp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 0 # 2*p45+2*p67+1*123 + port_pressure: [[2, '45'], [2, '67'], [1, '123']] +- name: stp + operands: + - class: register + prefix: q + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p45+2*p67 + port_pressure: [[2, '45'], [2, '67']] +- name: stur # JL: assumed from n1 opt guide + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 0.5 + latency: 0 # 1*p67+1*p23 + port_pressure: [[1, '56'], [1, '23']] +- name: stur # JL: assumed from n1 opt guide + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p67+1*p45 + port_pressure: [[2, '67'], [1, '45']] +- name: str + operands: + - class: register + prefix: x + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 0.5 + latency: 0 # 1*p67+1*p23 + port_pressure: [[1, '56'], [1, '23']] +- name: str + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: false + throughput: 0.5 + latency: 0 # 1*p67+1*p45 + port_pressure: [[1, '67'], [1, '45']] +- name: str + operands: + - class: register + prefix: d + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 0.5 + latency: 0 # 1*p67+1*p45+1*p123 + port_pressure: [[1, '67'], [1, '45'], [1, '123']] +- name: str + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: 0 # 2*p67+1*p45 + port_pressure: [[1, '67'], [1, '45']] +- name: str + operands: + - class: register + prefix: q + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 0 # 1*p67+1*p45+1*123 + port_pressure: [[1, '67'], [1, '45'], [1, '123']] +- name: str + operands: + - class: register + prefix: x + - class: memory + base: x + offset: '*' + index: '*' + scale: '*' + pre-indexed: false + post-indexed: true + throughput: 1.0 + latency: 0 # 1*p67+1*p23+1*p123 + port_pressure: [[1, '67'], [1, '23'], [1, '123']] +- name: sub + operands: + - class: register + prefix: w + - class: register + prefix: w + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p123 + port_pressure: [[1, '123']] diff --git a/osaca/data/tx2.yml b/osaca/data/tx2.yml index 78f85bd..8c9765f 100644 --- a/osaca/data/tx2.yml +++ b/osaca/data/tx2.yml @@ -267,6 +267,34 @@ instruction_forms: throughput: 0.5 latency: 6.0 # 1*p01 port_pressure: [[1, '01']] +- name: frecpe + operands: + - class: register + prefix: v + shape: s + - class: register + prefix: v + shape: s + - class: register + prefix: v + shape: s + throughput: 0.5 + latency: 5.0 # 1*p01 + port_pressure: [[1, '01']] +- name: frecpe + operands: + - class: register + prefix: v + shape: d + - class: register + prefix: v + shape: d + - class: register + prefix: v + shape: d + throughput: 0.5 + latency: 5.0 # 1*p01 + port_pressure: [[1, '01']] - name: fsub operands: - class: register diff --git a/osaca/osaca.py b/osaca/osaca.py index 35d5965..afe95e9 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -17,7 +17,20 @@ MODULE_DATA_DIR = os.path.join( ) LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/') DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/') -SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ICL', 'ZEN1', 'ZEN2', 'TX2', 'A64FX'] +SUPPORTED_ARCHS = [ + 'SNB', + 'IVB', + 'HSW', + 'BDW', + 'SKX', + 'CSX', + 'ICL', + 'ZEN1', + 'ZEN2', + 'TX2', + 'N1', + 'A64FX', +] # Stolen from pip @@ -71,7 +84,8 @@ def create_parser(parser=None): parser.add_argument( '--arch', type=str, - help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, A64FX).', + help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, ' + 'A64FX).', ) parser.add_argument( '--fixed', diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 1fa3a1e..91019ce 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -242,6 +242,7 @@ class MachineModel(object): arch_dict = { 'a64fx': 'aarch64', 'tx2': 'aarch64', + 'n1': 'aarch64', 'zen1': 'x86', 'zen+': 'x86', 'zen2': 'x86',