Merge branch 'master' into A64FX

This commit is contained in:
JanLJL
2020-07-13 14:41:49 +02:00
7 changed files with 1306 additions and 527 deletions

View File

@@ -9,8 +9,8 @@ class MOVEntryBuilder:
port_occupancy = defaultdict(Fraction)
for uops, ports in port_pressure:
for p in ports:
port_occupancy[p] += Fraction(uops, len(ports))
return float(max(list(port_occupancy.values())+[0]))
port_occupancy[p] += Fraction(uops, len(ports))
return float(max(list(port_occupancy.values()) + [0]))
@staticmethod
def classify(operands_types):
@@ -18,10 +18,10 @@ class MOVEntryBuilder:
store = 'mem' in operands_types[-1:]
assert not (load and store), "Can not process a combined load-store instruction."
return load, store
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0, comment=None):
self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None
):
if comment:
comment = " # " + comment
else:
@@ -32,10 +32,7 @@ class MOVEntryBuilder:
if ot == 'imd':
description += ' - class: immediate\n imd: int\n'
elif ot.startswith('mem'):
description += (
' - class: memory\n'
' base: "*"\n'
' offset: "*"\n')
description += ' - class: memory\n' ' base: "*"\n' ' offset: "*"\n'
if ot == 'mem_simple':
description += ' index: ~\n'
elif ot == 'mem_complex':
@@ -45,18 +42,20 @@ class MOVEntryBuilder:
description += ' scale: "*"\n'
else:
description += ' - class: register\n name: {}\n'.format(ot)
description += (
' latency: {latency}\n'
' port_pressure: {port_pressure!r}\n'
' throughput: {throughput}\n'
' uops: {uops}\n').format(
latency=latency,
port_pressure=port_pressure,
throughput=self.compute_throughput(port_pressure),
uops=sum([i for i,p in port_pressure]))
' uops: {uops}\n'
).format(
latency=latency,
port_pressure=port_pressure,
throughput=self.compute_throughput(port_pressure),
uops=sum([i for i, p in port_pressure]),
)
return description
def parse_port_pressure(self, port_pressure_str):
"""
Example:
@@ -68,7 +67,7 @@ class MOVEntryBuilder:
cycles, ports = p.split('*p')
port_pressure.append([int(cycles), ports])
return port_pressure
def process_item(self, instruction_form, resources):
"""
Example:
@@ -84,9 +83,7 @@ class MOVEntryBuilder:
class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
# for SNB and IVB
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0):
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
load, store = self.classify(operand_types)
comment = None
@@ -100,15 +97,14 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
comment = "with store"
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency, comment)
self, instruction_name, operand_types, port_pressure, latency, comment
)
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
# for HSW, BDW, SKX and CSX
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0):
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
load, store = self.classify(operand_types)
if load:
@@ -116,7 +112,8 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
latency += 4
comment = "with load"
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency, comment)
self, instruction_name, operand_types, port_pressure, latency, comment
)
if store:
port_pressure_simple = port_pressure + [[1, '237'], [1, '4']]
operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types]
@@ -125,16 +122,28 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
latency += 0
return (
MOVEntryBuilder.build_description(
self, instruction_name, operands_simple, port_pressure_simple, latency,
"with store, simple AGU") +
'\n' +
MOVEntryBuilder.build_description(
self, instruction_name, operands_complex, port_pressure_complex, latency,
"with store, complex AGU"))
self,
instruction_name,
operands_simple,
port_pressure_simple,
latency,
"with store, simple AGU",
)
+ '\n'
+ MOVEntryBuilder.build_description(
self,
instruction_name,
operands_complex,
port_pressure_complex,
latency,
"with store, complex AGU",
)
)
# Register only:
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency)
self, instruction_name, operand_types, port_pressure, latency
)
np7 = MOVEntryBuilderIntelNoPort7AGU()
@@ -149,7 +158,6 @@ snb_mov_instructions = [
('mov imd gpr', ('1*p015', 1)),
('mov imd mem', ('', 0)),
('movabs imd gpr', ('1*p015', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('1*p5', 1)),
('movapd xmm mem', ('', 0)),
@@ -160,7 +168,6 @@ snb_mov_instructions = [
('vmovapd ymm ymm', ('1*p5', 1)),
('vmovapd ymm mem', ('', 0)),
('vmovapd mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('1*p5', 1)),
('movaps xmm mem', ('', 0)),
@@ -171,7 +178,6 @@ snb_mov_instructions = [
('vmovaps ymm ymm', ('1*p5', 1)),
('movaps ymm mem', ('', 0)),
('movaps mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movd:movq
('movd gpr mm', ('1*p5', 1)),
('movd mem mm', ('', 0)),
@@ -197,7 +203,6 @@ snb_mov_instructions = [
('vmovd xmm mem', ('', 0)),
('vmovq xmm gpr', ('1*p0', 1)),
('vmovq xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movddup
('movddup xmm xmm', ('1*p5', 1)),
('movddup mem xmm', ('', 0)),
@@ -205,10 +210,8 @@ snb_mov_instructions = [
('vmovddup mem xmm', ('', 0)),
('vmovddup ymm ymm', ('1*p5', 1)),
('vmovddup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdq2q
('movdq2q xmm mm', ('1*p015+1*p5', 1)),
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('1*p015', 1)),
('movdqa mem xmm', ('', 0)),
@@ -219,7 +222,6 @@ snb_mov_instructions = [
('vmovdqa ymm ymm', ('1*p05', 1)),
('vmovdqa mem ymm', ('', 0)),
('vmovdqa ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('1*p015', 1)),
('movdqu mem xmm', ('', 0)),
@@ -230,75 +232,60 @@ snb_mov_instructions = [
('vmovdqu ymm ymm', ('1*p05', 1)),
('vmovdqu mem ymm', ('', 0)),
('vmovdqu ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movhlps
('movhlps xmm xmm', ('1*p5', 1)),
('vmovhlps xmm xmm xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movhpd
('movhpd mem xmm', ('1*p5', 1)),
('vmovhpd mem xmm xmm', ('1*p5', 1)),
('movhpd xmm mem', ('', 0)),
('vmovhpd mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movhps
('movhps mem xmm', ('1*p5', 1)),
('vmovhps mem xmm xmm', ('1*p5', 1)),
('movhps xmm mem', ('', 0)),
('vmovhps mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movlhps
('movlhps xmm xmm', ('1*p5', 1)),
('vmovlhps xmm xmm xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movlpd
('movlpd mem xmm', ('1*p5', 1)),
('vmovlpd mem xmm xmm', ('1*p5', 1)),
('movlpd xmm mem', ('', 0)),
('vmovlpd mem xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movlps
('movlps mem xmm', ('1*p5', 1)),
('vmovlps mem xmm xmm', ('1*p5', 1)),
('movlps xmm mem', ('', 0)),
('vmovlps mem xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 2)),
('vmovmskpd xmm gpr', ('1*p0', 2)),
('vmovmskpd ymm gpr', ('1*p0', 2)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 1)),
('vmovmskps xmm gpr', ('1*p0', 1)),
('vmovmskps ymm gpr', ('1*p0', 1)),
# https://www.felixcloutier.com/x86/movntdq
('movntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntdq ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('movntdqa mem xmm', ('', 0)),
('vmovntdqa mem xmm', ('', 0)),
('vmovntdqa mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movnti
('movnti gpr mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntpd
('movntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntpd ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('movntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntps ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntq
('movntq mm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq
('movq mm mm', ('', 0)),
('movq mem mm', ('', 0)),
@@ -309,14 +296,11 @@ snb_mov_instructions = [
('vmovq xmm xmm', ('1*p015', 1)),
('vmovq mem xmm', ('', 0)),
('vmovq xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p015', 1)),
# https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
# TODO combined load-store is currently not supported
# ('movs mem mem', ()),
# https://www.felixcloutier.com/x86/movsd
('movsd xmm xmm', ('1*p5', 1)),
('movsd mem xmm', ('', 0)),
@@ -324,7 +308,6 @@ snb_mov_instructions = [
('vmovsd xmm xmm xmm', ('1*p5', 1)),
('vmovsd mem xmm', ('', 0)),
('vmovsd xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movshdup
('movshdup xmm xmm', ('1*p5', 1)),
('movshdup mem xmm', ('', 0)),
@@ -332,7 +315,6 @@ snb_mov_instructions = [
('vmovshdup mem xmm', ('', 0)),
('vmovshdup ymm ymm', ('1*p5', 1)),
('vmovshdup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movsldup
('movsldup xmm xmm', ('1*p5', 1)),
('movsldup mem xmm', ('', 0)),
@@ -340,7 +322,6 @@ snb_mov_instructions = [
('vmovsldup mem xmm', ('', 0)),
('vmovsldup ymm ymm', ('1*p5', 1)),
('vmovsldup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movss
('movss xmm xmm', ('1*p5', 1)),
('movss mem xmm', ('', 0)),
@@ -349,7 +330,6 @@ snb_mov_instructions = [
('vmovss xmm xmm', ('1*p5', 1)),
('vmovss xmm mem', ('', 0)),
('movss mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p015', 1)),
('movsx mem gpr', ('', 0)),
@@ -363,7 +343,6 @@ snb_mov_instructions = [
('movsl mem gpr', ('', 0)), # AT&T version
('movsq gpr gpr', ('1*p015', 1)), # AT&T version
('movsq mem gpr', ('', 0)), # AT&T version
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('1*p5', 1)),
('movupd mem xmm', ('', 0)),
@@ -374,7 +353,6 @@ snb_mov_instructions = [
('vmovupd ymm ymm', ('1*p5', 1)),
('vmovupd mem ymm', ('', 0)),
('vmovupd ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('1*p5', 1)),
('movups mem xmm', ('', 0)),
@@ -385,7 +363,6 @@ snb_mov_instructions = [
('vmovups ymm ymm', ('1*p5', 1)),
('vmovups mem ymm', ('', 0)),
('vmovups ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p015', 1)),
('movzx mem gpr', ('', 0)),
@@ -397,7 +374,6 @@ snb_mov_instructions = [
('movzl mem gpr', ('', 0)), # AT&T version
('movzq gpr gpr', ('1*p015', 1)), # AT&T version
('movzq mem gpr', ('', 0)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p015+2*p05', 2)),
('cmova mem gpr', ('1*p015+2*p05', 2)),
@@ -459,12 +435,10 @@ snb_mov_instructions = [
('cmovs mem gpr', ('1*p015+1*p05', 2)),
('cmovz gpr gpr', ('1*p015+1*p05', 2)),
('cmovz mem gpr', ('1*p015+1*p05', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 2)),
('pmovmskb xmm gpr', ('1*p0', 2)),
('vpmovmskb xmm gpr', ('1*p0', 2)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p15', 1)),
('pmovsxbw mem xmm', ('1*p15', 1)),
@@ -484,7 +458,6 @@ snb_mov_instructions = [
('vpmovsxbd mem ymm', ('1*p15', 1)),
('vpmovsxbq ymm ymm', ('1*p15', 1)),
('vpmovsxbq mem ymm', ('1*p15', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p15', 1)),
('pmovzxbw mem xmm', ('1*p15', 1)),
@@ -494,307 +467,294 @@ snb_mov_instructions = [
('vpmovzxbw mem ymm', ('1*p15', 1)),
]
ivb_mov_instructions = list(OrderedDict(snb_mov_instructions + [
# https://www.felixcloutier.com/x86/mov
('mov gpr gpr', ('', 0)),
('mov imd gpr', ('', 0)),
# IVB table: every SNB entry, followed by the IVB-specific overrides below.
# Feeding the concatenated (instruction form, resources) pairs through an
# OrderedDict makes a later pair replace the value of an earlier pair with the
# same instruction-form key while the key keeps its first position; the result
# is converted back into a list of pairs so the next table can extend it.
# NOTE(review): all overrides here use ('', 0), i.e. an empty port-pressure
# string and 0 as second value (presumably the latency) -- confirm the meaning
# against process_item().
ivb_mov_instructions = list(
OrderedDict(
snb_mov_instructions
+ [
# https://www.felixcloutier.com/x86/mov
('mov gpr gpr', ('', 0)),
('mov imd gpr', ('', 0)),
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('', 0)),
('vmovapd xmm xmm', ('', 0)),
('vmovapd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('', 0)),
('vmovaps xmm xmm', ('', 0)),
('vmovaps ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('', 0)),
('vmovdqa xmm xmm', ('', 0)),
('vmovdqa ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('', 0)),
('vmovdqu xmm xmm', ('', 0)),
('vmovdqu ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('', 0)),
('vmovupd xmm xmm', ('', 0)),
('vmovupd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('', 0)),
('vmovups xmm xmm', ('', 0)),
('vmovups ymm ymm', ('', 0)),
]
).items()
)
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('', 0)),
('vmovapd xmm xmm', ('', 0)),
('vmovapd ymm ymm', ('', 0)),
# HSW table: every IVB entry, followed by the HSW-specific overrides below.
# The OrderedDict round trip lets a later pair replace the value of an earlier
# pair with the same instruction-form key (the key keeps its first position);
# the result is turned back into a list of pairs so the next table can extend it.
hsw_mov_instructions = list(
OrderedDict(
ivb_mov_instructions
+ [
# https://www.felixcloutier.com/x86/mov
('mov imd gpr', ('1*p0156', 1)),
('mov gpr gpr', ('1*p0156', 1)),
('movabs imd gpr', ('1*p0156', 1)),  # AT&T version
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 6)),
('movbe mem gpr', ('1*p15', 6)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 3)),
('vmovmskps xmm gpr', ('1*p0', 3)),
('vmovmskps ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p0156', 1)),
('movsb gpr gpr', ('1*p0156', 1)),  # AT&T version
('movsw gpr gpr', ('1*p0156', 1)),  # AT&T version
('movsl gpr gpr', ('1*p0156', 1)),  # AT&T version
('movsq gpr gpr', ('1*p0156', 1)),  # AT&T version
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p0156', 1)),
('movzb gpr gpr', ('1*p0156', 1)),  # AT&T version
('movzw gpr gpr', ('1*p0156', 1)),  # AT&T version
('movzl gpr gpr', ('1*p0156', 1)),  # AT&T version
('movzq gpr gpr', ('1*p0156', 1)),  # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
# NOTE(review): in this group the gpr and mem forms of `cmovae` and of `cmovb`
# carry different port pressures, `cmovnb` is listed twice with identical
# values (harmless, the OrderedDict keeps a single entry), and `cmovnle` is
# not overridden here -- confirm against the measurement data.
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
('cmova mem gpr', ('1*p0156+2*p06', 2)),
('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovae mem gpr', ('1*p0156+2*p06', 2)),
('cmovb gpr gpr', ('1*p0156+2*p06', 2)),
('cmovb mem gpr', ('1*p0156+1*p06', 2)),
('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovc mem gpr', ('1*p0156+1*p06', 2)),
('cmove gpr gpr', ('1*p0156+1*p06', 2)),
('cmove mem gpr', ('1*p0156+1*p06', 2)),
('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
('cmovg mem gpr', ('1*p0156+1*p06', 2)),
('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovge mem gpr', ('1*p0156+1*p06', 2)),
('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovl mem gpr', ('1*p0156+1*p06', 2)),
('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
('cmovle mem gpr', ('1*p0156+1*p06', 2)),
('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
('cmovna mem gpr', ('1*p0156+2*p06', 2)),
('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
('cmovne mem gpr', ('1*p0156+1*p06', 2)),
('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
('cmovng mem gpr', ('1*p0156+1*p06', 2)),
('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
('cmovno mem gpr', ('1*p0156+1*p06', 2)),
('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
('cmovns mem gpr', ('1*p0156+1*p06', 2)),
('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovo mem gpr', ('1*p0156+1*p06', 2)),
('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovp mem gpr', ('1*p0156+1*p06', 2)),
('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 3)),
('pmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p5', 1)),
('pmovsxbw mem xmm', ('1*p5', 1)),
('pmovsxbd xmm xmm', ('1*p5', 1)),
('pmovsxbd mem xmm', ('1*p5', 1)),
('pmovsxbq xmm xmm', ('1*p5', 1)),
('pmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw xmm xmm', ('1*p5', 1)),
('vpmovsxbw mem xmm', ('1*p5', 1)),
('vpmovsxbd xmm xmm', ('1*p5', 1)),
('vpmovsxbd mem xmm', ('1*p5', 1)),
('vpmovsxbq xmm xmm', ('1*p5', 1)),
('vpmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw ymm ymm', ('1*p5', 1)),
('vpmovsxbw mem ymm', ('1*p5', 1)),
('vpmovsxbd ymm ymm', ('1*p5', 1)),
('vpmovsxbd mem ymm', ('1*p5', 1)),
('vpmovsxbq ymm ymm', ('1*p5', 1)),
('vpmovsxbq mem ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p5', 1)),
('pmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw xmm xmm', ('1*p5', 1)),
('vpmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw ymm ymm', ('1*p5', 1)),
('vpmovzxbw mem ymm', ('1*p5', 1)),
]
).items()
)
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('', 0)),
('vmovaps xmm xmm', ('', 0)),
('vmovaps ymm ymm', ('', 0)),
# BDW table: every HSW entry, followed by the BDW-specific cmovcc overrides.
# The OrderedDict round trip lets a later pair replace the value of an earlier
# pair with the same instruction-form key (the key keeps its first position);
# the result is turned back into a list of pairs so the next table can extend it.
bdw_mov_instructions = list(
OrderedDict(
hsw_mov_instructions
+ [
# https://www.felixcloutier.com/x86/cmovcc
# NOTE(review): the gpr and mem forms of `cmovae` and of `cmovb` carry
# different port pressures, `cmovnb` is listed twice with identical values
# (harmless, the OrderedDict keeps a single entry), and `cmovnle` is not
# overridden here -- confirm against the measurement data.
('cmova gpr gpr', ('2*p06', 1)),
('cmova mem gpr', ('2*p06', 1)),
('cmovae gpr gpr', ('1*p06', 1)),
('cmovae mem gpr', ('2*p06', 1)),
('cmovb gpr gpr', ('2*p06', 1)),
('cmovb mem gpr', ('1*p06', 1)),
('cmovbe gpr gpr', ('2*p06', 1)),
('cmovbe mem gpr', ('2*p06', 1)),
('cmovc gpr gpr', ('1*p06', 1)),
('cmovc mem gpr', ('1*p06', 1)),
('cmove gpr gpr', ('1*p06', 1)),
('cmove mem gpr', ('1*p06', 1)),
('cmovg gpr gpr', ('1*p06', 1)),
('cmovg mem gpr', ('1*p06', 1)),
('cmovge gpr gpr', ('1*p06', 1)),
('cmovge mem gpr', ('1*p06', 1)),
('cmovl gpr gpr', ('1*p06', 1)),
('cmovl mem gpr', ('1*p06', 1)),
('cmovle gpr gpr', ('1*p06', 1)),
('cmovle mem gpr', ('1*p06', 1)),
('cmovna gpr gpr', ('2*p06', 1)),
('cmovna mem gpr', ('2*p06', 1)),
('cmovnae gpr gpr', ('1*p06', 1)),
('cmovnae mem gpr', ('1*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnbe gpr gpr', ('2*p06', 1)),
('cmovnbe mem gpr', ('2*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnc gpr gpr', ('1*p06', 1)),
('cmovnc mem gpr', ('1*p06', 1)),
('cmovne gpr gpr', ('1*p06', 1)),
('cmovne mem gpr', ('1*p06', 1)),
('cmovng gpr gpr', ('1*p06', 1)),
('cmovng mem gpr', ('1*p06', 1)),
('cmovnge gpr gpr', ('1*p06', 1)),
('cmovnge mem gpr', ('1*p06', 1)),
('cmovnl gpr gpr', ('1*p06', 1)),
('cmovnl mem gpr', ('1*p06', 1)),
('cmovno gpr gpr', ('1*p06', 1)),
('cmovno mem gpr', ('1*p06', 1)),
('cmovnp gpr gpr', ('1*p06', 1)),
('cmovnp mem gpr', ('1*p06', 1)),
('cmovns gpr gpr', ('1*p06', 1)),
('cmovns mem gpr', ('1*p06', 1)),
('cmovnz gpr gpr', ('1*p06', 1)),
('cmovnz mem gpr', ('1*p06', 1)),
('cmovo gpr gpr', ('1*p06', 1)),
('cmovo mem gpr', ('1*p06', 1)),
('cmovp gpr gpr', ('1*p06', 1)),
('cmovp mem gpr', ('1*p06', 1)),
('cmovpe gpr gpr', ('1*p06', 1)),
('cmovpe mem gpr', ('1*p06', 1)),
('cmovpo gpr gpr', ('1*p06', 1)),
('cmovpo mem gpr', ('1*p06', 1)),
('cmovs gpr gpr', ('1*p06', 1)),
('cmovs mem gpr', ('1*p06', 1)),
('cmovz gpr gpr', ('1*p06', 1)),
('cmovz mem gpr', ('1*p06', 1)),
]
).items()
)
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('', 0)),
('vmovdqa xmm xmm', ('', 0)),
('vmovdqa ymm ymm', ('', 0)),
# SKX table: every BDW entry, followed by the SKX-specific additions and
# overrides below (zmm forms and changed movbe / movq2dq values). The
# OrderedDict round trip lets a later pair replace the value of an earlier pair
# with the same instruction-form key (the key keeps its first position); the
# result is turned back into a list of pairs so the next table can extend it.
# The "TODO with masking!" markers list instruction groups for which no masked
# forms have been added yet.
skx_mov_instructions = list(
OrderedDict(
bdw_mov_instructions
+ [
# https://www.felixcloutier.com/x86/movapd
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movaps
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 4)),
('movbe mem gpr', ('1*p15', 4)),
# https://www.felixcloutier.com/x86/movddup
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
# TODO with masking!
# https://www.felixcloutier.com/x86/movntdq
('vmovntdq zmm mem', ('', 0)),  # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('vmovntdqa mem zmm', ('', 0)),
# https://www.felixcloutier.com/x86/movntpd
('vmovntpd zmm mem', ('', 0)),  # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('vmovntps zmm mem', ('', 0)),  # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p0+1*p015', 1)),
# https://www.felixcloutier.com/x86/movsd
# TODO with masking!
# https://www.felixcloutier.com/x86/movshdup
# TODO with masking!
# https://www.felixcloutier.com/x86/movsldup
# TODO with masking!
# https://www.felixcloutier.com/x86/movss
# TODO with masking!
# https://www.felixcloutier.com/x86/movupd
# TODO with masking!
# https://www.felixcloutier.com/x86/movups
# TODO with masking!
# https://www.felixcloutier.com/x86/pmovsx
# TODO with masking!
# NOTE(review): the register form below uses 3 as second value while the
# memory form uses 1 -- confirm this difference is intended.
('vpmovsxbw ymm zmm', ('1*p5', 3)),
('vpmovsxbw mem zmm', ('1*p5', 1)),
]
).items()
)
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('', 0)),
('vmovdqu xmm xmm', ('', 0)),
('vmovdqu ymm ymm', ('', 0)),
# CSX table: currently identical to the SKX table. It is materialised as a
# list, like every other *_mov_instructions table, so that a later table can
# extend it with `csx_mov_instructions + [...]` (an odict_items view does not
# support `+`) and so that it can be iterated more than once safely.
csx_mov_instructions = list(
    OrderedDict(
        skx_mov_instructions
        + [
            # CSX-specific overrides go here (none so far).
        ]
    ).items()
)
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('', 0)),
('vmovupd xmm xmm', ('', 0)),
('vmovupd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('', 0)),
('vmovups xmm xmm', ('', 0)),
('vmovups ymm ymm', ('', 0)),
]).items())
hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
# https://www.felixcloutier.com/x86/mov
('mov imd gpr', ('1*p0156', 1)),
('mov gpr gpr', ('1*p0156', 1)),
('movabs imd gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 6)),
('movbe mem gpr', ('1*p15', 6)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 3)),
('vmovmskps xmm gpr', ('1*p0', 3)),
('vmovmskps ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p0156', 1)),
('movsb gpr gpr', ('1*p0156', 1)), # AT&T version
('movsw gpr gpr', ('1*p0156', 1)), # AT&T version
('movsl gpr gpr', ('1*p0156', 1)), # AT&T version
('movsq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p0156', 1)),
('movzb gpr gpr', ('1*p0156', 1)), # AT&T version
('movzw gpr gpr', ('1*p0156', 1)), # AT&T version
('movzl gpr gpr', ('1*p0156', 1)), # AT&T version
('movzq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
('cmova mem gpr', ('1*p0156+2*p06', 2)),
('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovae mem gpr', ('1*p0156+2*p06', 2)),
('cmovb gpr gpr', ('1*p0156+2*p06', 2)),
('cmovb mem gpr', ('1*p0156+1*p06', 2)),
('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovc mem gpr', ('1*p0156+1*p06', 2)),
('cmove gpr gpr', ('1*p0156+1*p06', 2)),
('cmove mem gpr', ('1*p0156+1*p06', 2)),
('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
('cmovg mem gpr', ('1*p0156+1*p06', 2)),
('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovge mem gpr', ('1*p0156+1*p06', 2)),
('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovl mem gpr', ('1*p0156+1*p06', 2)),
('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
('cmovle mem gpr', ('1*p0156+1*p06', 2)),
('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
('cmovna mem gpr', ('1*p0156+2*p06', 2)),
('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
('cmovne mem gpr', ('1*p0156+1*p06', 2)),
('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
('cmovng mem gpr', ('1*p0156+1*p06', 2)),
('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
('cmovno mem gpr', ('1*p0156+1*p06', 2)),
('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
('cmovns mem gpr', ('1*p0156+1*p06', 2)),
('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovo mem gpr', ('1*p0156+1*p06', 2)),
('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
('cmovp mem gpr', ('1*p0156+1*p06', 2)),
('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 3)),
('pmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p5', 1)),
('pmovsxbw mem xmm', ('1*p5', 1)),
('pmovsxbd xmm xmm', ('1*p5', 1)),
('pmovsxbd mem xmm', ('1*p5', 1)),
('pmovsxbq xmm xmm', ('1*p5', 1)),
('pmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw xmm xmm', ('1*p5', 1)),
('vpmovsxbw mem xmm', ('1*p5', 1)),
('vpmovsxbd xmm xmm', ('1*p5', 1)),
('vpmovsxbd mem xmm', ('1*p5', 1)),
('vpmovsxbq xmm xmm', ('1*p5', 1)),
('vpmovsxbq mem xmm', ('1*p5', 1)),
('vpmovsxbw ymm ymm', ('1*p5', 1)),
('vpmovsxbw mem ymm', ('1*p5', 1)),
('vpmovsxbd ymm ymm', ('1*p5', 1)),
('vpmovsxbd mem ymm', ('1*p5', 1)),
('vpmovsxbq ymm ymm', ('1*p5', 1)),
('vpmovsxbq mem ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p5', 1)),
('pmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw xmm xmm', ('1*p5', 1)),
('vpmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw ymm ymm', ('1*p5', 1)),
('vpmovzxbw mem ymm', ('1*p5', 1)),
]).items())
bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('2*p06', 1)),
('cmova mem gpr', ('2*p06', 1)),
('cmovae gpr gpr', ('1*p06', 1)),
('cmovae mem gpr', ('2*p06', 1)),
('cmovb gpr gpr', ('2*p06', 1)),
('cmovb mem gpr', ('1*p06', 1)),
('cmovbe gpr gpr', ('2*p06', 1)),
('cmovbe mem gpr', ('2*p06', 1)),
('cmovc gpr gpr', ('1*p06', 1)),
('cmovc mem gpr', ('1*p06', 1)),
('cmove gpr gpr', ('1*p06', 1)),
('cmove mem gpr', ('1*p06', 1)),
('cmovg gpr gpr', ('1*p06', 1)),
('cmovg mem gpr', ('1*p06', 1)),
('cmovge gpr gpr', ('1*p06', 1)),
('cmovge mem gpr', ('1*p06', 1)),
('cmovl gpr gpr', ('1*p06', 1)),
('cmovl mem gpr', ('1*p06', 1)),
('cmovle gpr gpr', ('1*p06', 1)),
('cmovle mem gpr', ('1*p06', 1)),
('cmovna gpr gpr', ('2*p06', 1)),
('cmovna mem gpr', ('2*p06', 1)),
('cmovnae gpr gpr', ('1*p06', 1)),
('cmovnae mem gpr', ('1*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnbe gpr gpr', ('2*p06', 1)),
('cmovnbe mem gpr', ('2*p06', 1)),
('cmovnb gpr gpr', ('1*p06', 1)),
('cmovnb mem gpr', ('1*p06', 1)),
('cmovnc gpr gpr', ('1*p06', 1)),
('cmovnc mem gpr', ('1*p06', 1)),
('cmovne gpr gpr', ('1*p06', 1)),
('cmovne mem gpr', ('1*p06', 1)),
('cmovng gpr gpr', ('1*p06', 1)),
('cmovng mem gpr', ('1*p06', 1)),
('cmovnge gpr gpr', ('1*p06', 1)),
('cmovnge mem gpr', ('1*p06', 1)),
('cmovnl gpr gpr', ('1*p06', 1)),
('cmovnl mem gpr', ('1*p06', 1)),
('cmovno gpr gpr', ('1*p06', 1)),
('cmovno mem gpr', ('1*p06', 1)),
('cmovnp gpr gpr', ('1*p06', 1)),
('cmovnp mem gpr', ('1*p06', 1)),
('cmovns gpr gpr', ('1*p06', 1)),
('cmovns mem gpr', ('1*p06', 1)),
('cmovnz gpr gpr', ('1*p06', 1)),
('cmovnz mem gpr', ('1*p06', 1)),
('cmovo gpr gpr', ('1*p06', 1)),
('cmovo mem gpr', ('1*p06', 1)),
('cmovp gpr gpr', ('1*p06', 1)),
('cmovp mem gpr', ('1*p06', 1)),
('cmovpe gpr gpr', ('1*p06', 1)),
('cmovpe mem gpr', ('1*p06', 1)),
('cmovpo gpr gpr', ('1*p06', 1)),
('cmovpo mem gpr', ('1*p06', 1)),
('cmovs gpr gpr', ('1*p06', 1)),
('cmovs mem gpr', ('1*p06', 1)),
('cmovz gpr gpr', ('1*p06', 1)),
('cmovz mem gpr', ('1*p06', 1)),
]).items())
skx_mov_instructions = list(OrderedDict(bdw_mov_instructions + [
# https://www.felixcloutier.com/x86/movapd
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movaps
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 4)),
('movbe mem gpr', ('1*p15', 4)),
# https://www.felixcloutier.com/x86/movddup
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
# TODO with masking!
# https://www.felixcloutier.com/x86/movntdq
('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('vmovntdqa mem zmm', ('', 0)),
# https://www.felixcloutier.com/x86/movntpd
('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p0+1*p015', 1)),
# https://www.felixcloutier.com/x86/movsd
# TODO with masking!
# https://www.felixcloutier.com/x86/movshdup
# TODO with masking!
# https://www.felixcloutier.com/x86/movsldup
# TODO with masking!
# https://www.felixcloutier.com/x86/movss
# TODO with masking!
# https://www.felixcloutier.com/x86/movupd
# TODO with masking!
# https://www.felixcloutier.com/x86/movups
# TODO with masking!
# https://www.felixcloutier.com/x86/pmovsx
# TODO with masking!
('vpmovsxbw ymm zmm', ('1*p5', 3)),
('vpmovsxbw mem zmm', ('1*p5', 1)),
]).items())
csx_mov_instructions = OrderedDict(skx_mov_instructions + [
]).items()
def get_description(arch, rhs_comment=None):
descriptions = {
@@ -803,7 +763,7 @@ def get_description(arch, rhs_comment=None):
'hsw': '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions]),
'bdw': '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions]),
'skx': '\n'.join([p7.process_item(*item) for item in skx_mov_instructions]),
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions])
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]),
}
description = descriptions[arch]
@@ -813,20 +773,21 @@ def get_description(arch, rhs_comment=None):
commented_description = ""
for l in descriptions[arch].split('\n'):
commented_description += ("{:<"+str(max_length)+"} # {}\n").format(l, rhs_comment)
commented_description += ("{:<" + str(max_length) + "} # {}\n").format(l, rhs_comment)
description = commented_description
return description
if __name__ == '__main__':
    # Command-line entry point: print the generated description for exactly one
    # architecture given as the sole positional argument.
    import sys

    if len(sys.argv) != 2:
        # Diagnostics go to stderr so that a redirected stdout only ever contains
        # the generated description; wrong usage is reported as a failure.
        print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    try:
        # The full command line is attached as right-hand-side comment to each line.
        print(get_description(sys.argv[1], rhs_comment=' '.join(sys.argv)))
    except KeyError:
        # get_description looks the architecture up in a dict; an unknown key ends here.
        print("Unknown architecture.", file=sys.stderr)
        sys.exit(1)

View File

@@ -18,45 +18,28 @@ instruction_forms:
- name: "fmla"
operands:
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: true
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: false
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: false
- name: "fmla"
- name: ldp
operands:
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: true
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: false
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "memory"
@@ -68,52 +51,14 @@ instruction_forms:
post-indexed: false
source: true
destination: false
- name: "ldp"
- name: ldp
operands:
- class: "register"
prefix: "q"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
prefix: "*"
source: false
destination: true
- class: "memory"
@@ -125,14 +70,63 @@ instruction_forms:
post-indexed: true
source: true
destination: true
- name: "stp"
- name: ldp
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: [ldr, ldur]
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
source: true
destination: true
- name: [ldr, ldur]
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: stp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "memory"
@@ -144,14 +138,33 @@ instruction_forms:
post-indexed: false
source: false
destination: true
- name: "stp"
- name: stp
operands:
- class: "register"
prefix: "q"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "q"
prefix: "*"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: false
destination: true
- name: stp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- class: "memory"
@@ -160,73 +173,13 @@ instruction_forms:
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
post-indexed: true
source: false
destination: true
- name: "str"
- name: [str, stur]
operands:
- class: "register"
prefix: "x"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "str"
operands:
- class: "register"
prefix: "d"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "str"
operands:
- class: "register"
prefix: "q"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "stur"
operands:
- class: "register"
prefix: "q"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "stur"
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "memory"

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import os.path
import argparse
import os.path
import sys
import xml.etree.ElementTree as ET
from distutils.version import StrictVersion
@@ -8,8 +8,23 @@ from distutils.version import StrictVersion
from osaca.parser import get_parser
from osaca.semantics import MachineModel
intel_archs = ['CON', 'WOL', 'NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL',
'CNL', 'ICL']
intel_archs = [
'CON',
'WOL',
'NHM',
'WSM',
'SNB',
'IVB',
'HSW',
'BDW',
'SKL',
'SKX',
'KBL',
'CFL',
'CNL',
'ICL',
]
amd_archs = ['ZEN1', 'ZEN+', 'ZEN2']
def port_pressure_from_tag_attributes(attrib):
@@ -19,6 +34,7 @@ def port_pressure_from_tag_attributes(attrib):
for p in attrib['ports'].split('+'):
cycles, ports = p.split('*')
ports = ports.lstrip('p')
ports = ports.lstrip('FP')
port_occupation.append([int(cycles), ports])
# Also consider div on DIV pipeline
@@ -88,10 +104,10 @@ def extract_paramters(instruction_tag, parser, isa):
return parameters
def extract_model(tree, arch):
def extract_model(tree, arch, skip_mem=True):
try:
isa = MachineModel.get_isa_for_arch(arch)
except:
except Exception:
print("Skipping...", file=sys.stderr)
return None
mm = MachineModel(isa=isa)
@@ -101,6 +117,7 @@ def extract_model(tree, arch):
ignore = False
mnemonic = instruction_tag.attrib['asm']
iform = instruction_tag.attrib['iform']
# skip any mnemonic which contains spaces (e.g., "REX CRC32")
if ' ' in mnemonic:
continue
@@ -118,6 +135,26 @@ def extract_model(tree, arch):
arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
if arch_tag is None:
continue
# skip any instructions without port utilization
if not any(['ports' in x.attrib for x in arch_tag.findall('measurement')]):
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
continue
# skip if computed and measured TP don't match
if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][
0
]:
print(
"Calculated TP from port utilization doesn't match TP, skip: ",
iform,
file=sys.stderr,
)
continue
# skip if instruction contains memory operand
if skip_mem and any(
[x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')]
):
print("Contains memory operand, skip: ", iform, file=sys.stderr)
continue
# We collect all measurement and IACA information and compare them later
for measurement_tag in arch_tag.iter('measurement'):
if 'TP_ports' in measurement_tag.attrib:
@@ -143,10 +180,14 @@ def extract_model(tree, arch):
if 'max_cycles' in l_tag.attrib
]
if latencies[1:] != latencies[:-1]:
print("Contradicting latencies found, using first:", mnemonic, latencies,
file=sys.stderr)
print(
"Contradicting latencies found, using smallest:",
iform,
latencies,
file=sys.stderr,
)
if latencies:
latency = latencies[0]
latency = min(latencies)
if ignore:
continue
@@ -160,16 +201,14 @@ def extract_model(tree, arch):
# Check if all are equal
if port_pressure:
if port_pressure[1:] != port_pressure[:-1]:
print(
"Contradicting port occupancies, using latest IACA:",
mnemonic, file=sys.stderr)
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
port_pressure = port_pressure[-1]
else:
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
continue
# Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
if arch.upper() in intel_archs and not arch.upper() in ['ICL']:
if arch.upper() in intel_archs and not arch.upper() in ['ICL']:
if any([p['class'] == 'memory' for p in parameters]):
# We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
# TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
@@ -183,7 +222,7 @@ def extract_model(tree, arch):
# Add (1, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
if port_23 and not port_4:
port_pressure.append((1, ['2D', '3D']))
# Add missing ports:
for ports in [pp[1] for pp in port_pressure]:
for p in ports:
@@ -201,7 +240,7 @@ def rhs_comment(uncommented_string, comment):
commented_string = ""
for l in uncommented_string.split('\n'):
commented_string += ("{:<"+str(max_length)+"} # {}\n").format(l, comment)
commented_string += ("{:<" + str(max_length) + "} # {}\n").format(l, comment)
return commented_string
@@ -218,21 +257,33 @@ def main():
help='architecture to extract, use IACA abbreviations (e.g., SNB). '
'if not given, all will be extracted and saved to file in CWD.',
)
parser.add_argument(
'--mem',
dest='skip_mem',
action='store_false',
help='add instruction forms including memory addressing operands, which are '
'skipped by default'
)
args = parser.parse_args()
basename = os.path.basename(__file__)
tree = ET.parse(args.xml)
print('Available architectures:', ', '.join(architectures(tree)))
print('# Available architectures:', ', '.join(architectures(tree)))
if args.arch:
model = extract_model(tree, args.arch)
print('# Chosen architecture: {}'.format(args.arch))
model = extract_model(tree, args.arch, args.skip_mem)
if model is not None:
print(rhs_comment(model.dump(), basename+" "+sys.argv[0]))
print(
rhs_comment(
model.dump(), basename + " " + args.xml.split('/')[-1] + " " + args.arch
)
)
else:
for arch in architectures(tree):
print(arch, end='')
model = extract_model(tree, arch.lower())
model = extract_model(tree, arch.lower(), args.skip_mem)
if model:
model_string = rhs_comment(model.dump(), basename+" "+arch)
model_string = rhs_comment(model.dump(), basename + " " + arch)
with open('{}.yml'.format(arch.lower()), 'w') as f:
f.write(model_string)

771
osaca/data/n1.yml Normal file
View File

@@ -0,0 +1,771 @@
osaca_version: 0.3.3
micro_architecture: Arm Neoverse N1
arch_code: n1
isa: AArch64
ROB_size: 128 # wikichip
retired_uOps_per_cycle: 8 # wikichip
scheduler_size: 120 # wikichip
hidden_loads: false
load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 5.0, q: 6.0, v: 5.0, z: 4.0}
load_throughput:
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
load_throughput_default: [[1, '67']]
store_throughput: []
store_throughput_default: [[1, '56'], [1, '67']]
ports: ['0', '1', '2', '3', '4', '4DV', '5', '6', '7']
port_model_scheme: |
+----------------------------------------------------------------------------+
| 120 entries |
+----------------------------------------------------------------------------+
0 |BR 1 |IS0 2 |IS1 3 |IM0 4 |FP0 5 |FP1 6 |LDST 7 |LDST
\/ \/ \/ \/ \/ \/ \/ \/
+------+ +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
|Branch| | INT | | INT | | INT | | FP ALU | | FP ALU | | AGU | | AGU |
+------+ | ALU | | ALU | | ALU | +--------+ +--------+ +-------+ +-------+
+-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
+-----+ +-----+ | FP MUL | | FP MUL | |LD DATA| |LD DATA|
| ST | | INT | +--------+ +--------+ +-------+ +-------+
| INT | | MUL | +--------+ +---------+
+-----+ +-----+ | FP DIV | |SIMD SHFT|
+-----+ +--------+ +---------+
| INT | +--------+ +--------+
| DIV | | FMA | | FMA |
+-----+ +--------+ +--------+
+-----+ +--------+ +--------+
|SHIFT| | ST SIMD| | ST SIMD|
+-----+ | DATA | | DATA |
+-----+ +--------+ +--------+
| ST |
| INT |
+-----+
instruction_forms:
- name: add
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: add
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: adds
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: b.ne
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: b.gt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: bne
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: cmp
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: cmp
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: dup
operands:
- class: register
prefix: d
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: d
width: '*'
- class: register
prefix: d
width: '*'
- class: register
prefix: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fdiv
operands:
- class: register
prefix: v
shape: s
width: 128
- class: register
prefix: v
shape: s
width: 128
- class: register
prefix: v
shape: s
width: 128
throughput: 6.0
latency: 8.0 # 1*p4+6*p4DV
port_pressure: [[1, '4'], [6, [4DV]]]
- name: fdiv
operands:
- class: register
prefix: v
shape: d
width: 128
- class: register
prefix: v
shape: d
width: 128
- class: register
prefix: v
shape: d
width: 128
throughput: 10.0
latency: 12.0 # 1*p4+10*p4DV
port_pressure: [[1, '4'], [10, [4DV]]]
- name: fmla
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmla
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmov
operands:
- {class: register, prefix: s}
- {class: immediate, imd: double}
latency: ~ # 1*p45
port_pressure: [[1, '45']]
throughput: 0.5
- name: fmul
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmul
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmul
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: frecpe
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 2.0
latency: 4.0 # 2*p4
port_pressure: [[2, '4']]
- name: frecpe
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 1.0
latency: 3.0 # 1*p4
port_pressure: [[1, '4']]
- name: fsub
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fsub
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 5.0 # 2*p67, from n1 opt guide
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 5.0 # 2*p67+1*p123, from n1 opt guide
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67, from n1 opt guide
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 7.0 # 2*p67+1*p123, from n1 opt guide
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: true
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67+1*p123
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 5.0 # 2*p67+1*p123
port_pressure: [[2, '67'], [1, '123']]
- name: ldur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 6.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 6.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
prefix: q
- class: register
prefix: q
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.0
latency: 0.0
port_pressure: []
- name: mov
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.25
latency: 1.0 # 1*p3456
port_pressure: [[1, '3456']]
- name: mov
operands:
- class: register
prefix: v
shape: b
width: '*'
- class: register
prefix: v
shape: b
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: stp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p45+1*p67
port_pressure: [[2, '45'], [1, '67']]
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 2*p45+2*p67+1*p123
port_pressure: [[2, '45'], [2, '67'], [1, '123']]
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p45+2*p67
port_pressure: [[2, '45'], [2, '67']]
- name: stur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p23
port_pressure: [[1, '67'], [1, '23']]
- name: stur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p67+1*p45
port_pressure: [[2, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p23
port_pressure: [[1, '67'], [1, '23']]
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p45
port_pressure: [[1, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 0.5
latency: 0 # 1*p67+1*p45+1*p123
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p67+1*p45
port_pressure: [[2, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 1*p67+1*p45+1*p123
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 1*p67+1*p23+1*p123
port_pressure: [[1, '67'], [1, '23'], [1, '123']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]

View File

@@ -267,6 +267,34 @@ instruction_forms:
throughput: 0.5
latency: 6.0 # 1*p01
port_pressure: [[1, '01']]
- name: frecpe
operands:
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: frecpe
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: fsub
operands:
- class: register

View File

@@ -17,7 +17,20 @@ MODULE_DATA_DIR = os.path.join(
)
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ICL', 'ZEN1', 'ZEN2', 'TX2', 'A64FX']
SUPPORTED_ARCHS = [
'SNB',
'IVB',
'HSW',
'BDW',
'SKX',
'CSX',
'ICL',
'ZEN1',
'ZEN2',
'TX2',
'N1',
'A64FX',
]
# Stolen from pip
@@ -71,7 +84,8 @@ def create_parser(parser=None):
parser.add_argument(
'--arch',
type=str,
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, A64FX).',
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, '
'A64FX).',
)
parser.add_argument(
'--fixed',

View File

@@ -242,6 +242,7 @@ class MachineModel(object):
arch_dict = {
'a64fx': 'aarch64',
'tx2': 'aarch64',
'n1': 'aarch64',
'zen1': 'x86',
'zen+': 'x86',
'zen2': 'x86',