mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 00:50:06 +01:00
Merge branch 'master' of github.com:RRZE-HPC/osaca
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
name = 'osaca'
|
||||
__version__ = '0.3.3.dev0'
|
||||
__version__ = '0.3.6'
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Intel Broadwell
|
||||
arch_code: BDW
|
||||
isa: x86
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Cascade Lake SP
|
||||
arch_code: CSX
|
||||
isa: x86
|
||||
|
||||
@@ -9,8 +9,8 @@ class MOVEntryBuilder:
|
||||
port_occupancy = defaultdict(Fraction)
|
||||
for uops, ports in port_pressure:
|
||||
for p in ports:
|
||||
port_occupancy[p] += Fraction(uops, len(ports))
|
||||
return float(max(list(port_occupancy.values())+[0]))
|
||||
port_occupancy[p] += Fraction(uops, len(ports))
|
||||
return float(max(list(port_occupancy.values()) + [0]))
|
||||
|
||||
@staticmethod
|
||||
def classify(operands_types):
|
||||
@@ -18,10 +18,10 @@ class MOVEntryBuilder:
|
||||
store = 'mem' in operands_types[-1:]
|
||||
assert not (load and store), "Can not process a combined load-store instruction."
|
||||
return load, store
|
||||
|
||||
|
||||
def build_description(
|
||||
self, instruction_name, operand_types,
|
||||
port_pressure=[], latency=0, comment=None):
|
||||
self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None
|
||||
):
|
||||
if comment:
|
||||
comment = " # " + comment
|
||||
else:
|
||||
@@ -32,10 +32,7 @@ class MOVEntryBuilder:
|
||||
if ot == 'imd':
|
||||
description += ' - class: immediate\n imd: int\n'
|
||||
elif ot.startswith('mem'):
|
||||
description += (
|
||||
' - class: memory\n'
|
||||
' base: "*"\n'
|
||||
' offset: "*"\n')
|
||||
description += ' - class: memory\n' ' base: "*"\n' ' offset: "*"\n'
|
||||
if ot == 'mem_simple':
|
||||
description += ' index: ~\n'
|
||||
elif ot == 'mem_complex':
|
||||
@@ -45,18 +42,20 @@ class MOVEntryBuilder:
|
||||
description += ' scale: "*"\n'
|
||||
else:
|
||||
description += ' - class: register\n name: {}\n'.format(ot)
|
||||
|
||||
|
||||
description += (
|
||||
' latency: {latency}\n'
|
||||
' port_pressure: {port_pressure!r}\n'
|
||||
' throughput: {throughput}\n'
|
||||
' uops: {uops}\n').format(
|
||||
latency=latency,
|
||||
port_pressure=port_pressure,
|
||||
throughput=self.compute_throughput(port_pressure),
|
||||
uops=sum([i for i,p in port_pressure]))
|
||||
' uops: {uops}\n'
|
||||
).format(
|
||||
latency=latency,
|
||||
port_pressure=port_pressure,
|
||||
throughput=self.compute_throughput(port_pressure),
|
||||
uops=sum([i for i, p in port_pressure]),
|
||||
)
|
||||
return description
|
||||
|
||||
|
||||
def parse_port_pressure(self, port_pressure_str):
|
||||
"""
|
||||
Example:
|
||||
@@ -68,7 +67,7 @@ class MOVEntryBuilder:
|
||||
cycles, ports = p.split('*p')
|
||||
port_pressure.append([int(cycles), ports])
|
||||
return port_pressure
|
||||
|
||||
|
||||
def process_item(self, instruction_form, resources):
|
||||
"""
|
||||
Example:
|
||||
@@ -84,9 +83,7 @@ class MOVEntryBuilder:
|
||||
|
||||
class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
# for SNB and IVB
|
||||
def build_description(
|
||||
self, instruction_name, operand_types,
|
||||
port_pressure=[], latency=0):
|
||||
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
|
||||
load, store = self.classify(operand_types)
|
||||
|
||||
comment = None
|
||||
@@ -100,15 +97,14 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
||||
comment = "with store"
|
||||
|
||||
return MOVEntryBuilder.build_description(
|
||||
self, instruction_name, operand_types, port_pressure, latency, comment)
|
||||
self, instruction_name, operand_types, port_pressure, latency, comment
|
||||
)
|
||||
|
||||
|
||||
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
|
||||
# for HSW, BDW, SKX and CSX
|
||||
|
||||
def build_description(
|
||||
self, instruction_name, operand_types,
|
||||
port_pressure=[], latency=0):
|
||||
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
|
||||
load, store = self.classify(operand_types)
|
||||
|
||||
if load:
|
||||
@@ -116,7 +112,8 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
|
||||
latency += 4
|
||||
comment = "with load"
|
||||
return MOVEntryBuilder.build_description(
|
||||
self, instruction_name, operand_types, port_pressure, latency, comment)
|
||||
self, instruction_name, operand_types, port_pressure, latency, comment
|
||||
)
|
||||
if store:
|
||||
port_pressure_simple = port_pressure + [[1, '237'], [1, '4']]
|
||||
operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types]
|
||||
@@ -125,16 +122,28 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
|
||||
latency += 0
|
||||
return (
|
||||
MOVEntryBuilder.build_description(
|
||||
self, instruction_name, operands_simple, port_pressure_simple, latency,
|
||||
"with store, simple AGU") +
|
||||
'\n' +
|
||||
MOVEntryBuilder.build_description(
|
||||
self, instruction_name, operands_complex, port_pressure_complex, latency,
|
||||
"with store, complex AGU"))
|
||||
|
||||
self,
|
||||
instruction_name,
|
||||
operands_simple,
|
||||
port_pressure_simple,
|
||||
latency,
|
||||
"with store, simple AGU",
|
||||
)
|
||||
+ '\n'
|
||||
+ MOVEntryBuilder.build_description(
|
||||
self,
|
||||
instruction_name,
|
||||
operands_complex,
|
||||
port_pressure_complex,
|
||||
latency,
|
||||
"with store, complex AGU",
|
||||
)
|
||||
)
|
||||
|
||||
# Register only:
|
||||
return MOVEntryBuilder.build_description(
|
||||
self, instruction_name, operand_types, port_pressure, latency)
|
||||
self, instruction_name, operand_types, port_pressure, latency
|
||||
)
|
||||
|
||||
|
||||
np7 = MOVEntryBuilderIntelNoPort7AGU()
|
||||
@@ -149,7 +158,6 @@ snb_mov_instructions = [
|
||||
('mov imd gpr', ('1*p015', 1)),
|
||||
('mov imd mem', ('', 0)),
|
||||
('movabs imd gpr', ('1*p015', 1)), # AT&T version
|
||||
|
||||
# https://www.felixcloutier.com/x86/movapd
|
||||
('movapd xmm xmm', ('1*p5', 1)),
|
||||
('movapd xmm mem', ('', 0)),
|
||||
@@ -160,7 +168,6 @@ snb_mov_instructions = [
|
||||
('vmovapd ymm ymm', ('1*p5', 1)),
|
||||
('vmovapd ymm mem', ('', 0)),
|
||||
('vmovapd mem ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movaps
|
||||
('movaps xmm xmm', ('1*p5', 1)),
|
||||
('movaps xmm mem', ('', 0)),
|
||||
@@ -171,7 +178,6 @@ snb_mov_instructions = [
|
||||
('vmovaps ymm ymm', ('1*p5', 1)),
|
||||
('movaps ymm mem', ('', 0)),
|
||||
('movaps mem ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movd:movq
|
||||
('movd gpr mm', ('1*p5', 1)),
|
||||
('movd mem mm', ('', 0)),
|
||||
@@ -197,7 +203,6 @@ snb_mov_instructions = [
|
||||
('vmovd xmm mem', ('', 0)),
|
||||
('vmovq xmm gpr', ('1*p0', 1)),
|
||||
('vmovq xmm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movddup
|
||||
('movddup xmm xmm', ('1*p5', 1)),
|
||||
('movddup mem xmm', ('', 0)),
|
||||
@@ -205,10 +210,8 @@ snb_mov_instructions = [
|
||||
('vmovddup mem xmm', ('', 0)),
|
||||
('vmovddup ymm ymm', ('1*p5', 1)),
|
||||
('vmovddup mem ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movdq2q
|
||||
('movdq2q xmm mm', ('1*p015+1*p5', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
|
||||
('movdqa xmm xmm', ('1*p015', 1)),
|
||||
('movdqa mem xmm', ('', 0)),
|
||||
@@ -219,7 +222,6 @@ snb_mov_instructions = [
|
||||
('vmovdqa ymm ymm', ('1*p05', 1)),
|
||||
('vmovdqa mem ymm', ('', 0)),
|
||||
('vmovdqa ymm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
|
||||
('movdqu xmm xmm', ('1*p015', 1)),
|
||||
('movdqu mem xmm', ('', 0)),
|
||||
@@ -230,75 +232,60 @@ snb_mov_instructions = [
|
||||
('vmovdqu ymm ymm', ('1*p05', 1)),
|
||||
('vmovdqu mem ymm', ('', 0)),
|
||||
('vmovdqu ymm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movhlps
|
||||
('movhlps xmm xmm', ('1*p5', 1)),
|
||||
('vmovhlps xmm xmm xmm', ('1*p5', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movhpd
|
||||
('movhpd mem xmm', ('1*p5', 1)),
|
||||
('vmovhpd mem xmm xmm', ('1*p5', 1)),
|
||||
('movhpd xmm mem', ('', 0)),
|
||||
('vmovhpd mem xmm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movhps
|
||||
('movhps mem xmm', ('1*p5', 1)),
|
||||
('vmovhps mem xmm xmm', ('1*p5', 1)),
|
||||
('movhps xmm mem', ('', 0)),
|
||||
('vmovhps mem xmm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movlhps
|
||||
('movlhps xmm xmm', ('1*p5', 1)),
|
||||
('vmovlhps xmm xmm xmm', ('1*p5', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movlpd
|
||||
('movlpd mem xmm', ('1*p5', 1)),
|
||||
('vmovlpd mem xmm xmm', ('1*p5', 1)),
|
||||
('movlpd xmm mem', ('', 0)),
|
||||
('vmovlpd mem xmm', ('1*p5', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movlps
|
||||
('movlps mem xmm', ('1*p5', 1)),
|
||||
('vmovlps mem xmm xmm', ('1*p5', 1)),
|
||||
('movlps xmm mem', ('', 0)),
|
||||
('vmovlps mem xmm', ('1*p5', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movmskpd
|
||||
('movmskpd xmm gpr', ('1*p0', 2)),
|
||||
('vmovmskpd xmm gpr', ('1*p0', 2)),
|
||||
('vmovmskpd ymm gpr', ('1*p0', 2)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movmskps
|
||||
('movmskps xmm gpr', ('1*p0', 1)),
|
||||
('vmovmskps xmm gpr', ('1*p0', 1)),
|
||||
('vmovmskps ymm gpr', ('1*p0', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movntdq
|
||||
('movntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
('vmovntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
('vmovntdq ymm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
|
||||
# https://www.felixcloutier.com/x86/movntdqa
|
||||
('movntdqa mem xmm', ('', 0)),
|
||||
('vmovntdqa mem xmm', ('', 0)),
|
||||
('vmovntdqa mem ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movnti
|
||||
('movnti gpr mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
|
||||
# https://www.felixcloutier.com/x86/movntpd
|
||||
('movntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
('vmovntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
('vmovntpd ymm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
|
||||
# https://www.felixcloutier.com/x86/movntps
|
||||
('movntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
('vmovntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
('vmovntps ymm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
|
||||
# https://www.felixcloutier.com/x86/movntq
|
||||
('movntq mm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
|
||||
# https://www.felixcloutier.com/x86/movq
|
||||
('movq mm mm', ('', 0)),
|
||||
('movq mem mm', ('', 0)),
|
||||
@@ -309,14 +296,11 @@ snb_mov_instructions = [
|
||||
('vmovq xmm xmm', ('1*p015', 1)),
|
||||
('vmovq mem xmm', ('', 0)),
|
||||
('vmovq xmm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movq2dq
|
||||
('movq2dq mm xmm', ('1*p015', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
|
||||
# TODO combined load-store is currently not supported
|
||||
# ('movs mem mem', ()),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movsd
|
||||
('movsd xmm xmm', ('1*p5', 1)),
|
||||
('movsd mem xmm', ('', 0)),
|
||||
@@ -324,7 +308,6 @@ snb_mov_instructions = [
|
||||
('vmovsd xmm xmm xmm', ('1*p5', 1)),
|
||||
('vmovsd mem xmm', ('', 0)),
|
||||
('vmovsd xmm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movshdup
|
||||
('movshdup xmm xmm', ('1*p5', 1)),
|
||||
('movshdup mem xmm', ('', 0)),
|
||||
@@ -332,7 +315,6 @@ snb_mov_instructions = [
|
||||
('vmovshdup mem xmm', ('', 0)),
|
||||
('vmovshdup ymm ymm', ('1*p5', 1)),
|
||||
('vmovshdup mem ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movsldup
|
||||
('movsldup xmm xmm', ('1*p5', 1)),
|
||||
('movsldup mem xmm', ('', 0)),
|
||||
@@ -340,7 +322,6 @@ snb_mov_instructions = [
|
||||
('vmovsldup mem xmm', ('', 0)),
|
||||
('vmovsldup ymm ymm', ('1*p5', 1)),
|
||||
('vmovsldup mem ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movss
|
||||
('movss xmm xmm', ('1*p5', 1)),
|
||||
('movss mem xmm', ('', 0)),
|
||||
@@ -349,7 +330,6 @@ snb_mov_instructions = [
|
||||
('vmovss xmm xmm', ('1*p5', 1)),
|
||||
('vmovss xmm mem', ('', 0)),
|
||||
('movss mem xmm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movsx:movsxd
|
||||
('movsx gpr gpr', ('1*p015', 1)),
|
||||
('movsx mem gpr', ('', 0)),
|
||||
@@ -363,7 +343,6 @@ snb_mov_instructions = [
|
||||
('movsl mem gpr', ('', 0)), # AT&T version
|
||||
('movsq gpr gpr', ('1*p015', 1)), # AT&T version
|
||||
('movsq mem gpr', ('', 0)), # AT&T version
|
||||
|
||||
# https://www.felixcloutier.com/x86/movupd
|
||||
('movupd xmm xmm', ('1*p5', 1)),
|
||||
('movupd mem xmm', ('', 0)),
|
||||
@@ -374,7 +353,6 @@ snb_mov_instructions = [
|
||||
('vmovupd ymm ymm', ('1*p5', 1)),
|
||||
('vmovupd mem ymm', ('', 0)),
|
||||
('vmovupd ymm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movups
|
||||
('movups xmm xmm', ('1*p5', 1)),
|
||||
('movups mem xmm', ('', 0)),
|
||||
@@ -385,7 +363,6 @@ snb_mov_instructions = [
|
||||
('vmovups ymm ymm', ('1*p5', 1)),
|
||||
('vmovups mem ymm', ('', 0)),
|
||||
('vmovups ymm mem', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movzx
|
||||
('movzx gpr gpr', ('1*p015', 1)),
|
||||
('movzx mem gpr', ('', 0)),
|
||||
@@ -397,7 +374,6 @@ snb_mov_instructions = [
|
||||
('movzl mem gpr', ('', 0)), # AT&T version
|
||||
('movzq gpr gpr', ('1*p015', 1)), # AT&T version
|
||||
('movzq mem gpr', ('', 0)), # AT&T version
|
||||
|
||||
# https://www.felixcloutier.com/x86/cmovcc
|
||||
('cmova gpr gpr', ('1*p015+2*p05', 2)),
|
||||
('cmova mem gpr', ('1*p015+2*p05', 2)),
|
||||
@@ -459,12 +435,10 @@ snb_mov_instructions = [
|
||||
('cmovs mem gpr', ('1*p015+1*p05', 2)),
|
||||
('cmovz gpr gpr', ('1*p015+1*p05', 2)),
|
||||
('cmovz mem gpr', ('1*p015+1*p05', 2)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/pmovmskb
|
||||
('pmovmskb mm gpr', ('1*p0', 2)),
|
||||
('pmovmskb xmm gpr', ('1*p0', 2)),
|
||||
('vpmovmskb xmm gpr', ('1*p0', 2)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/pmovsx
|
||||
('pmovsxbw xmm xmm', ('1*p15', 1)),
|
||||
('pmovsxbw mem xmm', ('1*p15', 1)),
|
||||
@@ -484,7 +458,6 @@ snb_mov_instructions = [
|
||||
('vpmovsxbd mem ymm', ('1*p15', 1)),
|
||||
('vpmovsxbq ymm ymm', ('1*p15', 1)),
|
||||
('vpmovsxbq mem ymm', ('1*p15', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/pmovzx
|
||||
('pmovzxbw xmm xmm', ('1*p15', 1)),
|
||||
('pmovzxbw mem xmm', ('1*p15', 1)),
|
||||
@@ -494,307 +467,294 @@ snb_mov_instructions = [
|
||||
('vpmovzxbw mem ymm', ('1*p15', 1)),
|
||||
]
|
||||
|
||||
ivb_mov_instructions = list(OrderedDict(snb_mov_instructions + [
|
||||
# https://www.felixcloutier.com/x86/mov
|
||||
('mov gpr gpr', ('', 0)),
|
||||
('mov imd gpr', ('', 0)),
|
||||
ivb_mov_instructions = list(
|
||||
OrderedDict(
|
||||
snb_mov_instructions
|
||||
+ [
|
||||
# https://www.felixcloutier.com/x86/mov
|
||||
('mov gpr gpr', ('', 0)),
|
||||
('mov imd gpr', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movapd
|
||||
('movapd xmm xmm', ('', 0)),
|
||||
('vmovapd xmm xmm', ('', 0)),
|
||||
('vmovapd ymm ymm', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movaps
|
||||
('movaps xmm xmm', ('', 0)),
|
||||
('vmovaps xmm xmm', ('', 0)),
|
||||
('vmovaps ymm ymm', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
|
||||
('movdqa xmm xmm', ('', 0)),
|
||||
('vmovdqa xmm xmm', ('', 0)),
|
||||
('vmovdqa ymm ymm', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
|
||||
('movdqu xmm xmm', ('', 0)),
|
||||
('vmovdqu xmm xmm', ('', 0)),
|
||||
('vmovdqu ymm ymm', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movupd
|
||||
('movupd xmm xmm', ('', 0)),
|
||||
('vmovupd xmm xmm', ('', 0)),
|
||||
('vmovupd ymm ymm', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movups
|
||||
('movups xmm xmm', ('', 0)),
|
||||
('vmovups xmm xmm', ('', 0)),
|
||||
('vmovups ymm ymm', ('', 0)),
|
||||
]
|
||||
).items()
|
||||
)
|
||||
|
||||
# https://www.felixcloutier.com/x86/movapd
|
||||
('movapd xmm xmm', ('', 0)),
|
||||
('vmovapd xmm xmm', ('', 0)),
|
||||
('vmovapd ymm ymm', ('', 0)),
|
||||
hsw_mov_instructions = list(
|
||||
OrderedDict(
|
||||
ivb_mov_instructions
|
||||
+ [
|
||||
# https://www.felixcloutier.com/x86/mov
|
||||
('mov imd gpr', ('1*p0156', 1)),
|
||||
('mov gpr gpr', ('1*p0156', 1)),
|
||||
('movabs imd gpr', ('1*p0156', 1)), # AT&T version
|
||||
# https://www.felixcloutier.com/x86/movbe
|
||||
('movbe gpr mem', ('1*p15', 6)),
|
||||
('movbe mem gpr', ('1*p15', 6)),
|
||||
# https://www.felixcloutier.com/x86/movmskpd
|
||||
('movmskpd xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskpd xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskpd ymm gpr', ('1*p0', 3)),
|
||||
# https://www.felixcloutier.com/x86/movmskps
|
||||
('movmskps xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskps xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskps ymm gpr', ('1*p0', 3)),
|
||||
# https://www.felixcloutier.com/x86/movsx:movsxd
|
||||
('movsx gpr gpr', ('1*p0156', 1)),
|
||||
('movsb gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movsw gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movsl gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movsq gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
# https://www.felixcloutier.com/x86/movzx
|
||||
('movzx gpr gpr', ('1*p0156', 1)),
|
||||
('movzb gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movzw gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movzl gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movzq gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
# https://www.felixcloutier.com/x86/cmovcc
|
||||
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmova mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovae mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovb gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovb mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovc mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmove gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmove mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovg mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovge mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovl mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovle mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovna mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovne mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovng mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovno mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovns mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovo mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovp mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
|
||||
# https://www.felixcloutier.com/x86/pmovmskb
|
||||
('pmovmskb mm gpr', ('1*p0', 3)),
|
||||
('pmovmskb xmm gpr', ('1*p0', 3)),
|
||||
('vpmovmskb xmm gpr', ('1*p0', 3)),
|
||||
('vpmovmskb ymm gpr', ('1*p0', 3)),
|
||||
# https://www.felixcloutier.com/x86/pmovsx
|
||||
('pmovsxbw xmm xmm', ('1*p5', 1)),
|
||||
('pmovsxbw mem xmm', ('1*p5', 1)),
|
||||
('pmovsxbd xmm xmm', ('1*p5', 1)),
|
||||
('pmovsxbd mem xmm', ('1*p5', 1)),
|
||||
('pmovsxbq xmm xmm', ('1*p5', 1)),
|
||||
('pmovsxbq mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbw xmm xmm', ('1*p5', 1)),
|
||||
('vpmovsxbw mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbd xmm xmm', ('1*p5', 1)),
|
||||
('vpmovsxbd mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbq xmm xmm', ('1*p5', 1)),
|
||||
('vpmovsxbq mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbw ymm ymm', ('1*p5', 1)),
|
||||
('vpmovsxbw mem ymm', ('1*p5', 1)),
|
||||
('vpmovsxbd ymm ymm', ('1*p5', 1)),
|
||||
('vpmovsxbd mem ymm', ('1*p5', 1)),
|
||||
('vpmovsxbq ymm ymm', ('1*p5', 1)),
|
||||
('vpmovsxbq mem ymm', ('1*p5', 1)),
|
||||
# https://www.felixcloutier.com/x86/pmovzx
|
||||
('pmovzxbw xmm xmm', ('1*p5', 1)),
|
||||
('pmovzxbw mem xmm', ('1*p5', 1)),
|
||||
('vpmovzxbw xmm xmm', ('1*p5', 1)),
|
||||
('vpmovzxbw mem xmm', ('1*p5', 1)),
|
||||
('vpmovzxbw ymm ymm', ('1*p5', 1)),
|
||||
('vpmovzxbw mem ymm', ('1*p5', 1)),
|
||||
]
|
||||
).items()
|
||||
)
|
||||
|
||||
# https://www.felixcloutier.com/x86/movaps
|
||||
('movaps xmm xmm', ('', 0)),
|
||||
('vmovaps xmm xmm', ('', 0)),
|
||||
('vmovaps ymm ymm', ('', 0)),
|
||||
bdw_mov_instructions = list(
|
||||
OrderedDict(
|
||||
hsw_mov_instructions
|
||||
+ [
|
||||
# https://www.felixcloutier.com/x86/cmovcc
|
||||
('cmova gpr gpr', ('2*p06', 1)),
|
||||
('cmova mem gpr', ('2*p06', 1)),
|
||||
('cmovae gpr gpr', ('1*p06', 1)),
|
||||
('cmovae mem gpr', ('2*p06', 1)),
|
||||
('cmovb gpr gpr', ('2*p06', 1)),
|
||||
('cmovb mem gpr', ('1*p06', 1)),
|
||||
('cmovbe gpr gpr', ('2*p06', 1)),
|
||||
('cmovbe mem gpr', ('2*p06', 1)),
|
||||
('cmovc gpr gpr', ('1*p06', 1)),
|
||||
('cmovc mem gpr', ('1*p06', 1)),
|
||||
('cmove gpr gpr', ('1*p06', 1)),
|
||||
('cmove mem gpr', ('1*p06', 1)),
|
||||
('cmovg gpr gpr', ('1*p06', 1)),
|
||||
('cmovg mem gpr', ('1*p06', 1)),
|
||||
('cmovge gpr gpr', ('1*p06', 1)),
|
||||
('cmovge mem gpr', ('1*p06', 1)),
|
||||
('cmovl gpr gpr', ('1*p06', 1)),
|
||||
('cmovl mem gpr', ('1*p06', 1)),
|
||||
('cmovle gpr gpr', ('1*p06', 1)),
|
||||
('cmovle mem gpr', ('1*p06', 1)),
|
||||
('cmovna gpr gpr', ('2*p06', 1)),
|
||||
('cmovna mem gpr', ('2*p06', 1)),
|
||||
('cmovnae gpr gpr', ('1*p06', 1)),
|
||||
('cmovnae mem gpr', ('1*p06', 1)),
|
||||
('cmovnb gpr gpr', ('1*p06', 1)),
|
||||
('cmovnb mem gpr', ('1*p06', 1)),
|
||||
('cmovnbe gpr gpr', ('2*p06', 1)),
|
||||
('cmovnbe mem gpr', ('2*p06', 1)),
|
||||
('cmovnb gpr gpr', ('1*p06', 1)),
|
||||
('cmovnb mem gpr', ('1*p06', 1)),
|
||||
('cmovnc gpr gpr', ('1*p06', 1)),
|
||||
('cmovnc mem gpr', ('1*p06', 1)),
|
||||
('cmovne gpr gpr', ('1*p06', 1)),
|
||||
('cmovne mem gpr', ('1*p06', 1)),
|
||||
('cmovng gpr gpr', ('1*p06', 1)),
|
||||
('cmovng mem gpr', ('1*p06', 1)),
|
||||
('cmovnge gpr gpr', ('1*p06', 1)),
|
||||
('cmovnge mem gpr', ('1*p06', 1)),
|
||||
('cmovnl gpr gpr', ('1*p06', 1)),
|
||||
('cmovnl mem gpr', ('1*p06', 1)),
|
||||
('cmovno gpr gpr', ('1*p06', 1)),
|
||||
('cmovno mem gpr', ('1*p06', 1)),
|
||||
('cmovnp gpr gpr', ('1*p06', 1)),
|
||||
('cmovnp mem gpr', ('1*p06', 1)),
|
||||
('cmovns gpr gpr', ('1*p06', 1)),
|
||||
('cmovns mem gpr', ('1*p06', 1)),
|
||||
('cmovnz gpr gpr', ('1*p06', 1)),
|
||||
('cmovnz mem gpr', ('1*p06', 1)),
|
||||
('cmovo gpr gpr', ('1*p06', 1)),
|
||||
('cmovo mem gpr', ('1*p06', 1)),
|
||||
('cmovp gpr gpr', ('1*p06', 1)),
|
||||
('cmovp mem gpr', ('1*p06', 1)),
|
||||
('cmovpe gpr gpr', ('1*p06', 1)),
|
||||
('cmovpe mem gpr', ('1*p06', 1)),
|
||||
('cmovpo gpr gpr', ('1*p06', 1)),
|
||||
('cmovpo mem gpr', ('1*p06', 1)),
|
||||
('cmovs gpr gpr', ('1*p06', 1)),
|
||||
('cmovs mem gpr', ('1*p06', 1)),
|
||||
('cmovz gpr gpr', ('1*p06', 1)),
|
||||
('cmovz mem gpr', ('1*p06', 1)),
|
||||
]
|
||||
).items()
|
||||
)
|
||||
|
||||
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
|
||||
('movdqa xmm xmm', ('', 0)),
|
||||
('vmovdqa xmm xmm', ('', 0)),
|
||||
('vmovdqa ymm ymm', ('', 0)),
|
||||
skx_mov_instructions = list(
|
||||
OrderedDict(
|
||||
bdw_mov_instructions
|
||||
+ [
|
||||
# https://www.felixcloutier.com/x86/movapd
|
||||
# TODO with masking!
|
||||
# TODO the following may eliminate or be bound to 1*p0156:
|
||||
# ('movapd xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovapd xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovapd ymm ymm', ('1*p5', 1)),
|
||||
# https://www.felixcloutier.com/x86/movaps
|
||||
# TODO with masking!
|
||||
# TODO the following may eliminate or be bound to 1*p0156:
|
||||
# ('movaps xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovaps xmm xmm', ('1*p5', 1)),
|
||||
# ('vmovaps ymm ymm', ('1*p5', 1)),
|
||||
# https://www.felixcloutier.com/x86/movbe
|
||||
('movbe gpr mem', ('1*p15', 4)),
|
||||
('movbe mem gpr', ('1*p15', 4)),
|
||||
# https://www.felixcloutier.com/x86/movddup
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movntdq
|
||||
('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
# https://www.felixcloutier.com/x86/movntdqa
|
||||
('vmovntdqa mem zmm', ('', 0)),
|
||||
# https://www.felixcloutier.com/x86/movntpd
|
||||
('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
# https://www.felixcloutier.com/x86/movntps
|
||||
('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use?
|
||||
# https://www.felixcloutier.com/x86/movq2dq
|
||||
('movq2dq mm xmm', ('1*p0+1*p015', 1)),
|
||||
# https://www.felixcloutier.com/x86/movsd
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movshdup
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movsldup
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movss
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movupd
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/movups
|
||||
# TODO with masking!
|
||||
# https://www.felixcloutier.com/x86/pmovsx
|
||||
# TODO with masking!
|
||||
('vpmovsxbw ymm zmm', ('1*p5', 3)),
|
||||
('vpmovsxbw mem zmm', ('1*p5', 1)),
|
||||
]
|
||||
).items()
|
||||
)
|
||||
|
||||
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
|
||||
('movdqu xmm xmm', ('', 0)),
|
||||
('vmovdqu xmm xmm', ('', 0)),
|
||||
('vmovdqu ymm ymm', ('', 0)),
|
||||
csx_mov_instructions = OrderedDict(skx_mov_instructions + []).items()
|
||||
|
||||
# https://www.felixcloutier.com/x86/movupd
|
||||
('movupd xmm xmm', ('', 0)),
|
||||
('vmovupd xmm xmm', ('', 0)),
|
||||
('vmovupd ymm ymm', ('', 0)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movups
|
||||
('movups xmm xmm', ('', 0)),
|
||||
('vmovups xmm xmm', ('', 0)),
|
||||
('vmovups ymm ymm', ('', 0)),
|
||||
]).items())
|
||||
|
||||
hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
|
||||
# https://www.felixcloutier.com/x86/mov
|
||||
('mov imd gpr', ('1*p0156', 1)),
|
||||
('mov gpr gpr', ('1*p0156', 1)),
|
||||
('movabs imd gpr', ('1*p0156', 1)), # AT&T version
|
||||
|
||||
# https://www.felixcloutier.com/x86/movbe
|
||||
('movbe gpr mem', ('1*p15', 6)),
|
||||
('movbe mem gpr', ('1*p15', 6)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movmskpd
|
||||
('movmskpd xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskpd xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskpd ymm gpr', ('1*p0', 3)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movmskps
|
||||
('movmskps xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskps xmm gpr', ('1*p0', 3)),
|
||||
('vmovmskps ymm gpr', ('1*p0', 3)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/movsx:movsxd
|
||||
('movsx gpr gpr', ('1*p0156', 1)),
|
||||
('movsb gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movsw gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movsl gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movsq gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
|
||||
# https://www.felixcloutier.com/x86/movzx
|
||||
('movzx gpr gpr', ('1*p0156', 1)),
|
||||
('movzb gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movzw gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movzl gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
('movzq gpr gpr', ('1*p0156', 1)), # AT&T version
|
||||
|
||||
# https://www.felixcloutier.com/x86/cmovcc
|
||||
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmova mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovae gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovae mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovb gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovb mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovbe gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovbe mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovc gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovc mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmove gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmove mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovg gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovg mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovge gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovge mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovl gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovl mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovle gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovle mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovna gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovna mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovnae gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnae mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnbe gpr gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovnbe mem gpr', ('1*p0156+2*p06', 2)),
|
||||
('cmovnb gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnb mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnc gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnc mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovne gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovne mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovng gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovng mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnge gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnge mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnl gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnl mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovno gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovno mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnp gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnp mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovns gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovns mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnz gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovnz mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovo gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovo mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovp gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovp mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpe gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpe mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpo gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovpo mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovs gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
|
||||
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/pmovmskb
|
||||
('pmovmskb mm gpr', ('1*p0', 3)),
|
||||
('pmovmskb xmm gpr', ('1*p0', 3)),
|
||||
('vpmovmskb xmm gpr', ('1*p0', 3)),
|
||||
('vpmovmskb ymm gpr', ('1*p0', 3)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/pmovsx
|
||||
('pmovsxbw xmm xmm', ('1*p5', 1)),
|
||||
('pmovsxbw mem xmm', ('1*p5', 1)),
|
||||
('pmovsxbd xmm xmm', ('1*p5', 1)),
|
||||
('pmovsxbd mem xmm', ('1*p5', 1)),
|
||||
('pmovsxbq xmm xmm', ('1*p5', 1)),
|
||||
('pmovsxbq mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbw xmm xmm', ('1*p5', 1)),
|
||||
('vpmovsxbw mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbd xmm xmm', ('1*p5', 1)),
|
||||
('vpmovsxbd mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbq xmm xmm', ('1*p5', 1)),
|
||||
('vpmovsxbq mem xmm', ('1*p5', 1)),
|
||||
('vpmovsxbw ymm ymm', ('1*p5', 1)),
|
||||
('vpmovsxbw mem ymm', ('1*p5', 1)),
|
||||
('vpmovsxbd ymm ymm', ('1*p5', 1)),
|
||||
('vpmovsxbd mem ymm', ('1*p5', 1)),
|
||||
('vpmovsxbq ymm ymm', ('1*p5', 1)),
|
||||
('vpmovsxbq mem ymm', ('1*p5', 1)),
|
||||
|
||||
# https://www.felixcloutier.com/x86/pmovzx
|
||||
('pmovzxbw xmm xmm', ('1*p5', 1)),
|
||||
('pmovzxbw mem xmm', ('1*p5', 1)),
|
||||
('vpmovzxbw xmm xmm', ('1*p5', 1)),
|
||||
('vpmovzxbw mem xmm', ('1*p5', 1)),
|
||||
('vpmovzxbw ymm ymm', ('1*p5', 1)),
|
||||
('vpmovzxbw mem ymm', ('1*p5', 1)),
|
||||
]).items())
|
||||
|
||||
# Broadwell table: the Haswell table plus Broadwell-specific CMOVcc overrides.
# The concatenated pairs are folded through an OrderedDict, so an entry below
# replaces the Haswell entry that has the same "mnemonic operands" key while
# keeping that key's original position.
# Each value is presumably (port pressure, latency) -- TODO confirm against
# the entry builder that consumes these items.
#
# Mnemonics that are aliases of one opcode must agree with each other:
#   cmovae == cmovnb == cmovnc   (CF = 0)
#   cmovb  == cmovc  == cmovnae  (CF = 1)
# 'cmovae mem gpr' and 'cmovb gpr gpr' used to carry '2*p06' while their
# aliases carried '1*p06'; they are aligned with their aliases here.
# NOTE(review): cmovnle (alias of cmovg) has no entry in this table -- verify
# whether one of the base tables covers it.
bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [
    # https://www.felixcloutier.com/x86/cmovcc
    ('cmova gpr gpr', ('2*p06', 1)),
    ('cmova mem gpr', ('2*p06', 1)),
    ('cmovae gpr gpr', ('1*p06', 1)),
    ('cmovae mem gpr', ('1*p06', 1)),
    ('cmovb gpr gpr', ('1*p06', 1)),
    ('cmovb mem gpr', ('1*p06', 1)),
    ('cmovbe gpr gpr', ('2*p06', 1)),
    ('cmovbe mem gpr', ('2*p06', 1)),
    ('cmovc gpr gpr', ('1*p06', 1)),
    ('cmovc mem gpr', ('1*p06', 1)),
    ('cmove gpr gpr', ('1*p06', 1)),
    ('cmove mem gpr', ('1*p06', 1)),
    ('cmovg gpr gpr', ('1*p06', 1)),
    ('cmovg mem gpr', ('1*p06', 1)),
    ('cmovge gpr gpr', ('1*p06', 1)),
    ('cmovge mem gpr', ('1*p06', 1)),
    ('cmovl gpr gpr', ('1*p06', 1)),
    ('cmovl mem gpr', ('1*p06', 1)),
    ('cmovle gpr gpr', ('1*p06', 1)),
    ('cmovle mem gpr', ('1*p06', 1)),
    ('cmovna gpr gpr', ('2*p06', 1)),
    ('cmovna mem gpr', ('2*p06', 1)),
    ('cmovnae gpr gpr', ('1*p06', 1)),
    ('cmovnae mem gpr', ('1*p06', 1)),
    ('cmovnb gpr gpr', ('1*p06', 1)),
    ('cmovnb mem gpr', ('1*p06', 1)),
    ('cmovnbe gpr gpr', ('2*p06', 1)),
    ('cmovnbe mem gpr', ('2*p06', 1)),
    ('cmovnc gpr gpr', ('1*p06', 1)),
    ('cmovnc mem gpr', ('1*p06', 1)),
    ('cmovne gpr gpr', ('1*p06', 1)),
    ('cmovne mem gpr', ('1*p06', 1)),
    ('cmovng gpr gpr', ('1*p06', 1)),
    ('cmovng mem gpr', ('1*p06', 1)),
    ('cmovnge gpr gpr', ('1*p06', 1)),
    ('cmovnge mem gpr', ('1*p06', 1)),
    ('cmovnl gpr gpr', ('1*p06', 1)),
    ('cmovnl mem gpr', ('1*p06', 1)),
    ('cmovno gpr gpr', ('1*p06', 1)),
    ('cmovno mem gpr', ('1*p06', 1)),
    ('cmovnp gpr gpr', ('1*p06', 1)),
    ('cmovnp mem gpr', ('1*p06', 1)),
    ('cmovns gpr gpr', ('1*p06', 1)),
    ('cmovns mem gpr', ('1*p06', 1)),
    ('cmovnz gpr gpr', ('1*p06', 1)),
    ('cmovnz mem gpr', ('1*p06', 1)),
    ('cmovo gpr gpr', ('1*p06', 1)),
    ('cmovo mem gpr', ('1*p06', 1)),
    ('cmovp gpr gpr', ('1*p06', 1)),
    ('cmovp mem gpr', ('1*p06', 1)),
    ('cmovpe gpr gpr', ('1*p06', 1)),
    ('cmovpe mem gpr', ('1*p06', 1)),
    ('cmovpo gpr gpr', ('1*p06', 1)),
    ('cmovpo mem gpr', ('1*p06', 1)),
    ('cmovs gpr gpr', ('1*p06', 1)),
    ('cmovs mem gpr', ('1*p06', 1)),
    ('cmovz gpr gpr', ('1*p06', 1)),
    ('cmovz mem gpr', ('1*p06', 1)),
]).items())
|
||||
|
||||
# Skylake-X table: the Broadwell table extended with the entries below.
# Folding the concatenated pairs through an OrderedDict lets a later pair
# replace an earlier one that has the same "mnemonic operands" key.
skx_mov_instructions = list(
    OrderedDict(
        bdw_mov_instructions
        + [
            # https://www.felixcloutier.com/x86/movapd
            # TODO: masked forms are still missing.
            # TODO: the register-to-register forms below may be eliminated
            #       or be bound to 1*p0156:
            # ('movapd xmm xmm', ('1*p5', 1)),
            # ('vmovapd xmm xmm', ('1*p5', 1)),
            # ('vmovapd ymm ymm', ('1*p5', 1)),

            # https://www.felixcloutier.com/x86/movaps
            # TODO: masked forms are still missing.
            # TODO: the register-to-register forms below may be eliminated
            #       or be bound to 1*p0156:
            # ('movaps xmm xmm', ('1*p5', 1)),
            # ('vmovaps xmm xmm', ('1*p5', 1)),
            # ('vmovaps ymm ymm', ('1*p5', 1)),

            # https://www.felixcloutier.com/x86/movbe
            ('movbe gpr mem', ('1*p15', 4)),
            ('movbe mem gpr', ('1*p15', 4)),

            # https://www.felixcloutier.com/x86/movddup
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movntdq
            # TODO: non-temporal store -- which latency should be used?
            ('vmovntdq zmm mem', ('', 0)),

            # https://www.felixcloutier.com/x86/movntdqa
            ('vmovntdqa mem zmm', ('', 0)),

            # https://www.felixcloutier.com/x86/movntpd
            # TODO: non-temporal store -- which latency should be used?
            ('vmovntpd zmm mem', ('', 0)),

            # https://www.felixcloutier.com/x86/movntps
            # TODO: non-temporal store -- which latency should be used?
            ('vmovntps zmm mem', ('', 0)),

            # https://www.felixcloutier.com/x86/movq2dq
            ('movq2dq mm xmm', ('1*p0+1*p015', 1)),

            # https://www.felixcloutier.com/x86/movsd
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movshdup
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movsldup
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movss
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movupd
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/movups
            # TODO: masked forms are still missing.

            # https://www.felixcloutier.com/x86/pmovsx
            # TODO: masked forms are still missing.
            ('vpmovsxbw ymm zmm', ('1*p5', 3)),
            ('vpmovsxbw mem zmm', ('1*p5', 1)),
        ]
    ).items()
)
|
||||
|
||||
# Cascade Lake table: currently the Skylake-X table with no additional
# entries; CSX-specific overrides go into the (empty) list below.
# Materialised with list(...) like the sibling tables so that it is a plain
# list of (key, value) pairs that can itself be extended with `+ [...]`.
csx_mov_instructions = list(OrderedDict(skx_mov_instructions + [
]).items())
|
||||
|
||||
def get_description(arch, rhs_comment=None):
|
||||
descriptions = {
|
||||
@@ -803,7 +763,7 @@ def get_description(arch, rhs_comment=None):
|
||||
'hsw': '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions]),
|
||||
'bdw': '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions]),
|
||||
'skx': '\n'.join([p7.process_item(*item) for item in skx_mov_instructions]),
|
||||
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions])
|
||||
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]),
|
||||
}
|
||||
|
||||
description = descriptions[arch]
|
||||
@@ -813,20 +773,21 @@ def get_description(arch, rhs_comment=None):
|
||||
|
||||
commented_description = ""
|
||||
for l in descriptions[arch].split('\n'):
|
||||
commented_description += ("{:<"+str(max_length)+"} # {}\n").format(l, rhs_comment)
|
||||
commented_description += ("{:<" + str(max_length) + "} # {}\n").format(l, rhs_comment)
|
||||
description = commented_description
|
||||
|
||||
return description
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import sys

    # Exactly one argument, the architecture code, is expected.
    if len(sys.argv) != 2:
        # A usage error is a failure: report it on stderr and exit non-zero,
        # matching the exit status used for an unknown architecture below.
        print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    # Only the lookup is guarded; printing happens after it succeeded.
    try:
        output = get_description(sys.argv[1], rhs_comment=' '.join(sys.argv))
    except KeyError:
        # Diagnostics go to stderr so they never end up in redirected output.
        print("Unknown architecture.", file=sys.stderr)
        sys.exit(1)

    print(output)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Intel Haswell
|
||||
arch_code: HSW
|
||||
isa: x86
|
||||
|
||||
@@ -1,52 +1,35 @@
|
||||
osaca_version: 0.3.0
|
||||
osaca_version: 0.3.4
|
||||
isa: "AArch64"
|
||||
# Contains all operand-irregular instruction forms OSACA supports for AArch64.
|
||||
# Operand-regular for an AArch64 instruction form with N operands in the shape of
|
||||
# mnemonic op1 ... opN
|
||||
# means that op1 is the only destination operand and op2 to op(N) are source operands.
|
||||
instruction_forms:
|
||||
- name: "fmla"
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "s"
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "s"
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "s"
|
||||
prefix: "*"
|
||||
shape: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- name: "fmla"
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "d"
|
||||
source: true
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "v"
|
||||
shape: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
@@ -58,52 +41,14 @@ instruction_forms:
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: false
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: true
|
||||
- name: "ldp"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
@@ -115,14 +60,63 @@ instruction_forms:
|
||||
post-indexed: true
|
||||
source: true
|
||||
destination: true
|
||||
- name: "stp"
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "register"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: true
|
||||
- name: [ldr, ldur]
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
source: true
|
||||
destination: true
|
||||
- name: [ldr, ldur]
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
source: true
|
||||
destination: true
|
||||
- name: stp
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
@@ -134,14 +128,33 @@ instruction_forms:
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stp"
|
||||
- name: stp
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
source: false
|
||||
destination: true
|
||||
- name: stp
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
@@ -150,73 +163,13 @@ instruction_forms:
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
post-indexed: true
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
- name: [str, stur]
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "x"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: "*"
|
||||
post-indexed: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: "*"
|
||||
post-indexed: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- name: "str"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: "*"
|
||||
post-indexed: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stur"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "q"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
pre-indexed: "*"
|
||||
post-indexed: "*"
|
||||
source: false
|
||||
destination: true
|
||||
- name: "stur"
|
||||
operands:
|
||||
- class: "register"
|
||||
prefix: "d"
|
||||
prefix: "*"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.0
|
||||
osaca_version: 0.3.4
|
||||
isa: "x86"
|
||||
# Contains all operand-irregular instruction forms OSACA supports for x86.
|
||||
# Operand-regular for an x86 AT&T instruction form with N operands in the shape of
|
||||
@@ -2549,7 +2549,7 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: mulpd
|
||||
- name: [mulpd, mulps]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
@@ -2559,7 +2559,597 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pand, por, pxor]
|
||||
- name: [packssdw, packsswb, packusdw, packuswb]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [packssdw, packsswb, packusdw, packuswb]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [paddb, paddw, paddd, paddq, paddsb, paddsw, paddusb, paddusw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [paddb, paddw, paddd, paddq, paddsb, paddsw, paddusb, paddusw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pand, por, pxor, pandn]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pand, por, pxor, pandn]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pavgb, pavgw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pavgb, pavgw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pcmpeqb, pcmpeqw, pcmpeqd, pcmpeqq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pcmpeqb, pcmpeqw, pcmpeqd, pcmpeqq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmaddubsw
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmaddubsw
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmaddwd
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmaddwd
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pmaxsb, pmaxsw, pmaxsd, pmaxsq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pmaxsb, pmaxsw, pmaxsd, pmaxsq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pmaxub, pmaxuw, pmaxud, pmaxuq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pmaxub, pmaxuw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pminsb, pminsw, pminsd, pminsq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pminsb, pminsw, pminsd, pminsq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pminub, pminuw, pminud, pminuq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pminub, pminuw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmuldq
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pmulhrsw, pmulhuw, pmulhw, pmullw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [pmulhrsw, pmulhuw, pmulhw, pmullw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmuludq
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pmuludq
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: psadbw
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: psadbw
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pshufb
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pshufb
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psignb, psignw, psignd]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psignb, psignw, psignd]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psllw, pslld, psllq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psllw, pslld, psllq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psllw, pslld, psllq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psllw, pslld, psllq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: pslldq
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psraw, psrad, psraq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psraw, psrad, psraq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psraw, psrad, psraq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psraw, psrad, psraq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psrlw, psrld, psrlq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psrlw, psrld, psrlq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psrlw, psrld, psrlq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psrlw, psrld, psrlq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: psrldq
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psubb, psubw, psubd, psubq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psubb, psubw, psubd, psubq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psubsb, psubsw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psubsb, psubsw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psubusb, psubusw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [psubusb, psubusw]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [punpckhbw, punpckhwd, punpckhdq, punpckhqdq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [punpckhbw, punpckhwd, punpckhdq, punpckhqdq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [punpcklbw, punpcklwd, punpckldq, punpcklqdq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [punpcklbw, punpcklwd, punpckldq, punpcklqdq]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "mm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [sha1msg2, sha1nexte]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
@@ -2808,6 +3398,16 @@ instruction_forms:
|
||||
name: "CF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: [subsd, subpd]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [subss, subps]
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -2829,6 +3429,10 @@ instruction_forms:
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
@@ -2855,6 +3459,10 @@ instruction_forms:
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
@@ -2881,6 +3489,10 @@ instruction_forms:
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
@@ -2904,6 +3516,10 @@ instruction_forms:
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
@@ -2916,6 +3532,16 @@ instruction_forms:
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: [unpcklps, unpcklpd]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vldmxcsr
|
||||
operands:
|
||||
- class: "memory"
|
||||
@@ -3125,6 +3751,60 @@ instruction_forms:
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [vptest, ptest]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: vptest
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: vzeroall
|
||||
operands: []
|
||||
hidden_operands:
|
||||
@@ -3281,7 +3961,7 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [shl, shr]
|
||||
- name: [shl, shr, shlq, shrq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Intel Ivy Bridge
|
||||
arch_code: IVB
|
||||
isa: x86
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
import os.path
|
||||
import argparse
|
||||
import os.path
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from distutils.version import StrictVersion
|
||||
@@ -8,8 +8,23 @@ from distutils.version import StrictVersion
|
||||
from osaca.parser import get_parser
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
intel_archs = ['CON', 'WOL', 'NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL',
|
||||
'CNL', 'ICL']
|
||||
intel_archs = [
|
||||
'CON',
|
||||
'WOL',
|
||||
'NHM',
|
||||
'WSM',
|
||||
'SNB',
|
||||
'IVB',
|
||||
'HSW',
|
||||
'BDW',
|
||||
'SKL',
|
||||
'SKX',
|
||||
'KBL',
|
||||
'CFL',
|
||||
'CNL',
|
||||
'ICL',
|
||||
]
|
||||
amd_archs = ['ZEN1', 'ZEN+', 'ZEN2']
|
||||
|
||||
|
||||
def port_pressure_from_tag_attributes(attrib):
|
||||
@@ -19,6 +34,7 @@ def port_pressure_from_tag_attributes(attrib):
|
||||
for p in attrib['ports'].split('+'):
|
||||
cycles, ports = p.split('*')
|
||||
ports = ports.lstrip('p')
|
||||
ports = ports.lstrip('FP')
|
||||
port_occupation.append([int(cycles), ports])
|
||||
|
||||
# Also consider div on DIV pipeline
|
||||
@@ -88,10 +104,10 @@ def extract_paramters(instruction_tag, parser, isa):
|
||||
return parameters
|
||||
|
||||
|
||||
def extract_model(tree, arch):
|
||||
def extract_model(tree, arch, skip_mem=True):
|
||||
try:
|
||||
isa = MachineModel.get_isa_for_arch(arch)
|
||||
except:
|
||||
except Exception:
|
||||
print("Skipping...", file=sys.stderr)
|
||||
return None
|
||||
mm = MachineModel(isa=isa)
|
||||
@@ -101,6 +117,7 @@ def extract_model(tree, arch):
|
||||
ignore = False
|
||||
|
||||
mnemonic = instruction_tag.attrib['asm']
|
||||
iform = instruction_tag.attrib['iform']
|
||||
# skip any mnemonic which contain spaces (e.g., "REX CRC32")
|
||||
if ' ' in mnemonic:
|
||||
continue
|
||||
@@ -118,6 +135,26 @@ def extract_model(tree, arch):
|
||||
arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
|
||||
if arch_tag is None:
|
||||
continue
|
||||
# skip any instructions without port utilization
|
||||
if not any(['ports' in x.attrib for x in arch_tag.findall('measurement')]):
|
||||
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
|
||||
continue
|
||||
# skip if computed and measured TP don't match
|
||||
if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][
|
||||
0
|
||||
]:
|
||||
print(
|
||||
"Calculated TP from port utilization doesn't match TP, skip: ",
|
||||
iform,
|
||||
file=sys.stderr,
|
||||
)
|
||||
continue
|
||||
# skip if instruction contains memory operand
|
||||
if skip_mem and any(
|
||||
[x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')]
|
||||
):
|
||||
print("Contains memory operand, skip: ", iform, file=sys.stderr)
|
||||
continue
|
||||
# We collect all measurement and IACA information and compare them later
|
||||
for measurement_tag in arch_tag.iter('measurement'):
|
||||
if 'TP_ports' in measurement_tag.attrib:
|
||||
@@ -143,10 +180,14 @@ def extract_model(tree, arch):
|
||||
if 'max_cycles' in l_tag.attrib
|
||||
]
|
||||
if latencies[1:] != latencies[:-1]:
|
||||
print("Contradicting latencies found, using first:", mnemonic, latencies,
|
||||
file=sys.stderr)
|
||||
print(
|
||||
"Contradicting latencies found, using smallest:",
|
||||
iform,
|
||||
latencies,
|
||||
file=sys.stderr,
|
||||
)
|
||||
if latencies:
|
||||
latency = latencies[0]
|
||||
latency = min(latencies)
|
||||
if ignore:
|
||||
continue
|
||||
|
||||
@@ -160,16 +201,14 @@ def extract_model(tree, arch):
|
||||
# Check if all are equal
|
||||
if port_pressure:
|
||||
if port_pressure[1:] != port_pressure[:-1]:
|
||||
print(
|
||||
"Contradicting port occupancies, using latest IACA:",
|
||||
mnemonic, file=sys.stderr)
|
||||
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
|
||||
port_pressure = port_pressure[-1]
|
||||
else:
|
||||
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
|
||||
continue
|
||||
|
||||
|
||||
# Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
|
||||
if arch.upper() in intel_archs and not arch.upper() in ['ICL']:
|
||||
if arch.upper() in intel_archs and not arch.upper() in ['ICL']:
|
||||
if any([p['class'] == 'memory' for p in parameters]):
|
||||
# We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
|
||||
# TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
|
||||
@@ -183,7 +222,7 @@ def extract_model(tree, arch):
|
||||
# Add (1, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
|
||||
if port_23 and not port_4:
|
||||
port_pressure.append((1, ['2D', '3D']))
|
||||
|
||||
|
||||
# Add missing ports:
|
||||
for ports in [pp[1] for pp in port_pressure]:
|
||||
for p in ports:
|
||||
@@ -201,7 +240,7 @@ def rhs_comment(uncommented_string, comment):
|
||||
|
||||
commented_string = ""
|
||||
for l in uncommented_string.split('\n'):
|
||||
commented_string += ("{:<"+str(max_length)+"} # {}\n").format(l, comment)
|
||||
commented_string += ("{:<" + str(max_length) + "} # {}\n").format(l, comment)
|
||||
return commented_string
|
||||
|
||||
|
||||
@@ -218,21 +257,33 @@ def main():
|
||||
help='architecture to extract, use IACA abbreviations (e.g., SNB). '
|
||||
'if not given, all will be extracted and saved to file in CWD.',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--mem',
|
||||
dest='skip_mem',
|
||||
action='store_false',
|
||||
help='add instruction forms including memory addressing operands, which are '
|
||||
'skipped by default'
|
||||
)
|
||||
args = parser.parse_args()
|
||||
basename = os.path.basename(__file__)
|
||||
|
||||
tree = ET.parse(args.xml)
|
||||
print('Available architectures:', ', '.join(architectures(tree)))
|
||||
print('# Available architectures:', ', '.join(architectures(tree)))
|
||||
if args.arch:
|
||||
model = extract_model(tree, args.arch)
|
||||
print('# Chosen architecture: {}'.format(args.arch))
|
||||
model = extract_model(tree, args.arch, args.skip_mem)
|
||||
if model is not None:
|
||||
print(rhs_comment(model.dump(), basename+" "+sys.argv[0]))
|
||||
print(
|
||||
rhs_comment(
|
||||
model.dump(), basename + " " + args.xml.split('/')[-1] + " " + args.arch
|
||||
)
|
||||
)
|
||||
else:
|
||||
for arch in architectures(tree):
|
||||
print(arch, end='')
|
||||
model = extract_model(tree, arch.lower())
|
||||
model = extract_model(tree, arch.lower(), args.skip_mem)
|
||||
if model:
|
||||
model_string = rhs_comment(model.dump(), basename+" "+arch)
|
||||
model_string = rhs_comment(model.dump(), basename + " " + arch)
|
||||
|
||||
with open('{}.yml'.format(arch.lower()), 'w') as f:
|
||||
f.write(model_string)
|
||||
|
||||
771
osaca/data/n1.yml
Normal file
771
osaca/data/n1.yml
Normal file
@@ -0,0 +1,771 @@
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Arm Neoverse N1
|
||||
arch_code: n1
|
||||
isa: AArch64
|
||||
ROB_size: 128 # wikichip
|
||||
retired_uOps_per_cycle: 8 # wikichip
|
||||
scheduler_size: 120 # wikichip
|
||||
hidden_loads: false
|
||||
load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 5.0, q: 6.0, v: 5.0, z: 4.0}
|
||||
load_throughput:
|
||||
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
|
||||
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
|
||||
load_throughput_default: [[1, '67']]
|
||||
store_throughput: []
|
||||
store_throughput_default: [[1, '56'], [1, '67']]
|
||||
ports: ['0', '1', '2', '3', '4', '4DV', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
+----------------------------------------------------------------------------+
|
||||
| 120 entries |
|
||||
+----------------------------------------------------------------------------+
|
||||
0 |BR 1 |IS0 2 |IS1 3 |IM0 4 |FP0 5 |FP1 6 |LDST 7 |LDST
|
||||
\/ \/ \/ \/ \/ \/ \/ \/
|
||||
+------+ +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
|
||||
|Branch| | INT | | INT | | INT | | FP ALU | | FP ALU | | AGU | | AGU |
|
||||
+------+ | ALU | | ALU | | ALU | +--------+ +--------+ +-------+ +-------+
|
||||
+-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
|
||||
+-----+ +-----+ | FP MUL | | FP MUL | |LD DATA| |LD DATA|
|
||||
| ST | | INT | +--------+ +--------+ +-------+ +-------+
|
||||
| INT | | MUL | +--------+ +---------+
|
||||
+-----+ +-----+ | FP DIV | |SIMD SHFT|
|
||||
+-----+ +--------+ +---------+
|
||||
| INT | +--------+ +--------+
|
||||
| DIV | | FMA | | FMA |
|
||||
+-----+ +--------+ +--------+
|
||||
+-----+ +--------+ +--------+
|
||||
|SHIFT| | ST SIMD| | ST SIMD|
|
||||
+-----+ | DATA | | DATA |
|
||||
+-----+ +--------+ +--------+
|
||||
| ST |
|
||||
| INT |
|
||||
+-----+
|
||||
instruction_forms:
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p123
|
||||
port_pressure: [[1, '123']]
|
||||
- name: add
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p123
|
||||
port_pressure: [[1, '123']]
|
||||
- name: adds
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p123
|
||||
    port_pressure: [[1, '123']]
|
||||
- name: b.ne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 1.0
|
||||
latency: 0.0
|
||||
port_pressure: [[1, '0']]
|
||||
- name: b.gt
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 1.0
|
||||
latency: 0.0
|
||||
port_pressure: [[1, '0']]
|
||||
- name: bne
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 1.0
|
||||
latency: 0.0
|
||||
port_pressure: [[1, '0']]
|
||||
- name: cmp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: w
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p123
|
||||
port_pressure: [[1, '123']]
|
||||
- name: cmp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
    throughput: 0.33333333
|
||||
latency: 1.0 # 1*p123
|
||||
port_pressure: [[1, '123']]
|
||||
- name: dup
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: d
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fadd
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fdiv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: 128
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: 128
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: 128
|
||||
throughput: 6.0
|
||||
latency: 8.0 # 1*p4+6*p4DV
|
||||
port_pressure: [[1, '4'], [6, [4DV]]]
|
||||
- name: fdiv
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: 128
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: 128
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: 128
|
||||
throughput: 10.0
|
||||
latency: 12.0 # 1*p4+10*p4DV
|
||||
    port_pressure: [[1, '4'], [10, [4DV]]]
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fmla
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fmov
|
||||
operands:
|
||||
- {class: register, prefix: s}
|
||||
- {class: immediate, imd: double}
|
||||
latency: ~ # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
throughput: 0.5
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fmul
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: frecpe
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
throughput: 2.0
|
||||
latency: 4.0 # 1*p4
|
||||
port_pressure: [[2, '4']]
|
||||
- name: frecpe
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
throughput: 1.0
|
||||
latency: 3.0 # 1*p4
|
||||
port_pressure: [[1, '4']]
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 2*p67, from n1 opt guide
|
||||
port_pressure: [[2, '67']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 2*p67+1*p123, from n1 opt guide
|
||||
port_pressure: [[2, '67'], [1, '123']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 7.0 # 2*p67, from n1 opt guide
|
||||
port_pressure: [[2, '67']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: ~
|
||||
index: ~
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 7.0 # 2*p67+1*p123, from n1 opt guide
|
||||
    port_pressure: [[2, '67'], [1, '123']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 7.0 # 2*p67
|
||||
port_pressure: [[2, '67']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 7.0 # 2*p67+1*p123
|
||||
port_pressure: [[2, '67'], [1, '123']]
|
||||
- name: ldp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 2*p67+1*p123
|
||||
port_pressure: [[2, '67'], [1, '123']]
|
||||
- name: ldur # JL: assumed from n1 opt guide
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 5.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: imd
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 5.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
post-indexed: false
|
||||
pre-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 5.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: ldr
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
throughput: 0.0
|
||||
latency: 0.0
|
||||
port_pressure: []
|
||||
- name: mov
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p3456
|
||||
port_pressure: [[1, '3456']]
|
||||
- name: mov
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: b
|
||||
width: '*'
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: b
|
||||
width: '*'
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p45
|
||||
port_pressure: [[1, '45']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 2*p45+1*p67
|
||||
port_pressure: [[2, '45'], [1, '67']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 0 # 2*p45+2*p67+1*123
|
||||
port_pressure: [[2, '45'], [2, '67'], [1, '123']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 2*p45+2*p67
|
||||
port_pressure: [[2, '45'], [2, '67']]
|
||||
- name: stur # JL: assumed from n1 opt guide
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 0 # 1*p67+1*p23
|
||||
    port_pressure: [[1, '67'], [1, '23']]
|
||||
- name: stur # JL: assumed from n1 opt guide
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 2*p67+1*p45
|
||||
port_pressure: [[2, '67'], [1, '45']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 0 # 1*p67+1*p23
|
||||
    port_pressure: [[1, '67'], [1, '23']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 0.5
|
||||
latency: 0 # 1*p67+1*p45
|
||||
port_pressure: [[1, '67'], [1, '45']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: d
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 0.5
|
||||
latency: 0 # 1*p67+1*p45+1*p123
|
||||
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: 1
|
||||
pre-indexed: false
|
||||
post-indexed: false
|
||||
throughput: 1.0
|
||||
latency: 0 # 2*p67+1*p45
|
||||
    port_pressure: [[2, '67'], [1, '45']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p67+1*p45+1*123
|
||||
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
|
||||
- name: str
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: false
|
||||
post-indexed: true
|
||||
throughput: 1.0
|
||||
latency: 0 # 1*p67+1*p23+1*p123
|
||||
port_pressure: [[1, '67'], [1, '23'], [1, '123']]
|
||||
- name: sub
|
||||
operands:
|
||||
- class: register
|
||||
prefix: w
|
||||
- class: register
|
||||
prefix: w
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.33333333
|
||||
latency: 1.0 # 1*p123
|
||||
port_pressure: [[1, '123']]
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Intel Skylake SP
|
||||
arch_code: SKX
|
||||
isa: x86
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Intel Sandy Bridge
|
||||
arch_code: SNB
|
||||
isa: x86
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: Thunder X2
|
||||
arch_code: tx2
|
||||
isa: AArch64
|
||||
@@ -267,6 +267,34 @@ instruction_forms:
|
||||
throughput: 0.5
|
||||
latency: 6.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: frecpe
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: s
|
||||
throughput: 0.5
|
||||
latency: 5.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: frecpe
|
||||
operands:
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
- class: register
|
||||
prefix: v
|
||||
shape: d
|
||||
throughput: 0.5
|
||||
latency: 5.0 # 1*p01
|
||||
port_pressure: [[1, '01']]
|
||||
- name: fsub
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: AMD Zen (family 17h)
|
||||
arch_code: ZEN1
|
||||
isa: x86
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.2
|
||||
osaca_version: 0.3.4
|
||||
micro_architecture: AMD Zen2
|
||||
arch_code: ZEN2
|
||||
isa: x86
|
||||
|
||||
@@ -17,7 +17,7 @@ MODULE_DATA_DIR = os.path.join(
|
||||
)
|
||||
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
|
||||
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
|
||||
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2']
|
||||
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2', 'N1']
|
||||
|
||||
|
||||
# Stolen from pip
|
||||
@@ -71,7 +71,7 @@ def create_parser(parser=None):
|
||||
parser.add_argument(
|
||||
'--arch',
|
||||
type=str,
|
||||
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2).',
|
||||
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2, N1).',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--fixed',
|
||||
|
||||
@@ -8,6 +8,7 @@ class BaseParser(object):
|
||||
DIRECTIVE_ID = 'directive'
|
||||
IMMEDIATE_ID = 'immediate'
|
||||
LABEL_ID = 'label'
|
||||
IDENTIFIER_ID = 'identifier'
|
||||
MEMORY_ID = 'memory'
|
||||
REGISTER_ID = 'register'
|
||||
SEGMENT_EXT_ID = 'segment_extension'
|
||||
|
||||
@@ -19,22 +19,23 @@ class ParserAArch64v81(BaseParser):
|
||||
pp.ZeroOrMore(pp.Word(pp.printables))
|
||||
).setResultsName(self.COMMENT_ID)
|
||||
# Define ARM assembly identifier
|
||||
decimal_number = pp.Combine(
|
||||
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
|
||||
).setResultsName('value')
|
||||
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
|
||||
relocation = pp.Combine(pp.Literal(':') + pp.Word(pp.alphanums + '_') + pp.Literal(':'))
|
||||
first = pp.Word(pp.alphas + '_.', exact=1)
|
||||
rest = pp.Word(pp.alphanums + '_.')
|
||||
identifier = pp.Group(
|
||||
pp.Optional(relocation).setResultsName('relocation')
|
||||
+ pp.Combine(first + pp.Optional(rest)).setResultsName('name')
|
||||
).setResultsName('identifier')
|
||||
+ pp.Optional(pp.Suppress(pp.Literal('+')) + (hex_number | decimal_number).setResultsName('offset'))
|
||||
).setResultsName(self.IDENTIFIER_ID)
|
||||
# Label
|
||||
self.label = pp.Group(
|
||||
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
|
||||
).setResultsName(self.LABEL_ID)
|
||||
# Directive
|
||||
decimal_number = pp.Combine(
|
||||
pp.Optional(pp.Literal('-')) + pp.Word(pp.nums)
|
||||
).setResultsName('value')
|
||||
hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value')
|
||||
directive_option = pp.Combine(
|
||||
pp.Word(pp.alphas + '#@.%', exact=1)
|
||||
+ pp.Optional(pp.Word(pp.printables + ' ', excludeChars=','))
|
||||
@@ -317,6 +318,8 @@ class ParserAArch64v81(BaseParser):
|
||||
return self.process_immediate(operand[self.IMMEDIATE_ID])
|
||||
if self.LABEL_ID in operand:
|
||||
return self.process_label(operand[self.LABEL_ID])
|
||||
if self.IDENTIFIER_ID in operand:
|
||||
return self.process_identifier(operand[self.IDENTIFIER_ID])
|
||||
return operand
|
||||
|
||||
def process_memory_address(self, memory_address):
|
||||
@@ -396,6 +399,13 @@ class ParserAArch64v81(BaseParser):
|
||||
label['name'] = label['name']['name']
|
||||
return AttrDict({self.LABEL_ID: label})
|
||||
|
||||
def process_identifier(self, identifier):
    """
    Post-process an identifier operand.

    A 'value' entry, if present, is removed from ``identifier`` in place
    (it shows up when the operand consists of symbol+offset). The identifier
    is then returned wrapped in an AttrDict under the ``IDENTIFIER_ID`` key.

    :param identifier: parsed identifier operand (mapping-like)
    :returns: AttrDict of the form ``{IDENTIFIER_ID: identifier}``
    """
    # remove value if it consists of symbol+offset
    carries_value = 'value' in identifier
    if carries_value:
        del identifier['value']
    wrapped = {self.IDENTIFIER_ID: identifier}
    return AttrDict(wrapped)
|
||||
|
||||
def get_full_reg_name(self, register):
|
||||
"""Return one register name string including all attributes"""
|
||||
if 'lanes' in register:
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
import pyparsing as pp
|
||||
@@ -34,8 +33,14 @@ class ParserX86ATT(BaseParser):
|
||||
+ pp.Optional(relocation).setResultsName('relocation')
|
||||
).setResultsName('identifier')
|
||||
# Label
|
||||
numeric_identifier = pp.Group(
|
||||
pp.Word(pp.nums).setResultsName('name')
|
||||
+ pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix'))
|
||||
).setResultsName('identifier')
|
||||
self.label = pp.Group(
|
||||
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
|
||||
(identifier | numeric_identifier).setResultsName('name')
|
||||
+ pp.Literal(':')
|
||||
+ pp.Optional(self.comment)
|
||||
).setResultsName(self.LABEL_ID)
|
||||
# Register: pp.Regex('^%[0-9a-zA-Z]+{}{z},?')
|
||||
self.register = pp.Group(
|
||||
@@ -44,7 +49,7 @@ class ParserX86ATT(BaseParser):
|
||||
+ pp.Optional(pp.Literal('(') + pp.Word(pp.nums) + pp.Literal(')'))
|
||||
+ pp.Optional(
|
||||
pp.Literal('{')
|
||||
+ pp.Literal('%')
|
||||
+ pp.Optional(pp.Suppress(pp.Literal('%')))
|
||||
+ pp.Word(pp.alphanums).setResultsName('mask')
|
||||
+ pp.Literal('}')
|
||||
+ pp.Optional(
|
||||
@@ -99,7 +104,7 @@ class ParserX86ATT(BaseParser):
|
||||
+ pp.Literal(')')
|
||||
+ pp.Optional(
|
||||
pp.Literal('{')
|
||||
+ pp.Literal('%')
|
||||
+ pp.Optional(pp.Suppress(pp.Literal('%')))
|
||||
+ pp.Word(pp.alphanums).setResultsName('mask')
|
||||
+ pp.Literal('}')
|
||||
)
|
||||
@@ -132,7 +137,9 @@ class ParserX86ATT(BaseParser):
|
||||
pp.alphanums
|
||||
).setResultsName('mnemonic')
|
||||
# Combine to instruction form
|
||||
operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier)
|
||||
operand_first = pp.Group(
|
||||
self.register ^ immediate ^ memory ^ identifier ^ numeric_identifier
|
||||
)
|
||||
operand_rest = pp.Group(self.register ^ immediate ^ memory)
|
||||
self.instruction_parser = (
|
||||
mnemonic
|
||||
@@ -305,7 +312,7 @@ class ParserX86ATT(BaseParser):
|
||||
def process_label(self, label):
|
||||
"""Post-process label asm line"""
|
||||
# remove duplicated 'name' level due to identifier
|
||||
label['name'] = label['name']['name']
|
||||
label['name'] = label['name'][0]['name']
|
||||
return AttrDict({self.LABEL_ID: label})
|
||||
|
||||
def process_immediate(self, immediate):
|
||||
|
||||
@@ -241,6 +241,7 @@ class MachineModel(object):
|
||||
"""Return ISA for given micro-arch ``arch``."""
|
||||
arch_dict = {
|
||||
'tx2': 'aarch64',
|
||||
'n1': 'aarch64',
|
||||
'zen1': 'x86',
|
||||
'zen+': 'x86',
|
||||
'zen2': 'x86',
|
||||
|
||||
Reference in New Issue
Block a user