Compare commits

...

35 Commits

Author SHA1 Message Date
Julian Hammer
3308f5d68f version bump 2020-08-05 10:59:10 +02:00
JanLJL
0db8b6bcbf fixed first character match for symbolic identifiers 2020-08-03 18:30:29 +02:00
Jan
40755b2080 Merge pull request #49 from RRZE-HPC/coherent_label_parsing
Coherent label parsing
2020-08-03 18:25:20 +02:00
JanLJL
269148c2a1 save b/f in numeric identifier as suffix tag 2020-08-03 18:08:29 +02:00
JanLJL
12a8506530 removed unnecessary code 2020-08-03 17:14:58 +02:00
JanLJL
e715badcf9 detects numeric label as label 2020-08-03 16:59:48 +02:00
Julian Hammer
d6b4355a77 labels may now start with numbers 2020-08-03 15:53:29 +02:00
JanLJL
5361b63b52 version bump 2020-08-03 09:38:50 +02:00
JanLJL
cc39342047 minor enhancement for mask parsing 2020-08-03 09:07:45 +02:00
Jan
acbde7a19c Merge pull request #48 from RRZE-HPC/n1
initial implementation of Neoverse N1 support
2020-07-02 09:32:54 +02:00
Cloud User
34e978d2ae initial implementation of Neoverse N1 support 2020-06-30 20:28:57 +00:00
JanLJL
6801229275 PEP8 adjustments 2020-06-25 21:56:18 +02:00
JanLJL
d3d1a89600 two new instrs 2020-06-25 21:55:10 +02:00
JanLJL
93c1951097 prettified aarch64 ISA DB 2020-06-25 21:54:52 +02:00
JanLJL
7211dd0799 improvements for uops.info importer script 2020-06-25 21:53:41 +02:00
JanLJL
94d7d35c0b more instructions 2020-05-04 18:50:58 +02:00
JanLJL
1009c60d2d fixed wrong output format for 3-digit TP numbers 2020-04-08 21:28:50 +02:00
JanLJL
229b316b6d added some instructions 2020-04-08 15:54:31 +02:00
JanLJL
c0753be899 added python 3.7/3.8 to tests 2020-04-02 09:20:08 +02:00
JanLJL
eaa56792ab added bs4 dependency for Travis 2020-04-02 09:08:08 +02:00
JanLJL
3425fa3024 added tests 2020-04-02 08:57:26 +02:00
JanLJL
38924b6ec1 more instructions 2020-03-30 18:27:33 +02:00
JanLJL
d6ae457de4 removed duplicates in CSX DB 2020-03-30 18:18:35 +02:00
JanLJL
a5c2ab1a4a bugfix for online check of operands 2020-03-26 11:46:46 +01:00
JanLJL
e4393189dc minor update 2020-03-26 11:06:11 +01:00
JanLJL
3016fc7c46 added more tests 2020-03-26 10:19:14 +01:00
JanLJL
82f47d217c Merge branch 'master' of github.com:RRZE-HPC/osaca 2020-03-26 10:03:23 +01:00
JanLJL
1754df42d2 enhanced x86 parser for directives 2020-03-26 10:02:39 +01:00
Julian Hammer
ac1295aac2 flag string in output now in line with required flags 2020-03-24 16:02:40 +01:00
Julian Hammer
9624e6c109 closing cache file after dump 2020-03-24 15:20:49 +01:00
Julian Hammer
2d16037c44 Merge branch 'master' of github.com:RRZE-HPC/OSACA 2020-03-21 17:18:37 +01:00
Julian Hammer
c5801cfe2f closing cache file 2020-03-21 17:18:04 +01:00
Julian Hammer
3e960dd4ac closing cache file 2020-03-20 15:02:30 +01:00
JanLJL
680774267d fixed wrong import of mm registers 2020-03-17 12:56:12 +01:00
JanLJL
1aa710f195 enhanced MachineModel to support mask/zeroing differentiation for instruction forms 2020-03-17 12:55:37 +01:00
24 changed files with 3639 additions and 22326 deletions

View File

@@ -4,10 +4,12 @@ python:
- "3.5"
- "3.6"
# Python 3.7 not working yet
# - "3.7"
- "3.7"
- "3.8"
before_install:
# - pip install tox-travis
- pip install codecov
- pip install bs4
- pip install pygraphviz
- pip install kerncraft
install:

View File

@@ -1,6 +1,6 @@
"""Open Source Architecture Code Analyzer"""
name = 'osaca'
__version__ = '0.3.3.dev0'
__version__ = '0.3.6'
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version__

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -20,8 +20,8 @@ class MOVEntryBuilder:
return load, store
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0, comment=None):
self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None
):
if comment:
comment = " # " + comment
else:
@@ -32,10 +32,7 @@ class MOVEntryBuilder:
if ot == 'imd':
description += ' - class: immediate\n imd: int\n'
elif ot.startswith('mem'):
description += (
' - class: memory\n'
' base: "*"\n'
' offset: "*"\n')
description += ' - class: memory\n' ' base: "*"\n' ' offset: "*"\n'
if ot == 'mem_simple':
description += ' index: ~\n'
elif ot == 'mem_complex':
@@ -50,11 +47,13 @@ class MOVEntryBuilder:
' latency: {latency}\n'
' port_pressure: {port_pressure!r}\n'
' throughput: {throughput}\n'
' uops: {uops}\n').format(
' uops: {uops}\n'
).format(
latency=latency,
port_pressure=port_pressure,
throughput=self.compute_throughput(port_pressure),
uops=sum([i for i,p in port_pressure]))
uops=sum([i for i, p in port_pressure]),
)
return description
def parse_port_pressure(self, port_pressure_str):
@@ -84,9 +83,7 @@ class MOVEntryBuilder:
class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
# for SNB and IVB
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0):
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
load, store = self.classify(operand_types)
comment = None
@@ -100,15 +97,14 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
comment = "with store"
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency, comment)
self, instruction_name, operand_types, port_pressure, latency, comment
)
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
# for HSW, BDW, SKX and CSX
def build_description(
self, instruction_name, operand_types,
port_pressure=[], latency=0):
def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0):
load, store = self.classify(operand_types)
if load:
@@ -116,7 +112,8 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
latency += 4
comment = "with load"
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency, comment)
self, instruction_name, operand_types, port_pressure, latency, comment
)
if store:
port_pressure_simple = port_pressure + [[1, '237'], [1, '4']]
operands_simple = ['mem_simple' if o == 'mem' else o for o in operand_types]
@@ -125,16 +122,28 @@ class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
latency += 0
return (
MOVEntryBuilder.build_description(
self, instruction_name, operands_simple, port_pressure_simple, latency,
"with store, simple AGU") +
'\n' +
MOVEntryBuilder.build_description(
self, instruction_name, operands_complex, port_pressure_complex, latency,
"with store, complex AGU"))
self,
instruction_name,
operands_simple,
port_pressure_simple,
latency,
"with store, simple AGU",
)
+ '\n'
+ MOVEntryBuilder.build_description(
self,
instruction_name,
operands_complex,
port_pressure_complex,
latency,
"with store, complex AGU",
)
)
# Register only:
return MOVEntryBuilder.build_description(
self, instruction_name, operand_types, port_pressure, latency)
self, instruction_name, operand_types, port_pressure, latency
)
np7 = MOVEntryBuilderIntelNoPort7AGU()
@@ -149,7 +158,6 @@ snb_mov_instructions = [
('mov imd gpr', ('1*p015', 1)),
('mov imd mem', ('', 0)),
('movabs imd gpr', ('1*p015', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('1*p5', 1)),
('movapd xmm mem', ('', 0)),
@@ -160,7 +168,6 @@ snb_mov_instructions = [
('vmovapd ymm ymm', ('1*p5', 1)),
('vmovapd ymm mem', ('', 0)),
('vmovapd mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('1*p5', 1)),
('movaps xmm mem', ('', 0)),
@@ -171,7 +178,6 @@ snb_mov_instructions = [
('vmovaps ymm ymm', ('1*p5', 1)),
('movaps ymm mem', ('', 0)),
('movaps mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movd:movq
('movd gpr mm', ('1*p5', 1)),
('movd mem mm', ('', 0)),
@@ -197,7 +203,6 @@ snb_mov_instructions = [
('vmovd xmm mem', ('', 0)),
('vmovq xmm gpr', ('1*p0', 1)),
('vmovq xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movddup
('movddup xmm xmm', ('1*p5', 1)),
('movddup mem xmm', ('', 0)),
@@ -205,10 +210,8 @@ snb_mov_instructions = [
('vmovddup mem xmm', ('', 0)),
('vmovddup ymm ymm', ('1*p5', 1)),
('vmovddup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdq2q
('movdq2q xmm mm', ('1*p015+1*p5', 1)),
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('1*p015', 1)),
('movdqa mem xmm', ('', 0)),
@@ -219,7 +222,6 @@ snb_mov_instructions = [
('vmovdqa ymm ymm', ('1*p05', 1)),
('vmovdqa mem ymm', ('', 0)),
('vmovdqa ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('1*p015', 1)),
('movdqu mem xmm', ('', 0)),
@@ -230,75 +232,60 @@ snb_mov_instructions = [
('vmovdqu ymm ymm', ('1*p05', 1)),
('vmovdqu mem ymm', ('', 0)),
('vmovdqu ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movhlps
('movhlps xmm xmm', ('1*p5', 1)),
('vmovhlps xmm xmm xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movhpd
('movhpd mem xmm', ('1*p5', 1)),
('vmovhpd mem xmm xmm', ('1*p5', 1)),
('movhpd xmm mem', ('', 0)),
('vmovhpd mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movhps
('movhps mem xmm', ('1*p5', 1)),
('vmovhps mem xmm xmm', ('1*p5', 1)),
('movhps xmm mem', ('', 0)),
('vmovhps mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movlhps
('movlhps xmm xmm', ('1*p5', 1)),
('vmovlhps xmm xmm xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movlpd
('movlpd mem xmm', ('1*p5', 1)),
('vmovlpd mem xmm xmm', ('1*p5', 1)),
('movlpd xmm mem', ('', 0)),
('vmovlpd mem xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movlps
('movlps mem xmm', ('1*p5', 1)),
('vmovlps mem xmm xmm', ('1*p5', 1)),
('movlps xmm mem', ('', 0)),
('vmovlps mem xmm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 2)),
('vmovmskpd xmm gpr', ('1*p0', 2)),
('vmovmskpd ymm gpr', ('1*p0', 2)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 1)),
('vmovmskps xmm gpr', ('1*p0', 1)),
('vmovmskps ymm gpr', ('1*p0', 1)),
# https://www.felixcloutier.com/x86/movntdq
('movntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntdq xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntdq ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('movntdqa mem xmm', ('', 0)),
('vmovntdqa mem xmm', ('', 0)),
('vmovntdqa mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movnti
('movnti gpr mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntpd
('movntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntpd xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntpd ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('movntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntps xmm mem', ('', 0)), # TODO NT-store: what latency to use?
('vmovntps ymm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntq
('movntq mm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq
('movq mm mm', ('', 0)),
('movq mem mm', ('', 0)),
@@ -309,14 +296,11 @@ snb_mov_instructions = [
('vmovq xmm xmm', ('1*p015', 1)),
('vmovq mem xmm', ('', 0)),
('vmovq xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p015', 1)),
# https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq
# TODO combined load-store is currently not supported
# ('movs mem mem', ()),
# https://www.felixcloutier.com/x86/movsd
('movsd xmm xmm', ('1*p5', 1)),
('movsd mem xmm', ('', 0)),
@@ -324,7 +308,6 @@ snb_mov_instructions = [
('vmovsd xmm xmm xmm', ('1*p5', 1)),
('vmovsd mem xmm', ('', 0)),
('vmovsd xmm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movshdup
('movshdup xmm xmm', ('1*p5', 1)),
('movshdup mem xmm', ('', 0)),
@@ -332,7 +315,6 @@ snb_mov_instructions = [
('vmovshdup mem xmm', ('', 0)),
('vmovshdup ymm ymm', ('1*p5', 1)),
('vmovshdup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movsldup
('movsldup xmm xmm', ('1*p5', 1)),
('movsldup mem xmm', ('', 0)),
@@ -340,7 +322,6 @@ snb_mov_instructions = [
('vmovsldup mem xmm', ('', 0)),
('vmovsldup ymm ymm', ('1*p5', 1)),
('vmovsldup mem ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movss
('movss xmm xmm', ('1*p5', 1)),
('movss mem xmm', ('', 0)),
@@ -349,7 +330,6 @@ snb_mov_instructions = [
('vmovss xmm xmm', ('1*p5', 1)),
('vmovss xmm mem', ('', 0)),
('movss mem xmm', ('', 0)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p015', 1)),
('movsx mem gpr', ('', 0)),
@@ -363,7 +343,6 @@ snb_mov_instructions = [
('movsl mem gpr', ('', 0)), # AT&T version
('movsq gpr gpr', ('1*p015', 1)), # AT&T version
('movsq mem gpr', ('', 0)), # AT&T version
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('1*p5', 1)),
('movupd mem xmm', ('', 0)),
@@ -374,7 +353,6 @@ snb_mov_instructions = [
('vmovupd ymm ymm', ('1*p5', 1)),
('vmovupd mem ymm', ('', 0)),
('vmovupd ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('1*p5', 1)),
('movups mem xmm', ('', 0)),
@@ -385,7 +363,6 @@ snb_mov_instructions = [
('vmovups ymm ymm', ('1*p5', 1)),
('vmovups mem ymm', ('', 0)),
('vmovups ymm mem', ('', 0)),
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p015', 1)),
('movzx mem gpr', ('', 0)),
@@ -397,7 +374,6 @@ snb_mov_instructions = [
('movzl mem gpr', ('', 0)), # AT&T version
('movzq gpr gpr', ('1*p015', 1)), # AT&T version
('movzq mem gpr', ('', 0)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p015+2*p05', 2)),
('cmova mem gpr', ('1*p015+2*p05', 2)),
@@ -459,12 +435,10 @@ snb_mov_instructions = [
('cmovs mem gpr', ('1*p015+1*p05', 2)),
('cmovz gpr gpr', ('1*p015+1*p05', 2)),
('cmovz mem gpr', ('1*p015+1*p05', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 2)),
('pmovmskb xmm gpr', ('1*p0', 2)),
('vpmovmskb xmm gpr', ('1*p0', 2)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p15', 1)),
('pmovsxbw mem xmm', ('1*p15', 1)),
@@ -484,7 +458,6 @@ snb_mov_instructions = [
('vpmovsxbd mem ymm', ('1*p15', 1)),
('vpmovsxbq ymm ymm', ('1*p15', 1)),
('vpmovsxbq mem ymm', ('1*p15', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p15', 1)),
('pmovzxbw mem xmm', ('1*p15', 1)),
@@ -494,76 +467,72 @@ snb_mov_instructions = [
('vpmovzxbw mem ymm', ('1*p15', 1)),
]
ivb_mov_instructions = list(OrderedDict(snb_mov_instructions + [
ivb_mov_instructions = list(
OrderedDict(
snb_mov_instructions
+ [
# https://www.felixcloutier.com/x86/mov
('mov gpr gpr', ('', 0)),
('mov imd gpr', ('', 0)),
# https://www.felixcloutier.com/x86/movapd
('movapd xmm xmm', ('', 0)),
('vmovapd xmm xmm', ('', 0)),
('vmovapd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movaps
('movaps xmm xmm', ('', 0)),
('vmovaps xmm xmm', ('', 0)),
('vmovaps ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
('movdqa xmm xmm', ('', 0)),
('vmovdqa xmm xmm', ('', 0)),
('vmovdqa ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
('movdqu xmm xmm', ('', 0)),
('vmovdqu xmm xmm', ('', 0)),
('vmovdqu ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movupd
('movupd xmm xmm', ('', 0)),
('vmovupd xmm xmm', ('', 0)),
('vmovupd ymm ymm', ('', 0)),
# https://www.felixcloutier.com/x86/movups
('movups xmm xmm', ('', 0)),
('vmovups xmm xmm', ('', 0)),
('vmovups ymm ymm', ('', 0)),
]).items())
]
).items()
)
hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
hsw_mov_instructions = list(
OrderedDict(
ivb_mov_instructions
+ [
# https://www.felixcloutier.com/x86/mov
('mov imd gpr', ('1*p0156', 1)),
('mov gpr gpr', ('1*p0156', 1)),
('movabs imd gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 6)),
('movbe mem gpr', ('1*p15', 6)),
# https://www.felixcloutier.com/x86/movmskpd
('movmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd xmm gpr', ('1*p0', 3)),
('vmovmskpd ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movmskps
('movmskps xmm gpr', ('1*p0', 3)),
('vmovmskps xmm gpr', ('1*p0', 3)),
('vmovmskps ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/movsx:movsxd
('movsx gpr gpr', ('1*p0156', 1)),
('movsb gpr gpr', ('1*p0156', 1)), # AT&T version
('movsw gpr gpr', ('1*p0156', 1)), # AT&T version
('movsl gpr gpr', ('1*p0156', 1)), # AT&T version
('movsq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/movzx
('movzx gpr gpr', ('1*p0156', 1)),
('movzb gpr gpr', ('1*p0156', 1)), # AT&T version
('movzw gpr gpr', ('1*p0156', 1)), # AT&T version
('movzl gpr gpr', ('1*p0156', 1)), # AT&T version
('movzq gpr gpr', ('1*p0156', 1)), # AT&T version
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('1*p0156+2*p06', 2)),
('cmova mem gpr', ('1*p0156+2*p06', 2)),
@@ -625,13 +594,11 @@ hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
('cmovs mem gpr', ('1*p0156+1*p06', 2)),
('cmovz gpr gpr', ('1*p0156+1*p06', 2)),
('cmovz mem gpr', ('1*p0156+1*p06', 2)),
# https://www.felixcloutier.com/x86/pmovmskb
('pmovmskb mm gpr', ('1*p0', 3)),
('pmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb xmm gpr', ('1*p0', 3)),
('vpmovmskb ymm gpr', ('1*p0', 3)),
# https://www.felixcloutier.com/x86/pmovsx
('pmovsxbw xmm xmm', ('1*p5', 1)),
('pmovsxbw mem xmm', ('1*p5', 1)),
@@ -651,7 +618,6 @@ hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
('vpmovsxbd mem ymm', ('1*p5', 1)),
('vpmovsxbq ymm ymm', ('1*p5', 1)),
('vpmovsxbq mem ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/pmovzx
('pmovzxbw xmm xmm', ('1*p5', 1)),
('pmovzxbw mem xmm', ('1*p5', 1)),
@@ -659,9 +625,14 @@ hsw_mov_instructions = list(OrderedDict(ivb_mov_instructions + [
('vpmovzxbw mem xmm', ('1*p5', 1)),
('vpmovzxbw ymm ymm', ('1*p5', 1)),
('vpmovzxbw mem ymm', ('1*p5', 1)),
]).items())
]
).items()
)
bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [
bdw_mov_instructions = list(
OrderedDict(
hsw_mov_instructions
+ [
# https://www.felixcloutier.com/x86/cmovcc
('cmova gpr gpr', ('2*p06', 1)),
('cmova mem gpr', ('2*p06', 1)),
@@ -723,78 +694,67 @@ bdw_mov_instructions = list(OrderedDict(hsw_mov_instructions + [
('cmovs mem gpr', ('1*p06', 1)),
('cmovz gpr gpr', ('1*p06', 1)),
('cmovz mem gpr', ('1*p06', 1)),
]).items())
]
).items()
)
skx_mov_instructions = list(OrderedDict(bdw_mov_instructions + [
skx_mov_instructions = list(
OrderedDict(
bdw_mov_instructions
+ [
# https://www.felixcloutier.com/x86/movapd
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movapd xmm xmm', ('1*p5', 1)),
# ('vmovapd xmm xmm', ('1*p5', 1)),
# ('vmovapd ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movaps
# TODO with masking!
# TODO the following may eliminate or be bound to 1*p0156:
# ('movaps xmm xmm', ('1*p5', 1)),
# ('vmovaps xmm xmm', ('1*p5', 1)),
# ('vmovaps ymm ymm', ('1*p5', 1)),
# https://www.felixcloutier.com/x86/movbe
('movbe gpr mem', ('1*p15', 4)),
('movbe mem gpr', ('1*p15', 4)),
# https://www.felixcloutier.com/x86/movddup
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
# TODO with masking!
# https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
# TODO with masking!
# https://www.felixcloutier.com/x86/movntdq
('vmovntdq zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntdqa
('vmovntdqa mem zmm', ('', 0)),
# https://www.felixcloutier.com/x86/movntpd
('vmovntpd zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movntps
('vmovntps zmm mem', ('', 0)), # TODO NT-store: what latency to use?
# https://www.felixcloutier.com/x86/movq2dq
('movq2dq mm xmm', ('1*p0+1*p015', 1)),
# https://www.felixcloutier.com/x86/movsd
# TODO with masking!
# https://www.felixcloutier.com/x86/movshdup
# TODO with masking!
# https://www.felixcloutier.com/x86/movsldup
# TODO with masking!
# https://www.felixcloutier.com/x86/movss
# TODO with masking!
# https://www.felixcloutier.com/x86/movupd
# TODO with masking!
# https://www.felixcloutier.com/x86/movups
# TODO with masking!
# https://www.felixcloutier.com/x86/pmovsx
# TODO with masking!
('vpmovsxbw ymm zmm', ('1*p5', 3)),
('vpmovsxbw mem zmm', ('1*p5', 1)),
]).items())
]
).items()
)
csx_mov_instructions = OrderedDict(skx_mov_instructions + [
csx_mov_instructions = OrderedDict(skx_mov_instructions + []).items()
]).items()
def get_description(arch, rhs_comment=None):
descriptions = {
@@ -803,7 +763,7 @@ def get_description(arch, rhs_comment=None):
'hsw': '\n'.join([p7.process_item(*item) for item in hsw_mov_instructions]),
'bdw': '\n'.join([p7.process_item(*item) for item in bdw_mov_instructions]),
'skx': '\n'.join([p7.process_item(*item) for item in skx_mov_instructions]),
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions])
'csx': '\n'.join([p7.process_item(*item) for item in csx_mov_instructions]),
}
description = descriptions[arch]
@@ -818,8 +778,10 @@ def get_description(arch, rhs_comment=None):
return description
if __name__ == '__main__':
import sys
if len(sys.argv) != 2:
print("Usage: {} (snb|ivb|hsw|bdw|skx|csx)".format(sys.argv[0]))
sys.exit(0)
@@ -829,4 +791,3 @@ if __name__ == '__main__':
except KeyError:
print("Unknown architecture.")
sys.exit(1)

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: Intel Haswell
arch_code: HSW
isa: x86

View File

@@ -1,52 +1,35 @@
osaca_version: 0.3.0
osaca_version: 0.3.4
isa: "AArch64"
# Contains all operand-irregular instruction forms OSACA supports for AArch64.
# Operand-regular for an AArch64 instruction form with N operands in the shape of
# mnemonic op1 ... opN
# means that op1 is the only destination operand and op2 to op(N) are source operands.
instruction_forms:
- name: "fmla"
- name: fmla
operands:
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: true
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: false
- class: "register"
prefix: "v"
shape: "s"
prefix: "*"
shape: "*"
source: true
destination: false
- name: "fmla"
- name: ldp
operands:
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: true
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: false
- class: "register"
prefix: "v"
shape: "d"
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "memory"
@@ -58,52 +41,14 @@ instruction_forms:
post-indexed: false
source: true
destination: false
- name: "ldp"
- name: ldp
operands:
- class: "register"
prefix: "q"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
prefix: "*"
source: false
destination: true
- class: "memory"
@@ -115,14 +60,63 @@ instruction_forms:
post-indexed: true
source: true
destination: true
- name: "stp"
- name: ldp
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: false
destination: true
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: [ldr, ldur]
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
source: true
destination: true
- name: [ldr, ldur]
operands:
- class: "register"
prefix: "*"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: stp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "memory"
@@ -134,14 +128,33 @@ instruction_forms:
post-indexed: false
source: false
destination: true
- name: "stp"
- name: stp
operands:
- class: "register"
prefix: "q"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "q"
prefix: "*"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: false
destination: true
- name: stp
operands:
- class: "register"
prefix: "*"
source: true
destination: false
- class: "register"
prefix: "*"
source: true
destination: false
- class: "memory"
@@ -150,73 +163,13 @@ instruction_forms:
index: "*"
scale: "*"
pre-indexed: false
post-indexed: false
post-indexed: true
source: false
destination: true
- name: "str"
- name: [str, stur]
operands:
- class: "register"
prefix: "x"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "str"
operands:
- class: "register"
prefix: "d"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "str"
operands:
- class: "register"
prefix: "q"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "stur"
operands:
- class: "register"
prefix: "q"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true
- name: "stur"
operands:
- class: "register"
prefix: "d"
prefix: "*"
source: true
destination: false
- class: "memory"

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.0
osaca_version: 0.3.4
isa: "x86"
# Contains all operand-irregular instruction forms OSACA supports for x86.
# Operand-regular for an x86 AT&T instruction form with N operands in the shape of
@@ -25,16 +25,6 @@ instruction_forms:
name: "gpr"
source: true
destination: true
- name: adc
operands:
- class: "register"
name: "gpr"
source: true
destination: false
- class: "register"
name: "gpr"
source: true
destination: true
- name: add
operands:
- class: "immediate"
@@ -191,7 +181,7 @@ instruction_forms:
name: "xmm"
source: true
destination: false
- name: addss
- name: [addss, addps, addpd]
operands:
- class: "register"
name: "xmm"
@@ -201,6 +191,26 @@ instruction_forms:
name: "xmm"
source: true
destination: false
- name: [addsubps, addsubpd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [aesdec, aesdeclast, aesenc, aesenclast]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: and
operands:
- class: "immediate"
@@ -261,6 +271,21 @@ instruction_forms:
name: "gpr"
source: false
destination: true
- name: [blendvps, blendvpd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
hidden_operands:
- class: "register"
name: "xmm0"
source: true
destination: false
- name: blsr
operands:
- class: "register"
@@ -2494,6 +2519,16 @@ instruction_forms:
source: true
destination: false
# TODO sets MXCSR
- name: [maxpd, maxps, maxsd, maxss, minpd, minps, minsd, minss]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: mulsd
operands:
- class: "register"
@@ -2514,7 +2549,607 @@ instruction_forms:
name: "xmm"
source: true
destination: true
- name: mulpd
- name: [mulpd, mulps]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [packssdw, packsswb, packusdw, packuswb]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [packssdw, packsswb, packusdw, packuswb]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [paddb, paddw, paddd, paddq, paddsb, paddsw, paddusb, paddusw]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [paddb, paddw, paddd, paddq, paddsb, paddsw, paddusb, paddusw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pand, por, pxor, pandn]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pand, por, pxor, pandn]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pavgb, pavgw]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pavgb, pavgw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pcmpeqb, pcmpeqw, pcmpeqd, pcmpeqq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pcmpeqb, pcmpeqw, pcmpeqd, pcmpeqq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pcmpgtb, pcmpgtw, pcmpgtd, pcmpgtq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: pmaddubsw
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: pmaddubsw
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: pmaddwd
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: pmaddwd
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pmaxsb, pmaxsw, pmaxsd, pmaxsq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pmaxsb, pmaxsw, pmaxsd, pmaxsq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pmaxub, pmaxuw, pmaxud, pmaxuq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pmaxub, pmaxuw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pminsb, pminsw, pminsd, pminsq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pminsb, pminsw, pminsd, pminsq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [pminub, pminuw, pminud, pminuq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pminub, pminuw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: pmuldq
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pmulhrsw, pmulhuw, pmulhw, pmullw]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [pmulhrsw, pmulhuw, pmulhw, pmullw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: pmuludq
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: pmuludq
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: psadbw
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: psadbw
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: pshufb
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: pshufb
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psignb, psignw, psignd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psignb, psignw, psignd]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psllw, pslld, psllq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psllw, pslld, psllq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psllw, pslld, psllq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psllw, pslld, psllq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: pslldq
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psraw, psrad, psraq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psraw, psrad, psraq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psraw, psrad, psraq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psraw, psrad, psraq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psrlw, psrld, psrlq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psrlw, psrld, psrlq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psrlw, psrld, psrlq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psrlw, psrld, psrlq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: psrldq
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psubb, psubw, psubd, psubq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psubb, psubw, psubd, psubq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psubsb, psubsw]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psubsb, psubsw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [psubusb, psubusw]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [psubusb, psubusw]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [punpckhbw, punpckhwd, punpckhdq, punpckhqdq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [punpckhbw, punpckhwd, punpckhdq, punpckhqdq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [punpcklbw, punpcklwd, punpckldq, punpcklqdq]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [punpcklbw, punpcklwd, punpckldq, punpcklqdq]
operands:
- class: "register"
name: "mm"
source: true
destination: false
- class: "register"
name: "mm"
source: true
destination: true
- name: [sha1msg2, sha1nexte]
operands:
- class: "register"
name: "xmm"
@@ -2763,6 +3398,26 @@ instruction_forms:
name: "CF"
source: false
destination: true
- name: [subsd, subpd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [subss, subps]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: test
operands:
- class: "register"
@@ -2774,6 +3429,10 @@ instruction_forms:
source: true
destination: false
hidden_operands:
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
@@ -2800,6 +3459,10 @@ instruction_forms:
source: true
destination: false
hidden_operands:
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
@@ -2826,6 +3489,10 @@ instruction_forms:
source: true
destination: false
hidden_operands:
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
@@ -2849,6 +3516,10 @@ instruction_forms:
source: true
destination: false
hidden_operands:
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
@@ -2861,6 +3532,16 @@ instruction_forms:
name: "PF"
source: false
destination: true
- name: [unpcklps, unpcklpd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: vldmxcsr
operands:
- class: "memory"
@@ -2955,6 +3636,90 @@ instruction_forms:
name: "*"
source: true
destination: true
- name: [vfmsub132pd, vfmsub213pd, vfmsub231pd]
operands:
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: true
- name: [vfmsub132ps, vfmsub213ps, vfmsub231ps]
operands:
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: true
- name: [vfmsub132sd, vfmsub213sd, vfmsub231sd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [vfmsub132ss, vfmsub213ss, vfmsub231ss]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [vfmsubadd132pd, vfmsubadd213pd, vfmsubadd231pd]
operands:
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: true
- name: [vfmsubadd132ps, vfmsubadd213ps, vfmsubadd231ps]
operands:
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: false
- class: "register"
name: "*"
source: true
destination: true
- name: vextractf128
operands:
- class: "immediate"
@@ -2986,6 +3751,60 @@ instruction_forms:
scale: "*"
source: true
destination: true
- name: [vptest, ptest]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: false
hidden_operands:
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
destination: true
- class: "flag"
name: "ZF"
source: false
destination: true
- class: "flag"
name: "PF"
source: false
destination: true
- name: vptest
operands:
- class: "register"
name: "ymm"
source: true
destination: false
- class: "register"
name: "ymm"
source: true
destination: false
hidden_operands:
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
destination: true
- class: "flag"
name: "ZF"
source: false
destination: true
- class: "flag"
name: "PF"
source: false
destination: true
- name: vzeroall
operands: []
hidden_operands:
@@ -3122,3 +3941,34 @@ instruction_forms:
name: "ymm15"
source: false
destination: true
- name: [unpckhps, unpckhpd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [andps, andpd, andnps, andnpd, orps, orpd, xorps, xorpd]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: true
- name: [shl, shr, shlq, shrq]
operands:
- class: "immediate"
imd: "int"
source: true
destination: false
- class: "register"
name: "gpr"
source: true
destination: true

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import os.path
import argparse
import os.path
import sys
import xml.etree.ElementTree as ET
from distutils.version import StrictVersion
@@ -8,8 +8,23 @@ from distutils.version import StrictVersion
from osaca.parser import get_parser
from osaca.semantics import MachineModel
intel_archs = ['CON', 'WOL', 'NHM', 'WSM', 'SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'SKX', 'KBL', 'CFL',
'CNL', 'ICL']
intel_archs = [
'CON',
'WOL',
'NHM',
'WSM',
'SNB',
'IVB',
'HSW',
'BDW',
'SKL',
'SKX',
'KBL',
'CFL',
'CNL',
'ICL',
]
amd_archs = ['ZEN1', 'ZEN+', 'ZEN2']
def port_pressure_from_tag_attributes(attrib):
@@ -19,6 +34,7 @@ def port_pressure_from_tag_attributes(attrib):
for p in attrib['ports'].split('+'):
cycles, ports = p.split('*')
ports = ports.lstrip('p')
ports = ports.lstrip('FP')
port_occupation.append([int(cycles), ports])
# Also consider div on DIV pipeline
@@ -88,10 +104,10 @@ def extract_paramters(instruction_tag, parser, isa):
return parameters
def extract_model(tree, arch):
def extract_model(tree, arch, skip_mem=True):
try:
isa = MachineModel.get_isa_for_arch(arch)
except:
except Exception:
print("Skipping...", file=sys.stderr)
return None
mm = MachineModel(isa=isa)
@@ -101,6 +117,7 @@ def extract_model(tree, arch):
ignore = False
mnemonic = instruction_tag.attrib['asm']
iform = instruction_tag.attrib['iform']
# skip any mnemonic which contains spaces (e.g., "REX CRC32")
if ' ' in mnemonic:
continue
@@ -118,6 +135,26 @@ def extract_model(tree, arch):
arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
if arch_tag is None:
continue
# skip any instructions without port utilization
if not any(['ports' in x.attrib for x in arch_tag.findall('measurement')]):
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
continue
# skip if computed and measured TP don't match
if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][
0
]:
print(
"Calculated TP from port utilization doesn't match TP, skip: ",
iform,
file=sys.stderr,
)
continue
# skip if instruction contains memory operand
if skip_mem and any(
[x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')]
):
print("Contains memory operand, skip: ", iform, file=sys.stderr)
continue
# We collect all measurement and IACA information and compare them later
for measurement_tag in arch_tag.iter('measurement'):
if 'TP_ports' in measurement_tag.attrib:
@@ -143,10 +180,14 @@ def extract_model(tree, arch):
if 'max_cycles' in l_tag.attrib
]
if latencies[1:] != latencies[:-1]:
print("Contradicting latencies found, using first:", mnemonic, latencies,
file=sys.stderr)
print(
"Contradicting latencies found, using smallest:",
iform,
latencies,
file=sys.stderr,
)
if latencies:
latency = latencies[0]
latency = min(latencies)
if ignore:
continue
@@ -160,9 +201,7 @@ def extract_model(tree, arch):
# Check if all are equal
if port_pressure:
if port_pressure[1:] != port_pressure[:-1]:
print(
"Contradicting port occupancies, using latest IACA:",
mnemonic, file=sys.stderr)
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
port_pressure = port_pressure[-1]
else:
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
@@ -218,19 +257,31 @@ def main():
help='architecture to extract, use IACA abbreviations (e.g., SNB). '
'if not given, all will be extracted and saved to file in CWD.',
)
parser.add_argument(
'--mem',
dest='skip_mem',
action='store_false',
help='add instruction forms including memory addressing operands, which are '
'skipped by default'
)
args = parser.parse_args()
basename = os.path.basename(__file__)
tree = ET.parse(args.xml)
print('Available architectures:', ', '.join(architectures(tree)))
print('# Available architectures:', ', '.join(architectures(tree)))
if args.arch:
model = extract_model(tree, args.arch)
print('# Chosen architecture: {}'.format(args.arch))
model = extract_model(tree, args.arch, args.skip_mem)
if model is not None:
print(rhs_comment(model.dump(), basename+" "+sys.argv[0]))
print(
rhs_comment(
model.dump(), basename + " " + args.xml.split('/')[-1] + " " + args.arch
)
)
else:
for arch in architectures(tree):
print(arch, end='')
model = extract_model(tree, arch.lower())
model = extract_model(tree, arch.lower(), args.skip_mem)
if model:
model_string = rhs_comment(model.dump(), basename + " " + arch)

771
osaca/data/n1.yml Normal file
View File

@@ -0,0 +1,771 @@
osaca_version: 0.3.4
micro_architecture: Arm Neoverse N1
arch_code: n1
isa: AArch64
ROB_size: 128 # wikichip
retired_uOps_per_cycle: 8 # wikichip
scheduler_size: 120 # wikichip
hidden_loads: false
load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 5.0, q: 6.0, v: 5.0, z: 4.0}
load_throughput:
- {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [[1, '67']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [[1, '67'], [1, '123']]}
- {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [[1, '67'], [1, '123']]}
load_throughput_default: [[1, '67']]
store_throughput: []
store_throughput_default: [[1, '56'], [1, '67']]
ports: ['0', '1', '2', '3', '4', '4DV', '5', '6', '7']
port_model_scheme: |
+----------------------------------------------------------------------------+
| 120 entries |
+----------------------------------------------------------------------------+
0 |BR 1 |IS0 2 |IS1 3 |IM0 4 |FP0 5 |FP1 6 |LDST 7 |LDST
\/ \/ \/ \/ \/ \/ \/ \/
+------+ +-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
|Branch| | INT | | INT | | INT | | FP ALU | | FP ALU | | AGU | | AGU |
+------+ | ALU | | ALU | | ALU | +--------+ +--------+ +-------+ +-------+
+-----+ +-----+ +-----+ +--------+ +--------+ +-------+ +-------+
+-----+ +-----+ | FP MUL | | FP MUL | |LD DATA| |LD DATA|
| ST | | INT | +--------+ +--------+ +-------+ +-------+
| INT | | MUL | +--------+ +---------+
+-----+ +-----+ | FP DIV | |SIMD SHFT|
+-----+ +--------+ +---------+
| INT | +--------+ +--------+
| DIV | | FMA | | FMA |
+-----+ +--------+ +--------+
+-----+ +--------+ +--------+
|SHIFT| | ST SIMD| | ST SIMD|
+-----+ | DATA | | DATA |
+-----+ +--------+ +--------+
| ST |
| INT |
+-----+
instruction_forms:
- name: add
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: add
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: adds
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '132']]
- name: b.ne
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: b.gt
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: bne
operands:
- class: identifier
throughput: 1.0
latency: 0.0
port_pressure: [[1, '0']]
- name: cmp
operands:
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: cmp
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.3333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]
- name: dup
operands:
- class: register
prefix: d
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: d
width: '*'
- class: register
prefix: d
width: '*'
- class: register
prefix: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fadd
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fdiv
operands:
- class: register
prefix: v
shape: s
width: 128
- class: register
prefix: v
shape: s
width: 128
- class: register
prefix: v
shape: s
width: 128
throughput: 6.0
latency: 8.0 # 1*p4+6*p4DV
port_pressure: [[1, '4'], [6, [4DV]]]
- name: fdiv
operands:
- class: register
prefix: v
shape: d
width: 128
- class: register
prefix: v
shape: d
width: 128
- class: register
prefix: v
shape: d
width: 128
throughput: 10.0
latency: 12.0 # 1*p4+10*p4DV
port_pressure: [[1, '4'], [10, [4DV]]]
- name: fmla
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmla
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmov
operands:
- {class: register, prefix: s}
- {class: immediate, imd: double}
latency: ~ # 1*p45
port_pressure: [[1, '45']]
throughput: 0.5
- name: fmul
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmul
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: fmul
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.5
latency: 3.0 # 1*p45
port_pressure: [[1, '45']]
- name: frecpe
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 2.0
latency: 4.0 # 1*p4
port_pressure: [[2, '4']]
- name: frecpe
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 1.0
latency: 3.0 # 1*p4
port_pressure: [[1, '4']]
- name: fsub
operands:
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
- class: register
prefix: v
shape: s
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: fsub
operands:
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
- class: register
prefix: v
shape: d
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 5.0 # 2*p67, from n1 opt guide
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 5.0 # 2*p67+1*p123, from n1 opt guide
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67, from n1 opt guide
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 7.0 # 2*p67+1*p123, from n1 opt guide
port_pressure: [[2, '56'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67
port_pressure: [[2, '67']]
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: true
post-indexed: false
throughput: 1.0
latency: 7.0 # 2*p67+1*p123
port_pressure: [[2, '67'], [1, '123']]
- name: ldp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 5.0 # 2*p67+1*p123
port_pressure: [[2, '67'], [1, '123']]
- name: ldur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 6.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 6.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
latency: 5.0 # 1*p67
port_pressure: [[1, '67']]
- name: ldr
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
prefix: q
- class: register
prefix: q
throughput: 0.0
latency: 0.0
port_pressure: []
- name: ldr
operands:
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.0
latency: 0.0
port_pressure: []
- name: mov
operands:
- class: register
prefix: x
- class: register
prefix: x
throughput: 0.25
latency: 1.0 # 1*p3456
port_pressure: [[1, '3456']]
- name: mov
operands:
- class: register
prefix: v
shape: b
width: '*'
- class: register
prefix: v
shape: b
width: '*'
throughput: 0.5
latency: 2.0 # 1*p45
port_pressure: [[1, '45']]
- name: stp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p45+1*p67
port_pressure: [[2, '45'], [1, '67']]
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 2*p45+2*p67+1*p123
port_pressure: [[2, '45'], [2, '67'], [1, '123']]
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p45+2*p67
port_pressure: [[2, '45'], [2, '67']]
- name: stur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p23
port_pressure: [[1, '56'], [1, '23']]
- name: stur # JL: assumed from n1 opt guide
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p67+1*p45
port_pressure: [[2, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p23
port_pressure: [[1, '56'], [1, '23']]
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 0.5
latency: 0 # 1*p67+1*p45
port_pressure: [[1, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 0.5
latency: 0 # 1*p67+1*p45+1*p123
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 0 # 2*p67+1*p45
port_pressure: [[1, '67'], [1, '45']]
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 1*p67+1*p45+1*p123
port_pressure: [[1, '67'], [1, '45'], [1, '123']]
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 0 # 1*p67+1*p23+1*p123
port_pressure: [[1, '67'], [1, '23'], [1, '123']]
- name: sub
operands:
- class: register
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p123
port_pressure: [[1, '123']]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: Thunder X2
arch_code: tx2
isa: AArch64
@@ -267,6 +267,34 @@ instruction_forms:
throughput: 0.5
latency: 6.0 # 1*p01
port_pressure: [[1, '01']]
- name: frecpe
operands:
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: frecpe
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
throughput: 0.5
latency: 5.0 # 1*p01
port_pressure: [[1, '01']]
- name: fsub
operands:
- class: register

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: AMD Zen (family 17h)
arch_code: ZEN1
isa: x86

View File

@@ -1,4 +1,4 @@
osaca_version: 0.3.2
osaca_version: 0.3.4
micro_architecture: AMD Zen2
arch_code: ZEN2
isa: x86
@@ -725,6 +725,39 @@ instruction_forms:
throughput: 1.0
latency: 0 # 1*p89+1*p10D
port_pressure: [[1, '89'], [1, [10D]]]
- name: vmovdqu
operands:
- class: memory
base: gpr
offset: "*"
index: ~
scale: 1
- class: register
name: "*"
throughput: 0.5
latency: 4.0 # 1*p8910+1*p8D9D
port_pressure: [[1, ['8','9','10']], [1, [8D,9D]]]
- name: vmovdqu
operands:
- class: memory
base: gpr
offset: "*"
index: gpr
scale: "*"
- class: register
name: "*"
throughput: 0.5
latency: 4.0 # 1*p8910+1*p8D9D
port_pressure: [[1, ['8','9']], [1, [8D,9D]]]
- name: vmovdqu
operands:
- class: register
name: "*"
- class: register
name: "*"
throughput: 0.0
latency: 0.0
port_pressure: []
- name: add
operands:
- class: immediate
@@ -1081,6 +1114,16 @@ instruction_forms:
latency: 3.0 # 1*p01
port_pressure: [[1, '01']]
uops: 1
- name: [shl, shr]
operands:
- class: immediate
imd: int
- class: register
name: gpr
throughput: 0.25
latency: 1.0 # 1*p4567
port_pressure: [[1, '4567']]
uops: 1
- name: UNPCKHPS # model_importer.py ./model_importer.py
operands: # model_importer.py ./model_importer.py
- class: register # model_importer.py ./model_importer.py

View File

@@ -274,10 +274,19 @@ def _create_db_operand_x86(operand):
def _scrape_from_felixcloutier(mnemonic):
"""Scrape src/dst information from felixcloutier website and return infromation for user."""
from bs4 import BeautifulSoup
"""Scrape src/dst information from felixcloutier website and return information for user."""
import requests
try:
from bs4 import BeautifulSoup
except ImportError:
print(
'Module BeautifulSoup not installed. Fetching instruction form information '
'online requires BeautifulSoup.\nUse \'pip install bs4\' for installation.',
file=sys.stderr,
)
sys.exit(1)
index = 'https://www.felixcloutier.com/x86/index.html'
base_url = 'https://www.felixcloutier.com/x86/'
url = base_url + mnemonic.lower()
@@ -287,11 +296,14 @@ def _scrape_from_felixcloutier(mnemonic):
# GET website
r = requests.get(url=url)
# Parse result
soup = BeautifulSoup(r.text, 'html.parser')
if r.status_code == 200:
# Found result
table = soup.find('h2', attrs={'id': 'instruction-operand-encoding'}).findNextSibling()
operand_enc = BeautifulSoup(r.text, 'html.parser').find(
'h2', attrs={'id': 'instruction-operand-encoding'}
)
if operand_enc:
# operand encoding found, otherwise, no need to mark as suspicious
table = operand_enc.findNextSibling()
operands = _get_src_dst_from_table(table)
elif r.status_code == 404:
# Check for alternative href
@@ -300,10 +312,13 @@ def _scrape_from_felixcloutier(mnemonic):
if len(alternatives) > 0:
# alternative(s) found, take first one
url = base_url + alternatives[0].attrs['href'][2:]
operand_enc = BeautifulSoup(requests.get(url=url).text, 'html.parser').find(
'h2', attrs={'id': 'instruction-operand-encoding'}
)
if operand_enc:
# operand encoding found, otherwise, no need to mark as suspicious
table = (
BeautifulSoup(requests.get(url=url).text, 'html.parser')
.find('h2', attrs={'id': 'instruction-operand-encoding'})
.findNextSibling()
operand_enc.findNextSibling()
)
operands = _get_src_dst_from_table(table)
if operands:
@@ -313,9 +328,8 @@ def _scrape_from_felixcloutier(mnemonic):
return (suspicious, ' '.join(operands))
def _get_src_dst_from_table(table):
def _get_src_dst_from_table(table, num_operands=2):
"""Prettify bs4 table object to string for user"""
NUM_OPERANDS = 2
# Parse table
header = [''.join(x.string.lower().split()) for x in table.find('tr').findAll('td')]
data = table.findAll('tr')[1:]
@@ -327,10 +341,10 @@ def _get_src_dst_from_table(table):
data_dict[i][header[j]] = col.string
# Get only the instruction forms with the requested number of operands
num_ops = [_get_number_of_operands(row) for _, row in data_dict.items()]
if NUM_OPERANDS in num_ops:
row = data_dict[num_ops.index(NUM_OPERANDS)]
if num_operands in num_ops:
row = data_dict[num_ops.index(num_operands)]
reads_writes = []
for i in range(1, NUM_OPERANDS + 1):
for i in range(1, num_operands + 1):
m = re.search(r'(\([^\(\)]+\))', row['operand{}'.format(i)])
if not m:
# no parentheses (probably immediate operand), assume READ
@@ -369,6 +383,7 @@ def _check_sanity_arch_db(arch_mm, isa_mm, internet_check=True):
missing_port_pressure = []
suspicious_instructions = []
duplicate_instr_arch = []
duplicate_strings = []
for instr_form in arch_mm['instruction_forms']:
# check value in DB entry
@@ -388,6 +403,7 @@ def _check_sanity_arch_db(arch_mm, isa_mm, internet_check=True):
# instr forms with less than 3 operands might need an ISA DB entry due to src_reg operands
if (
len(instr_form['operands']) < 3
and len(instr_form['operands']) > 1
and 'mov' not in instr_form['name'].lower()
and not instr_form['name'].lower().startswith('j')
and instr_form not in suspicious_instructions
@@ -406,9 +422,10 @@ def _check_sanity_arch_db(arch_mm, isa_mm, internet_check=True):
duplicate_instr_arch.append(instr_form)
# every entry exists twice --> uniquify
tmp_list = []
for i in range(0, len(duplicate_instr_arch)):
for _ in range(0, len(duplicate_instr_arch)):
tmp = duplicate_instr_arch.pop()
if tmp not in duplicate_instr_arch:
if _get_full_instruction_name(tmp).lower() not in duplicate_strings:
duplicate_strings.append(_get_full_instruction_name(tmp).lower())
tmp_list.append(tmp)
duplicate_instr_arch = tmp_list
return (

View File

@@ -279,7 +279,7 @@ class Frontend(object):
'------------------\n'
' No final analysis is given. If you want to ignore this\n'
' warning and run the analysis anyway, start osaca with\n'
' --ignore_unknown flag.\n'
' --ignore-unknown flag.\n'
'--------------------------------------------------------------------------------'
'----------------{}\n'
).format(amount, '-' * len(str(amount)))
@@ -319,7 +319,12 @@ class Frontend(object):
continue
left_len = len(str(float(ports[i])).split('.')[0])
substr = '{:' + str(left_len) + '.' + str(max(port_len[i] - left_len - 1, 0)) + 'f}'
string_result += substr.format(ports[i]) + ' {} '.format(separator[i])
substr = substr.format(ports[i])
string_result += (
substr + ' {} '.format(separator[i])
if '.' in substr
else '{:.1f}{} '.format(ports[i], separator[i])
)
return string_result[:-1]
def _get_node_by_lineno(self, lineno, kernel):

View File

@@ -17,7 +17,7 @@ MODULE_DATA_DIR = os.path.join(
)
LOCAL_OSACA_DIR = os.path.join(os.path.expanduser('~') + '/.osaca/')
DATA_DIR = os.path.join(LOCAL_OSACA_DIR, 'data/')
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2']
SUPPORTED_ARCHS = ['SNB', 'IVB', 'HSW', 'BDW', 'SKX', 'CSX', 'ZEN1', 'ZEN2', 'TX2', 'N1']
# Stolen from pip
@@ -71,7 +71,7 @@ def create_parser(parser=None):
parser.add_argument(
'--arch',
type=str,
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2).',
help='Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ZEN1, ZEN2, TX2, N1).',
)
parser.add_argument(
'--fixed',
@@ -188,9 +188,9 @@ def insert_byte_marker(args):
from kerncraft.incore_model import asm_instrumentation
except ImportError:
print(
"Module kerncraft not installed. Use 'pip install --user "
"kerncraft' for installation.\nFor more information see "
"https://github.com/RRZE-HPC/kerncraft",
'Module kerncraft not installed. Use \'pip install --user '
'kerncraft\' for installation.\nFor more information see '
'https://github.com/RRZE-HPC/kerncraft',
file=sys.stderr,
)
sys.exit(1)

View File

@@ -33,8 +33,14 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(relocation).setResultsName('relocation')
).setResultsName('identifier')
# Label
numeric_identifier = pp.Group(
pp.Word(pp.nums).setResultsName('name')
+ pp.Optional(pp.oneOf('b f', caseless=True).setResultsName('suffix'))
).setResultsName('identifier')
self.label = pp.Group(
identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment)
(identifier | numeric_identifier).setResultsName('name')
+ pp.Literal(':')
+ pp.Optional(self.comment)
).setResultsName(self.LABEL_ID)
# Register: pp.Regex('^%[0-9a-zA-Z]+{}{z},?')
self.register = pp.Group(
@@ -43,7 +49,7 @@ class ParserX86ATT(BaseParser):
+ pp.Optional(pp.Literal('(') + pp.Word(pp.nums) + pp.Literal(')'))
+ pp.Optional(
pp.Literal('{')
+ pp.Literal('%')
+ pp.Optional(pp.Suppress(pp.Literal('%')))
+ pp.Word(pp.alphanums).setResultsName('mask')
+ pp.Literal('}')
+ pp.Optional(
@@ -98,7 +104,7 @@ class ParserX86ATT(BaseParser):
+ pp.Literal(')')
+ pp.Optional(
pp.Literal('{')
+ pp.Literal('%')
+ pp.Optional(pp.Suppress(pp.Literal('%')))
+ pp.Word(pp.alphanums).setResultsName('mask')
+ pp.Literal('}')
)
@@ -108,23 +114,20 @@ class ParserX86ATT(BaseParser):
).setResultsName(self.MEMORY_ID)
# Directive
directive_option = pp.Combine(
pp.Word('#@.', exact=1) + pp.Word(pp.printables, excludeChars=',')
)
# parameter can be any quoted string or sequence of characters besides '#' (for comments)
# or ',' (parameter delimiter)
directive_parameter = (
pp.quotedString
^ directive_option
^ identifier
^ hex_number
^ decimal_number
^ self.register
^ pp.Group(pp.Word(pp.alphanums + '_').setResultsName('name'))
^ (
pp.Word(pp.printables, excludeChars=',#')
+ pp.Optional(pp.Suppress(pp.Literal(',')))
)
^ pp.Suppress(pp.Literal(','))
)
commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',')
self.directive = pp.Group(
pp.Literal('.')
+ pp.Word(pp.alphanums + '_').setResultsName('name')
+ commaSeparatedList.setResultsName('parameters')
+ pp.ZeroOrMore(directive_parameter).setResultsName('parameters')
+ pp.Optional(self.comment)
).setResultsName(self.DIRECTIVE_ID)
@@ -134,7 +137,9 @@ class ParserX86ATT(BaseParser):
pp.alphanums
).setResultsName('mnemonic')
# Combine to instruction form
operand_first = pp.Group(self.register ^ immediate ^ memory ^ identifier)
operand_first = pp.Group(
self.register ^ immediate ^ memory ^ identifier ^ numeric_identifier
)
operand_rest = pp.Group(self.register ^ immediate ^ memory)
self.instruction_parser = (
mnemonic
@@ -277,8 +282,18 @@ class ParserX86ATT(BaseParser):
return self.process_immediate(operand[self.IMMEDIATE_ID])
if self.LABEL_ID in operand:
return self.process_label(operand[self.LABEL_ID])
if self.DIRECTIVE_ID in operand:
return self.process_directive(operand[self.DIRECTIVE_ID])
return operand
def process_directive(self, directive):
"""Post-process directive asm line.

Builds a new dictionary from the parsed ``directive``: ``name`` is always copied,
``parameters`` is copied when present (otherwise it stays an empty list) and
``comment`` is copied only when present. The result is returned wrapped in an
``AttrDict`` under the ``self.DIRECTIVE_ID`` key.
"""
directive_new = {'name': directive['name'], 'parameters': []}
if 'parameters' in directive:
directive_new['parameters'] = directive['parameters']
if 'comment' in directive:
directive_new['comment'] = directive['comment']
return AttrDict({self.DIRECTIVE_ID: directive_new})
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
# Remove unnecessarily created dictionary entries during memory address parsing
@@ -297,7 +312,7 @@ class ParserX86ATT(BaseParser):
def process_label(self, label):
"""Post-process label asm line"""
# remove duplicated 'name' level due to identifier
label['name'] = label['name']['name']
label['name'] = label['name'][0]['name']
return AttrDict({self.LABEL_ID: label})
def process_immediate(self, immediate):

View File

@@ -241,6 +241,7 @@ class MachineModel(object):
"""Return ISA for given micro-arch ``arch``."""
arch_dict = {
'tx2': 'aarch64',
'n1': 'aarch64',
'zen1': 'x86',
'zen+': 'x86',
'zen2': 'x86',
@@ -318,7 +319,8 @@ class MachineModel(object):
# Check if modification date of DB is older than cached version
if os.path.getmtime(filepath) < os.path.getmtime(cachepath):
# load cached version
cached_db = pickle.load(open(cachepath, 'rb'))
with open(cachepath, 'rb') as f:
cached_db = pickle.load(f)
return cached_db
else:
# DB newer than cached version --> delete cached file and return False
@@ -336,7 +338,8 @@ class MachineModel(object):
"""
hashname = self._get_hashname(filepath)
filepath = os.path.join(utils.CACHE_DIR, hashname + '.pickle')
pickle.dump(data, open(filepath, 'wb'))
with open(filepath, 'wb') as f:
pickle.dump(data, f)
def _get_hashname(self, name):
"""Returns unique hashname for machine model"""
@@ -396,7 +399,7 @@ class MachineModel(object):
operand_string += 'p' if operand['post-indexed'] else ''
return operand_string
def _create_db_operand_aarch64(operand):
def _create_db_operand_aarch64(self, operand):
"""Create instruction form operand for DB out of operand string."""
if operand == 'i':
return {'class': 'immediate', 'imd': 'int'}
@@ -417,7 +420,7 @@ class MachineModel(object):
else:
raise ValueError('Parameter {} is not a valid operand code'.format(operand))
def _create_db_operand_x86(operand):
def _create_db_operand_x86(self, operand):
"""Create instruction form operand for DB out of operand string."""
if operand == 'r':
return {'class': 'register', 'name': 'gpr'}
@@ -526,7 +529,7 @@ class MachineModel(object):
if 'register' in operand:
if i_operand['class'] != 'register':
return False
return self._is_x86_reg_type(i_operand['name'], operand['register'])
return self._is_x86_reg_type(i_operand, operand['register'], consider_masking=True)
# memory
if 'memory' in operand:
if i_operand['class'] != 'memory':
@@ -546,7 +549,9 @@ class MachineModel(object):
)
for key in operand_attributes:
try:
if operand_1[key] != operand_2[key] and not any([x == self.WILDCARD for x in [operand_1[key], operand_2[key]]]):
if operand_1[key] != operand_2[key] and not any(
[x == self.WILDCARD for x in [operand_1[key], operand_2[key]]]
):
return False
except KeyError:
return False
@@ -573,8 +578,9 @@ class MachineModel(object):
return False
return True
def _is_x86_reg_type(self, i_reg_name, reg):
def _is_x86_reg_type(self, i_reg, reg, consider_masking=False):
"""Check if register type match."""
i_reg_name = i_reg if not consider_masking else i_reg['name']
# check for wildcards
if i_reg_name == self.WILDCARD or reg['name'] == self.WILDCARD:
return True
@@ -582,6 +588,33 @@ class MachineModel(object):
parser_x86 = ParserX86ATT()
if parser_x86.is_vector_register(reg):
if reg['name'].rstrip(string.digits).lower() == i_reg_name:
# Consider masking and zeroing for AVX512
if consider_masking:
mask_ok = zero_ok = True
if 'mask' in reg or 'mask' in i_reg:
# one instruction is missing the masking while the other has it
mask_ok = False
# check for wildcard
if (
(
'mask' in reg
and reg['mask'].rstrip(string.digits).lower() == i_reg.get('mask')
)
or reg.get('mask') == self.WILDCARD
or i_reg.get('mask') == self.WILDCARD
):
mask_ok = True
if bool('zeroing' in reg) ^ bool('zeroing' in i_reg):
# one instruction is missing zeroing while the other has it
zero_ok = False
# check for wildcard
if (
i_reg.get('zeroing') == self.WILDCARD
or reg.get('zeroing') == self.WILDCARD
):
zero_ok = True
if not mask_ok or not zero_ok:
return False
return True
else:
if i_reg_name == 'gpr':

View File

@@ -91,8 +91,8 @@ setup(
# https://packaging.python.org/en/latest/requirements.html
install_requires=[
'networkx',
'pyparsing',
'ruamel.yaml',
'pyparsing>=2.3.1',
'ruamel.yaml>=0.15.71',
],
python_requires='>=3.5',

View File

@@ -124,6 +124,18 @@ class TestDBInterface(unittest.TestCase):
with self.assertRaises(AssertionError):
dbi.import_benchmark_output('csx', 'ibench', 'invalid_file')
def test_online_scraping(self):
# Compares the (suspicious, operand string) tuple returned by
# dbi._scrape_from_felixcloutier() with the expected value for three mnemonics.
# NOTE: the scraper fetches pages from felixcloutier.com, so this test needs
# network access.
# addpd -- suspicious instruction, normal URL
instr_1 = ['addpd', (True, '(r) (r,w)')]
self.assertEqual(dbi._scrape_from_felixcloutier(instr_1[0]), instr_1[1])
# movapd -- not suspicious
instr_2 = ['movapd', (False, '(r) (w)')]
self.assertEqual(dbi._scrape_from_felixcloutier(instr_2[0]), instr_2[1])
# vfmadd132pd -- only in combined view with 213/231.
# No 2-operand version, therefore, empty string
instr_3 = ['vfmadd132pd', (True, '')]
self.assertEqual(dbi._scrape_from_felixcloutier(instr_3[0]), instr_3[1])
##################
# Helper functions
##################

View File

@@ -45,20 +45,31 @@ class TestParserX86ATT(unittest.TestCase):
self.assertEqual(len(self._get_directive(self.parser, '\t.text').parameters), 0)
self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90').name, 'align')
self.assertEqual(len(self._get_directive(self.parser, '\t.align\t16,0x90').parameters), 2)
self.assertEqual(len(self._get_directive(self.parser, '.text').parameters), 0)
self.assertEqual(
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
)
self.assertEqual(
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
'"path/to/file.c"',
)
self.assertEqual(
self._get_directive(self.parser, '\t.set\tL$set$0,LECIE1-LSCIE1').parameters,
[{'name': 'L$set$0'}, {'name': 'LECIE1-LSCIE1'}])
['L$set$0', 'LECIE1-LSCIE1'],
)
self.assertEqual(
self._get_directive(
self.parser,
'\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support'
'\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support',
).parameters,
[{'name': v} for v in
['__TEXT', '__eh_frame', 'coalesced', 'no_toc+strip_static_syms+live_support']])
['__TEXT', '__eh_frame', 'coalesced', 'no_toc+strip_static_syms+live_support'],
)
self.assertEqual(
self._get_directive(
self.parser, '\t.section\t__TEXT,__literal16,16byte_literals').parameters,
[{'name': v} for v in ['__TEXT', '__literal16', '16byte_literals']])
self.parser, '\t.section\t__TEXT,__literal16,16byte_literals'
).parameters,
['__TEXT', '__literal16', '16byte_literals'],
)
self.assertEqual(
self._get_directive(self.parser, '\t.align\t16,0x90').parameters[1], '0x90'
)
@@ -169,7 +180,7 @@ class TestParserX86ATT(unittest.TestCase):
instruction_form_3 = {
'instruction': None,
'operands': [],
'directive': {'name': 'quad', 'parameters': [{'name': '.2.3_2__kmpc_loc_pack.2'}]},
'directive': {'name': 'quad', 'parameters': ['.2.3_2__kmpc_loc_pack.2']},
'comment': 'qed',
'label': None,
'line': '.quad .2.3_2__kmpc_loc_pack.2 #qed',
@@ -186,9 +197,7 @@ class TestParserX86ATT(unittest.TestCase):
'scale': 1,
}
},
{
'register': {'name': 'ecx'}
}
{'register': {'name': 'ecx'}},
],
'directive': None,
'comment': '12.9',