Added new instructions and fixed the false-positive assignment of stores in the dynamic TP/LT (throughput/latency) combination for AArch64

This commit is contained in:
JanLJL
2020-01-22 21:40:11 +01:00
parent 092403c529
commit 2fc1f3a186
4 changed files with 206 additions and 136 deletions

View File

@@ -54,8 +54,8 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
pre-indexed: false
post-indexed: false
source: true
destination: false
- name: "ldp"
@@ -73,10 +73,48 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
pre-indexed: false
post-indexed: false
source: true
destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
source: true
destination: true
- name: "stp"
operands:
- class: "register"
@@ -92,8 +130,8 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
pre-indexed: false
post-indexed: false
source: false
destination: true
- name: "stp"
@@ -111,8 +149,8 @@ instruction_forms:
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
pre-indexed: false
post-indexed: false
source: false
destination: true
- name: "str"
@@ -175,3 +213,18 @@ instruction_forms:
post-indexed: "*"
source: false
destination: true
- name: "stur"
operands:
- class: "register"
prefix: "d"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true

View File

@@ -318,7 +318,7 @@ instruction_forms:
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
@@ -334,7 +334,7 @@ instruction_forms:
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
@@ -344,13 +344,13 @@ instruction_forms:
prefix: q
- class: memory
base: x
offset: imd
index: ~
offset: '*'
index: '*'
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
@@ -366,7 +366,7 @@ instruction_forms:
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
@@ -376,13 +376,13 @@ instruction_forms:
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
@@ -392,13 +392,13 @@ instruction_forms:
prefix: q
- class: memory
base: x
offset: imd
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: true
post-indexed: false
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldp
operands:
@@ -408,13 +408,13 @@ instruction_forms:
prefix: d
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: ~ # 2*p34
latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']]
- name: ldur # JL: assumed from ldr
operands:
@@ -450,9 +450,9 @@ instruction_forms:
prefix: d
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
@@ -465,8 +465,8 @@ instruction_forms:
- class: memory
base: x
offset: imd
index: ~
scale: 1
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
@@ -478,9 +478,9 @@ instruction_forms:
prefix: d
- class: memory
base: x
offset: ~
index: x
scale: 8
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
throughput: 0.5
@@ -557,29 +557,13 @@ instruction_forms:
prefix: d
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: ~ # 4*p34
port_pressure: [[4.0, '34']]
- name: stp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: ~ # 4*p34
latency: 0 # 4*p34
port_pressure: [[4.0, '34']]
- name: stp
operands:
@@ -589,13 +573,13 @@ instruction_forms:
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 2.0
latency: ~ # 2*p34+2*p5
latency: 0 # 2*p34+2*p5
port_pressure: [[2.0, '34'], [2.0, '5']]
- name: stp
operands:
@@ -605,30 +589,28 @@ instruction_forms:
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: ~ # 2*p34+2*p5
latency: 0 # 2*p34+2*p5
port_pressure: [[2.0, '34'], [2.0, '5']]
- name: stp
- name: stur # JL: assumed from str
operands:
- class: register
prefix: q
- class: register
prefix: q
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: ~ # 2*p34+2*p5
port_pressure: [[2.0, '34'], [2.0, '5']]
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: stur # JL: assumed from str
operands:
- class: register
@@ -649,13 +631,13 @@ instruction_forms:
prefix: x
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
@@ -669,7 +651,7 @@ instruction_forms:
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
@@ -677,13 +659,13 @@ instruction_forms:
prefix: d
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
@@ -697,7 +679,7 @@ instruction_forms:
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
@@ -705,13 +687,13 @@ instruction_forms:
prefix: q
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
operands:
@@ -719,25 +701,22 @@ instruction_forms:
prefix: x
- class: memory
base: x
offset: ~
index: ~
scale: 1
offset: '*'
index: '*'
scale: '*'
pre-indexed: false
post-indexed: true
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str
- name: sub
operands:
- class: register
prefix: x
- class: memory
base: x
offset: ~
index: x
scale: 1
pre-indexed: false
post-indexed: false
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]
prefix: w
- class: register
prefix: w
- class: immediate
imd: int
throughput: 0.33333333
latency: 1.0 # 1*p012
port_pressure: [[1, '012']]

View File

@@ -82,7 +82,7 @@ instruction_forms:
- class: register
name: gpr
throughput: 0.25
latency: ~ # 1*p4567
latency: 1.0 # 1*p4567
port_pressure: [[1, '4567']]
- name: cmpq
operands:
@@ -91,7 +91,7 @@ instruction_forms:
- class: register
name: gpr
throughput: 0.25
latency: ~ # 1*p4567
latency: 1.0 # 1*p4567
port_pressure: [[1, '4567']]
- name: incq
operands:
@@ -104,19 +104,19 @@ instruction_forms:
operands:
- class: identifier
throughput: 0.0
latency: ~
latency: 0
port_pressure: []
- name: jb
operands:
- class: identifier
throughput: 0.0
latency: ~
latency: 0
port_pressure: []
- name: jne
operands:
- class: identifier
throughput: 0.0
latency: ~
latency: 0
port_pressure: []
- name: leaq
operands:
@@ -128,7 +128,7 @@ instruction_forms:
- class: register
name: gpr
throughput: 0.5
latency: ~ # 1*p89
latency: 1.0 # 1*p89
port_pressure: [[1, '89']]
- name: movl
operands:
@@ -367,7 +367,7 @@ instruction_forms:
index: gpr
scale: 1
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovapd
operands:
@@ -388,7 +388,7 @@ instruction_forms:
index: gpr
scale: 1
throughput: 2.0
latency: 3.0 # 2*p89+2*pST
latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovapd
operands:
@@ -400,19 +400,31 @@ instruction_forms:
index: gpr
scale: 1
throughput: 2.0
latency: 3.0 # 2*p89+2*pST
latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovaps
- name: vmovups
operands:
- class: memory
base: gpr
offset: "*"
index: "*"
scale: "*"
- class: register
name: xmm
throughput: 0.5
latency: 4.0 # 1*p89+1*p8D9D
port_pressure: [[1, '89'], [1, [8D, 9D]]]
- name: vmovups
operands:
- class: register
name: xmm
- class: memory
base: gpr
offset: ~
index: gpr
scale: 1
offset: "*"
index: "*"
scale: "*"
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovaps
operands:
@@ -420,11 +432,11 @@ instruction_forms:
name: xmm
- class: memory
base: gpr
offset: imd
index: gpr
scale: 1
offset: "*"
index: "*"
scale: "*"
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovupd
operands:
@@ -436,7 +448,7 @@ instruction_forms:
index: gpr
scale: 1
throughput: 2.0
latency: 3.0 # 2*p89+2*pST
latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovupd
operands:
@@ -448,7 +460,7 @@ instruction_forms:
index: gpr
scale: 1
throughput: 2.0
latency: 3.0 # 2*p89+2*pST
latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovupd
operands:
@@ -460,7 +472,7 @@ instruction_forms:
index: gpr
scale: 1
throughput: 2.0
latency: 3.0 # 2*p89+2*pST
latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovupd
operands:
@@ -502,7 +514,7 @@ instruction_forms:
index: ~
scale: 1
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovsd
operands:
@@ -514,7 +526,7 @@ instruction_forms:
index: ~
scale: 1
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovsd
operands:
@@ -526,7 +538,7 @@ instruction_forms:
index: gpr
scale: 1
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovsd
operands:
@@ -538,5 +550,5 @@ instruction_forms:
index: gpr
scale: 1
throughput: 1.0
latency: 4.0 # 1*p89+1*pST
latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]]

View File

@@ -176,6 +176,8 @@ class ArchSemantics(ISASemantics):
operands.index(self._create_reg_wildcard())
]
)
data_port_pressure = [0.0 for _ in range(port_number)]
data_port_uops = []
if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
# LOAD performance data
data_port_uops = self._machine_model.get_load_throughput(
@@ -196,17 +198,41 @@ class ArchSemantics(ISASemantics):
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
if INSTR_FLAGS.HAS_ST in instruction_form['flags']:
# STORE performance data
data_port_uops = self._machine_model.get_store_throughput(
[
x['memory']
for x in instruction_form['semantic_operands']['destination']
+ instruction_form['semantic_operands']['src_dst']
if 'memory' in x
][0]
destinations = (
instruction_form['semantic_operands']['destination']
+ instruction_form['semantic_operands']['src_dst']
)
data_port_pressure = self._machine_model.average_port_pressure(
data_port_uops
st_data_port_uops = self._machine_model.get_store_throughput(
[x['memory'] for x in destinations if 'memory' in x][0]
)
# zero the data port pressure and remove the HAS_ST flag if
#  - there is no memory operand in dst, and
#  - all memory operands in src_dst are pre-/post-indexed,
# since the instruction is then not a memory store
if (
self._isa == 'aarch64'
and 'memory'
not in instruction_form['semantic_operands']['destination']
and all(
[
'post_indexed' in op['memory']
or 'pre_indexed' in op['memory']
for op in instruction_form['semantic_operands']['src_dst']
if 'memory' in op
]
)
):
st_data_port_uops = []
instruction_form['flags'].remove(INSTR_FLAGS.HAS_ST)
# sum up the pressure on all data ports in case of both LOAD and STORE
st_data_port_pressure = self._machine_model.average_port_pressure(
st_data_port_uops
)
data_port_pressure = [
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
]
data_port_uops += st_data_port_uops
throughput = max(
max(data_port_pressure), instruction_data_reg['throughput']
)