added new instructions and fixed false positive assignment of stores by dynamic TP/LT combination for aarch64

This commit is contained in:
JanLJL
2020-01-22 21:40:11 +01:00
parent 092403c529
commit 2fc1f3a186
4 changed files with 206 additions and 136 deletions

View File

@@ -54,8 +54,8 @@ instruction_forms:
offset: "*" offset: "*"
index: "*" index: "*"
scale: "*" scale: "*"
pre-indexed: "*" pre-indexed: false
post-indexed: "*" post-indexed: false
source: true source: true
destination: false destination: false
- name: "ldp" - name: "ldp"
@@ -73,10 +73,48 @@ instruction_forms:
offset: "*" offset: "*"
index: "*" index: "*"
scale: "*" scale: "*"
pre-indexed: "*" pre-indexed: false
post-indexed: "*" post-indexed: false
source: true source: true
destination: false destination: false
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: true
post-indexed: false
source: true
destination: true
- name: "ldp"
operands:
- class: "register"
prefix: "q"
source: false
destination: true
- class: "register"
prefix: "q"
source: false
destination: true
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: false
post-indexed: true
source: true
destination: true
- name: "stp" - name: "stp"
operands: operands:
- class: "register" - class: "register"
@@ -92,8 +130,8 @@ instruction_forms:
offset: "*" offset: "*"
index: "*" index: "*"
scale: "*" scale: "*"
pre-indexed: "*" pre-indexed: false
post-indexed: "*" post-indexed: false
source: false source: false
destination: true destination: true
- name: "stp" - name: "stp"
@@ -111,8 +149,8 @@ instruction_forms:
offset: "*" offset: "*"
index: "*" index: "*"
scale: "*" scale: "*"
pre-indexed: "*" pre-indexed: false
post-indexed: "*" post-indexed: false
source: false source: false
destination: true destination: true
- name: "str" - name: "str"
@@ -175,3 +213,18 @@ instruction_forms:
post-indexed: "*" post-indexed: "*"
source: false source: false
destination: true destination: true
- name: "stur"
operands:
- class: "register"
prefix: "d"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
pre-indexed: "*"
post-indexed: "*"
source: false
destination: true

View File

@@ -318,7 +318,7 @@ instruction_forms:
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldp - name: ldp
operands: operands:
@@ -334,7 +334,7 @@ instruction_forms:
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldp - name: ldp
operands: operands:
@@ -344,13 +344,13 @@ instruction_forms:
prefix: q prefix: q
- class: memory - class: memory
base: x base: x
offset: imd offset: '*'
index: ~ index: '*'
scale: 1 scale: 1
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldp - name: ldp
operands: operands:
@@ -366,7 +366,7 @@ instruction_forms:
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldp - name: ldp
operands: operands:
@@ -376,13 +376,13 @@ instruction_forms:
prefix: q prefix: q
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldp - name: ldp
operands: operands:
@@ -392,13 +392,13 @@ instruction_forms:
prefix: q prefix: q
- class: memory - class: memory
base: x base: x
offset: imd offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: true pre-indexed: true
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldp - name: ldp
operands: operands:
@@ -408,13 +408,13 @@ instruction_forms:
prefix: d prefix: d
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 1.0 throughput: 1.0
latency: ~ # 2*p34 latency: 4.0 # 2*p34
port_pressure: [[2.0, '34']] port_pressure: [[2.0, '34']]
- name: ldur # JL: assumed from ldr - name: ldur # JL: assumed from ldr
operands: operands:
@@ -450,9 +450,9 @@ instruction_forms:
prefix: d prefix: d
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
post-indexed: false post-indexed: false
pre-indexed: false pre-indexed: false
throughput: 0.5 throughput: 0.5
@@ -465,8 +465,8 @@ instruction_forms:
- class: memory - class: memory
base: x base: x
offset: imd offset: imd
index: ~ index: '*'
scale: 1 scale: '*'
post-indexed: false post-indexed: false
pre-indexed: false pre-indexed: false
throughput: 0.5 throughput: 0.5
@@ -478,9 +478,9 @@ instruction_forms:
prefix: d prefix: d
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: x index: '*'
scale: 8 scale: '*'
post-indexed: false post-indexed: false
pre-indexed: false pre-indexed: false
throughput: 0.5 throughput: 0.5
@@ -557,29 +557,13 @@ instruction_forms:
prefix: d prefix: d
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 2.0 throughput: 2.0
latency: ~ # 4*p34 latency: 0 # 4*p34
port_pressure: [[4.0, '34']]
- name: stp
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: memory
base: x
offset: imd
index: ~
scale: 1
pre-indexed: false
post-indexed: false
throughput: 2.0
latency: ~ # 4*p34
port_pressure: [[4.0, '34']] port_pressure: [[4.0, '34']]
- name: stp - name: stp
operands: operands:
@@ -589,13 +573,13 @@ instruction_forms:
prefix: q prefix: q
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 2.0 throughput: 2.0
latency: ~ # 2*p34+2*p5 latency: 0 # 2*p34+2*p5
port_pressure: [[2.0, '34'], [2.0, '5']] port_pressure: [[2.0, '34'], [2.0, '5']]
- name: stp - name: stp
operands: operands:
@@ -605,30 +589,28 @@ instruction_forms:
prefix: q prefix: q
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 2.0 throughput: 2.0
latency: ~ # 2*p34+2*p5 latency: 0 # 2*p34+2*p5
port_pressure: [[2.0, '34'], [2.0, '5']] port_pressure: [[2.0, '34'], [2.0, '5']]
- name: stp - name: stur # JL: assumed from str
operands: operands:
- class: register - class: register
prefix: q prefix: d
- class: register
prefix: q
- class: memory - class: memory
base: x base: x
offset: imd offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 2.0 throughput: 1.0
latency: ~ # 2*p34+2*p5 latency: 4.0 # 1*p34+1*p5
port_pressure: [[2.0, '34'], [2.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: stur # JL: assumed from str - name: stur # JL: assumed from str
operands: operands:
- class: register - class: register
@@ -649,13 +631,13 @@ instruction_forms:
prefix: x prefix: x
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p34+1*p5 latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str - name: str
operands: operands:
@@ -669,7 +651,7 @@ instruction_forms:
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p34+1*p5 latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str - name: str
operands: operands:
@@ -677,13 +659,13 @@ instruction_forms:
prefix: d prefix: d
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p34+1*p5 latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str - name: str
operands: operands:
@@ -697,7 +679,7 @@ instruction_forms:
pre-indexed: false pre-indexed: false
post-indexed: false post-indexed: false
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p34+1*p5 latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str - name: str
operands: operands:
@@ -705,13 +687,13 @@ instruction_forms:
prefix: q prefix: q
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p34+1*p5 latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str - name: str
operands: operands:
@@ -719,25 +701,22 @@ instruction_forms:
prefix: x prefix: x
- class: memory - class: memory
base: x base: x
offset: ~ offset: '*'
index: ~ index: '*'
scale: 1 scale: '*'
pre-indexed: false pre-indexed: false
post-indexed: true post-indexed: true
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p34+1*p5 latency: 0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']] port_pressure: [[1.0, '34'], [1.0, '5']]
- name: str - name: sub
operands: operands:
- class: register - class: register
prefix: x prefix: w
- class: memory - class: register
base: x prefix: w
offset: ~ - class: immediate
index: x imd: int
scale: 1 throughput: 0.33333333
pre-indexed: false latency: 1.0 # 1*p012
post-indexed: false port_pressure: [[1, '012']]
throughput: 1.0
latency: 4.0 # 1*p34+1*p5
port_pressure: [[1.0, '34'], [1.0, '5']]

View File

@@ -82,7 +82,7 @@ instruction_forms:
- class: register - class: register
name: gpr name: gpr
throughput: 0.25 throughput: 0.25
latency: ~ # 1*p4567 latency: 1.0 # 1*p4567
port_pressure: [[1, '4567']] port_pressure: [[1, '4567']]
- name: cmpq - name: cmpq
operands: operands:
@@ -91,7 +91,7 @@ instruction_forms:
- class: register - class: register
name: gpr name: gpr
throughput: 0.25 throughput: 0.25
latency: ~ # 1*p4567 latency: 1.0 # 1*p4567
port_pressure: [[1, '4567']] port_pressure: [[1, '4567']]
- name: incq - name: incq
operands: operands:
@@ -104,19 +104,19 @@ instruction_forms:
operands: operands:
- class: identifier - class: identifier
throughput: 0.0 throughput: 0.0
latency: ~ latency: 0
port_pressure: [] port_pressure: []
- name: jb - name: jb
operands: operands:
- class: identifier - class: identifier
throughput: 0.0 throughput: 0.0
latency: ~ latency: 0
port_pressure: [] port_pressure: []
- name: jne - name: jne
operands: operands:
- class: identifier - class: identifier
throughput: 0.0 throughput: 0.0
latency: ~ latency: 0
port_pressure: [] port_pressure: []
- name: leaq - name: leaq
operands: operands:
@@ -128,7 +128,7 @@ instruction_forms:
- class: register - class: register
name: gpr name: gpr
throughput: 0.5 throughput: 0.5
latency: ~ # 1*p89 latency: 1.0 # 1*p89
port_pressure: [[1, '89']] port_pressure: [[1, '89']]
- name: movl - name: movl
operands: operands:
@@ -367,7 +367,7 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovapd - name: vmovapd
operands: operands:
@@ -388,7 +388,7 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 2.0 throughput: 2.0
latency: 3.0 # 2*p89+2*pST latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]] port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovapd - name: vmovapd
operands: operands:
@@ -400,19 +400,31 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 2.0 throughput: 2.0
latency: 3.0 # 2*p89+2*pST latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]] port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovaps - name: vmovups
operands:
- class: memory
base: gpr
offset: "*"
index: "*"
scale: "*"
- class: register
name: xmm
throughput: 0.5
latency: 4.0 # 1*p89+1*p8D9D
port_pressure: [[1, '89'], [1, [8D, 9D]]]
- name: vmovups
operands: operands:
- class: register - class: register
name: xmm name: xmm
- class: memory - class: memory
base: gpr base: gpr
offset: ~ offset: "*"
index: gpr index: "*"
scale: 1 scale: "*"
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovaps - name: vmovaps
operands: operands:
@@ -420,11 +432,11 @@ instruction_forms:
name: xmm name: xmm
- class: memory - class: memory
base: gpr base: gpr
offset: imd offset: "*"
index: gpr index: "*"
scale: 1 scale: "*"
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovupd - name: vmovupd
operands: operands:
@@ -436,7 +448,7 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 2.0 throughput: 2.0
latency: 3.0 # 2*p89+2*pST latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]] port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovupd - name: vmovupd
operands: operands:
@@ -448,7 +460,7 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 2.0 throughput: 2.0
latency: 3.0 # 2*p89+2*pST latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]] port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovupd - name: vmovupd
operands: operands:
@@ -460,7 +472,7 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 2.0 throughput: 2.0
latency: 3.0 # 2*p89+2*pST latency: 0 # 2*p89+2*pST
port_pressure: [[2, '89'], [2, [ST]]] port_pressure: [[2, '89'], [2, [ST]]]
- name: vmovupd - name: vmovupd
operands: operands:
@@ -502,7 +514,7 @@ instruction_forms:
index: ~ index: ~
scale: 1 scale: 1
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovsd - name: vmovsd
operands: operands:
@@ -514,7 +526,7 @@ instruction_forms:
index: ~ index: ~
scale: 1 scale: 1
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovsd - name: vmovsd
operands: operands:
@@ -526,7 +538,7 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]
- name: vmovsd - name: vmovsd
operands: operands:
@@ -538,5 +550,5 @@ instruction_forms:
index: gpr index: gpr
scale: 1 scale: 1
throughput: 1.0 throughput: 1.0
latency: 4.0 # 1*p89+1*pST latency: 0 # 1*p89+1*pST
port_pressure: [[1, '89'], [1, [ST]]] port_pressure: [[1, '89'], [1, [ST]]]

View File

@@ -176,6 +176,8 @@ class ArchSemantics(ISASemantics):
operands.index(self._create_reg_wildcard()) operands.index(self._create_reg_wildcard())
] ]
) )
data_port_pressure = [0.0 for _ in range(port_number)]
data_port_uops = []
if INSTR_FLAGS.HAS_LD in instruction_form['flags']: if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
# LOAD performance data # LOAD performance data
data_port_uops = self._machine_model.get_load_throughput( data_port_uops = self._machine_model.get_load_throughput(
@@ -196,17 +198,41 @@ class ArchSemantics(ISASemantics):
data_port_pressure = [pp * multiplier for pp in data_port_pressure] data_port_pressure = [pp * multiplier for pp in data_port_pressure]
if INSTR_FLAGS.HAS_ST in instruction_form['flags']: if INSTR_FLAGS.HAS_ST in instruction_form['flags']:
# STORE performance data # STORE performance data
data_port_uops = self._machine_model.get_store_throughput( destinations = (
[ instruction_form['semantic_operands']['destination']
x['memory'] + instruction_form['semantic_operands']['src_dst']
for x in instruction_form['semantic_operands']['destination']
+ instruction_form['semantic_operands']['src_dst']
if 'memory' in x
][0]
) )
data_port_pressure = self._machine_model.average_port_pressure( st_data_port_uops = self._machine_model.get_store_throughput(
data_port_uops [x['memory'] for x in destinations if 'memory' in x][0]
) )
# zero data port pressure and remove HAS_ST flag if
# - no mem operand in dst &&
# - all mem operands in src_dst are pre-/post-indexed
# since it is no mem store
if (
self._isa == 'aarch64'
and 'memory'
not in instruction_form['semantic_operands']['destination']
and all(
[
'post_indexed' in op['memory']
or 'pre_indexed' in op['memory']
for op in instruction_form['semantic_operands']['src_dst']
if 'memory' in op
]
)
):
st_data_port_uops = []
instruction_form['flags'].remove(INSTR_FLAGS.HAS_ST)
# sum up all data ports in case for LOAD and STORE
st_data_port_pressure = self._machine_model.average_port_pressure(
st_data_port_uops
)
data_port_pressure = [
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
]
data_port_uops += st_data_port_uops
throughput = max( throughput = max(
max(data_port_pressure), instruction_data_reg['throughput'] max(data_port_pressure), instruction_data_reg['throughput']
) )