From 2fc1f3a186d28d78d0ce8ad5ed73636dabf3a357 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Wed, 22 Jan 2020 21:40:11 +0100 Subject: [PATCH] added new instructions and fixed false positive assignment of stores by dynamic TP/LT combination for aarch64 --- osaca/data/isa/aarch64.yml | 69 ++++++++++-- osaca/data/tx2.yml | 167 +++++++++++++----------------- osaca/data/zen1.yml | 62 ++++++----- osaca/semantics/arch_semantics.py | 44 ++++++-- 4 files changed, 206 insertions(+), 136 deletions(-) diff --git a/osaca/data/isa/aarch64.yml b/osaca/data/isa/aarch64.yml index 7db1b39..4422dac 100644 --- a/osaca/data/isa/aarch64.yml +++ b/osaca/data/isa/aarch64.yml @@ -54,8 +54,8 @@ instruction_forms: offset: "*" index: "*" scale: "*" - pre-indexed: "*" - post-indexed: "*" + pre-indexed: false + post-indexed: false source: true destination: false - name: "ldp" @@ -73,10 +73,48 @@ instruction_forms: offset: "*" index: "*" scale: "*" - pre-indexed: "*" - post-indexed: "*" + pre-indexed: false + post-indexed: false source: true destination: false + - name: "ldp" + operands: + - class: "register" + prefix: "q" + source: false + destination: true + - class: "register" + prefix: "q" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: true + post-indexed: false + source: true + destination: true + - name: "ldp" + operands: + - class: "register" + prefix: "q" + source: false + destination: true + - class: "register" + prefix: "q" + source: false + destination: true + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: false + post-indexed: true + source: true + destination: true - name: "stp" operands: - class: "register" @@ -92,8 +130,8 @@ instruction_forms: offset: "*" index: "*" scale: "*" - pre-indexed: "*" - post-indexed: "*" + pre-indexed: false + post-indexed: false source: false destination: true - name: "stp" @@ -111,8 +149,8 @@ instruction_forms: offset: "*" index: "*" scale: "*" - pre-indexed: "*" - post-indexed: "*" + pre-indexed: false + post-indexed: false source: false destination: true - name: "str" @@ -175,3 +213,18 @@ instruction_forms: post-indexed: "*" source: false destination: true + - name: "stur" + operands: + - class: "register" + prefix: "d" + source: true + destination: false + - class: "memory" + base: "*" + offset: "*" + index: "*" + scale: "*" + pre-indexed: "*" + post-indexed: "*" + source: false + destination: true diff --git a/osaca/data/tx2.yml b/osaca/data/tx2.yml index e8c431d..6aadc14 100644 --- a/osaca/data/tx2.yml +++ b/osaca/data/tx2.yml @@ -318,7 +318,7 @@ instruction_forms: pre-indexed: false post-indexed: false throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldp operands: @@ -334,7 +334,7 @@ instruction_forms: pre-indexed: false post-indexed: true throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldp operands: @@ -344,13 +344,13 @@ instruction_forms: prefix: q - class: memory base: x - offset: imd - index: ~ + offset: '*' + index: '*' scale: 1 pre-indexed: false post-indexed: false throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldp operands: @@ -366,7 +366,7 @@ instruction_forms: pre-indexed: false post-indexed: true throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldp operands: @@ -376,13 +376,13 @@ instruction_forms: prefix: q - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: false throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldp operands: @@ -392,13 +392,13 @@ instruction_forms: prefix: q - class: memory base: x - offset: imd - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: true post-indexed: false throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldp operands: @@ -408,13 +408,13 @@ instruction_forms: prefix: d - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: true throughput: 1.0 - latency: ~ # 2*p34 + latency: 4.0 # 2*p34 port_pressure: [[2.0, '34']] - name: ldur # JL: assumed from ldr operands: @@ -450,9 +450,9 @@ instruction_forms: prefix: d - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' post-indexed: false pre-indexed: false throughput: 0.5 @@ -465,8 +465,8 @@ instruction_forms: - class: memory base: x offset: imd - index: ~ - scale: 1 + index: '*' + scale: '*' post-indexed: false pre-indexed: false throughput: 0.5 @@ -478,9 +478,9 @@ instruction_forms: prefix: d - class: memory base: x - offset: ~ - index: x - scale: 8 + offset: '*' + index: '*' + scale: '*' post-indexed: false pre-indexed: false throughput: 0.5 @@ -557,29 +557,13 @@ instruction_forms: prefix: d - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: false throughput: 2.0 - latency: ~ # 4*p34 - port_pressure: [[4.0, '34']] -- name: stp - operands: - - class: register - prefix: d - - class: register - prefix: d - - class: memory - base: x - offset: imd - index: ~ - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 2.0 - latency: ~ # 4*p34 + latency: 0 # 4*p34 port_pressure: [[4.0, '34']] - name: stp operands: @@ -589,13 +573,13 @@ instruction_forms: prefix: q - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: true throughput: 2.0 - latency: ~ # 2*p34+2*p5 + latency: 0 # 2*p34+2*p5 port_pressure: [[2.0, '34'], [2.0, '5']] - name: stp operands: @@ -605,30 +589,28 @@ instruction_forms: prefix: q - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: false throughput: 2.0 - latency: ~ # 2*p34+2*p5 + latency: 0 # 2*p34+2*p5 port_pressure: [[2.0, '34'], [2.0, '5']] -- name: stp +- name: stur # JL: assumed from str operands: - class: register - prefix: q - - class: register - prefix: q + prefix: d - class: memory base: x - offset: imd - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: false - throughput: 2.0 - latency: ~ # 2*p34+2*p5 - port_pressure: [[2.0, '34'], [2.0, '5']] + throughput: 1.0 + latency: 4.0 # 1*p34+1*p5 + port_pressure: [[1.0, '34'], [1.0, '5']] - name: stur # JL: assumed from str operands: - class: register @@ -649,13 +631,13 @@ instruction_forms: prefix: x - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: false throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 + latency: 0 # 1*p34+1*p5 port_pressure: [[1.0, '34'], [1.0, '5']] - name: str operands: @@ -669,7 +651,7 @@ instruction_forms: pre-indexed: false post-indexed: false throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 + latency: 0 # 1*p34+1*p5 port_pressure: [[1.0, '34'], [1.0, '5']] - name: str operands: @@ -677,13 +659,13 @@ instruction_forms: prefix: d - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: true throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 + latency: 0 # 1*p34+1*p5 port_pressure: [[1.0, '34'], [1.0, '5']] - name: str operands: @@ -697,7 +679,7 @@ instruction_forms: pre-indexed: false post-indexed: false throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 + latency: 0 # 1*p34+1*p5 port_pressure: [[1.0, '34'], [1.0, '5']] - name: str operands: @@ -705,13 +687,13 @@ instruction_forms: prefix: q - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: true throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 + latency: 0 # 1*p34+1*p5 port_pressure: [[1.0, '34'], [1.0, '5']] - name: str operands: @@ -719,25 +701,22 @@ instruction_forms: prefix: x - class: memory base: x - offset: ~ - index: ~ - scale: 1 + offset: '*' + index: '*' + scale: '*' pre-indexed: false post-indexed: true throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 + latency: 0 # 1*p34+1*p5 port_pressure: [[1.0, '34'], [1.0, '5']] -- name: str +- name: sub operands: - class: register - prefix: x - - class: memory - base: x - offset: ~ - index: x - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 1.0 - latency: 4.0 # 1*p34+1*p5 - port_pressure: [[1.0, '34'], [1.0, '5']] + prefix: w + - class: register + prefix: w + - class: immediate + imd: int + throughput: 0.33333333 + latency: 1.0 # 1*p012 + port_pressure: [[1, '012']] diff --git a/osaca/data/zen1.yml b/osaca/data/zen1.yml index fee0c59..29de7a0 100644 --- a/osaca/data/zen1.yml +++ b/osaca/data/zen1.yml @@ -82,7 +82,7 @@ instruction_forms: - class: register name: gpr throughput: 0.25 - latency: ~ # 1*p4567 + latency: 1.0 # 1*p4567 port_pressure: [[1, '4567']] - name: cmpq operands: @@ -91,7 +91,7 @@ instruction_forms: - class: register name: gpr throughput: 0.25 - latency: ~ # 1*p4567 + latency: 1.0 # 1*p4567 port_pressure: [[1, '4567']] - name: incq operands: @@ -104,19 +104,19 @@ instruction_forms: operands: - class: identifier throughput: 0.0 - latency: ~ + latency: 0 port_pressure: [] - name: jb operands: - class: identifier throughput: 0.0 - latency: ~ + latency: 0 port_pressure: [] - name: jne operands: - class: identifier throughput: 0.0 - latency: ~ + latency: 0 port_pressure: [] - name: leaq operands: @@ -128,7 +128,7 @@ instruction_forms: - class: register name: gpr throughput: 0.5 - latency: ~ # 1*p89 + latency: 1.0 # 1*p89 port_pressure: [[1, '89']] - name: movl operands: @@ -367,7 +367,7 @@ instruction_forms: index: gpr scale: 1 throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] - name: vmovapd operands: @@ -388,7 +388,7 @@ instruction_forms: index: gpr scale: 1 throughput: 2.0 - latency: 3.0 # 2*p89+2*pST + latency: 0 # 2*p89+2*pST port_pressure: [[2, '89'], [2, [ST]]] - name: vmovapd operands: @@ -400,19 +400,31 @@ instruction_forms: index: gpr scale: 1 throughput: 2.0 - latency: 3.0 # 2*p89+2*pST + latency: 0 # 2*p89+2*pST port_pressure: [[2, '89'], [2, [ST]]] -- name: vmovaps +- name: vmovups + operands: + - class: memory + base: gpr + offset: "*" + index: "*" + scale: "*" + - class: register + name: xmm + throughput: 0.5 + latency: 4.0 # 1*p89+1*p8D9D + port_pressure: [[1, '89'], [1, [8D, 9D]]] +- name: vmovups operands: - class: register name: xmm - class: memory base: gpr - offset: ~ - index: gpr - scale: 1 + offset: "*" + index: "*" + scale: "*" throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] - name: vmovaps operands: @@ -420,11 +432,11 @@ instruction_forms: name: xmm - class: memory base: gpr - offset: imd - index: gpr - scale: 1 + offset: "*" + index: "*" + scale: "*" throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] - name: vmovupd operands: @@ -436,7 +448,7 @@ instruction_forms: index: gpr scale: 1 throughput: 2.0 - latency: 3.0 # 2*p89+2*pST + latency: 0 # 2*p89+2*pST port_pressure: [[2, '89'], [2, [ST]]] - name: vmovupd operands: @@ -448,7 +460,7 @@ instruction_forms: index: gpr scale: 1 throughput: 2.0 - latency: 3.0 # 2*p89+2*pST + latency: 0 # 2*p89+2*pST port_pressure: [[2, '89'], [2, [ST]]] - name: vmovupd operands: @@ -460,7 +472,7 @@ instruction_forms: index: gpr scale: 1 throughput: 2.0 - latency: 3.0 # 2*p89+2*pST + latency: 0 # 2*p89+2*pST port_pressure: [[2, '89'], [2, [ST]]] - name: vmovupd operands: @@ -502,7 +514,7 @@ instruction_forms: index: ~ scale: 1 throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] - name: vmovsd operands: @@ -514,7 +526,7 @@ instruction_forms: index: ~ scale: 1 throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] - name: vmovsd operands: @@ -526,7 +538,7 @@ instruction_forms: index: gpr scale: 1 throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] - name: vmovsd operands: @@ -538,5 +550,5 @@ instruction_forms: index: gpr scale: 1 throughput: 1.0 - latency: 4.0 # 1*p89+1*pST + latency: 0 # 1*p89+1*pST port_pressure: [[1, '89'], [1, [ST]]] diff --git a/osaca/semantics/arch_semantics.py b/osaca/semantics/arch_semantics.py index 6e8737d..268baa8 100755 --- a/osaca/semantics/arch_semantics.py +++ b/osaca/semantics/arch_semantics.py @@ -176,6 +176,8 @@ class ArchSemantics(ISASemantics): operands.index(self._create_reg_wildcard()) ] ) + data_port_pressure = [0.0 for _ in range(port_number)] + data_port_uops = [] if INSTR_FLAGS.HAS_LD in instruction_form['flags']: # LOAD performance data data_port_uops = self._machine_model.get_load_throughput( @@ -196,17 +198,41 @@ class ArchSemantics(ISASemantics): data_port_pressure = [pp * multiplier for pp in data_port_pressure] if INSTR_FLAGS.HAS_ST in instruction_form['flags']: # STORE performance data - data_port_uops = self._machine_model.get_store_throughput( - [ - x['memory'] - for x in instruction_form['semantic_operands']['destination'] - + instruction_form['semantic_operands']['src_dst'] - if 'memory' in x - ][0] + destinations = ( + instruction_form['semantic_operands']['destination'] + + instruction_form['semantic_operands']['src_dst'] ) - data_port_pressure = self._machine_model.average_port_pressure( - data_port_uops + st_data_port_uops = self._machine_model.get_store_throughput( + [x['memory'] for x in destinations if 'memory' in x][0] ) + # zero data port pressure and remove HAS_ST flag if + # - no mem operand in dst && + # - all mem operands in src_dst are pre-/post-indexed + # since it is no mem store + if ( + self._isa == 'aarch64' + and 'memory' + not in instruction_form['semantic_operands']['destination'] + and all( + [ + 'post_indexed' in op['memory'] + or 'pre_indexed' in op['memory'] + for op in instruction_form['semantic_operands']['src_dst'] + if 'memory' in op + ] + ) + ): + st_data_port_uops = [] + instruction_form['flags'].remove(INSTR_FLAGS.HAS_ST) + + # sum up all data ports in case for LOAD and STORE + st_data_port_pressure = self._machine_model.average_port_pressure( + st_data_port_uops + ) + data_port_pressure = [ + sum(x) for x in zip(data_port_pressure, st_data_port_pressure) + ] + data_port_uops += st_data_port_uops throughput = max( max(data_port_pressure), instruction_data_reg['throughput'] )