mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-08 04:00:05 +01:00
enhanced for dynamic ST throughput combination
This commit is contained in:
@@ -20,6 +20,10 @@ load_throughput:
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
|
||||
store_throughput:
|
||||
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
|
||||
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
store_throughput_default: [[1, '23'], [1, '4']]
|
||||
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
┌------------------------------------------------------------------------┐
|
||||
|
||||
@@ -8,19 +8,21 @@ scheduler_size: 97
|
||||
hidden_loads: false
|
||||
load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0}
|
||||
load_throughput:
|
||||
- {base: gpr, offset: ~, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: ~, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: gpr, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
|
||||
store_throughput: []
|
||||
store_throughput:
|
||||
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
|
||||
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
store_throughput_default: [[1, '23'], [1, '4']]
|
||||
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
|
||||
@@ -20,6 +20,10 @@ load_throughput:
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
|
||||
store_throughput:
|
||||
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
|
||||
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
store_throughput_default: [[1, '23'], [1, '4']]
|
||||
ports: ['0', 0DV, '1', '2', '2D', '3', '3D', '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
┌------------------------------------------------------------------------┐
|
||||
|
||||
@@ -20,6 +20,10 @@ load_throughput:
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
|
||||
store_throughput:
|
||||
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
store_throughput_default: [[1, '23'], [1, '4']]
|
||||
ports: ['0', '0DV', '1', '2', '2D', '3', '3D', '4', '5']
|
||||
port_model_scheme: |
|
||||
┌-----------------------------------------------------┐
|
||||
|
||||
@@ -20,6 +20,10 @@ load_throughput:
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
|
||||
store_throughput:
|
||||
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
|
||||
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
store_throughput_default: [[1, '23'], [1, '4']]
|
||||
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
|
||||
port_model_scheme: |
|
||||
┌------------------------------------------------------------------------┐
|
||||
|
||||
@@ -20,6 +20,10 @@ load_throughput:
|
||||
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
|
||||
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
|
||||
store_throughput:
|
||||
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
|
||||
store_throughput_default: [[1, '23'], [1, '4']]
|
||||
ports: ['0', '0DV', '1', '2', '2D', '3', '3D', '4', '5']
|
||||
port_model_scheme: |
|
||||
┌-----------------------------------------------------┐
|
||||
|
||||
@@ -41,6 +41,8 @@ load_throughput:
|
||||
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [[1, '34']]}
|
||||
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [[1, '34']]}
|
||||
load_throughput_default: [[1, '34']]
|
||||
store_throughput: []
|
||||
store_throughput_default: [[1, '34'], [1, '5']]
|
||||
ports: ['0', 0DV, '1', 1DV, '2', '3', '4', '5']
|
||||
port_model_scheme: |
|
||||
┌-----------------------------------------------------------┐
|
||||
|
||||
@@ -14,6 +14,8 @@ load_throughput:
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
|
||||
load_throughput_default: [[1, '89'], [1, ['8D', '9D']]]
|
||||
store_throughput: []
|
||||
store_throughput_default: [[1, '89'], [1,[ST]]]
|
||||
hidden_loads: false
|
||||
ports: ['0', '1', '2', '3', 3DV, '4', '5', '6', '7', '8', '9', 8D, 9D, ST]
|
||||
port_model_scheme: |
|
||||
|
||||
@@ -150,8 +150,12 @@ class ArchSemantics(ISASemantics):
|
||||
# instruction could not be found in DB
|
||||
assign_unknown = True
|
||||
# check for equivalent register-operands DB entry if LD
|
||||
if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
|
||||
# --> combine LD and reg form of instruction form
|
||||
if (
|
||||
INSTR_FLAGS.HAS_LD in instruction_form['flags']
|
||||
or INSTR_FLAGS.HAS_ST in instruction_form['flags']
|
||||
):
|
||||
# dynamically combine LD/ST and reg form of instruction form
|
||||
# substitute mem and look for reg-only variant
|
||||
operands = self.substitute_mem_address(instruction_form['operands'])
|
||||
instruction_data_reg = self._machine_model.get_instruction(
|
||||
instruction_form['instruction'], operands
|
||||
@@ -172,41 +176,64 @@ class ArchSemantics(ISASemantics):
|
||||
operands.index(self._create_reg_wildcard())
|
||||
]
|
||||
)
|
||||
load_port_uops = self._machine_model.get_load_throughput(
|
||||
[
|
||||
x['memory']
|
||||
for x in instruction_form['semantic_operands']['source']
|
||||
+ instruction_form['semantic_operands']['src_dst']
|
||||
if 'memory' in x
|
||||
][0]
|
||||
)
|
||||
load_port_pressure = self._machine_model.average_port_pressure(
|
||||
load_port_uops
|
||||
)
|
||||
if 'load_throughput_multiplier' in self._machine_model:
|
||||
multiplier = self._machine_model['load_throughput_multiplier'][
|
||||
reg_type
|
||||
]
|
||||
load_port_pressure = [pp * multiplier for pp in load_port_pressure]
|
||||
if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
|
||||
# LOAD performance data
|
||||
data_port_uops = self._machine_model.get_load_throughput(
|
||||
[
|
||||
x['memory']
|
||||
for x in instruction_form['semantic_operands']['source']
|
||||
+ instruction_form['semantic_operands']['src_dst']
|
||||
if 'memory' in x
|
||||
][0]
|
||||
)
|
||||
data_port_pressure = self._machine_model.average_port_pressure(
|
||||
data_port_uops
|
||||
)
|
||||
if 'load_throughput_multiplier' in self._machine_model:
|
||||
multiplier = self._machine_model['load_throughput_multiplier'][
|
||||
reg_type
|
||||
]
|
||||
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
|
||||
if INSTR_FLAGS.HAS_ST in instruction_form['flags']:
|
||||
# STORE performance data
|
||||
data_port_uops = self._machine_model.get_store_throughput(
|
||||
[
|
||||
x['memory']
|
||||
for x in instruction_form['semantic_operands']['destination']
|
||||
+ instruction_form['semantic_operands']['src_dst']
|
||||
if 'memory' in x
|
||||
][0]
|
||||
)
|
||||
data_port_pressure = self._machine_model.average_port_pressure(
|
||||
data_port_uops
|
||||
)
|
||||
throughput = max(
|
||||
max(load_port_pressure), instruction_data_reg['throughput']
|
||||
max(data_port_pressure), instruction_data_reg['throughput']
|
||||
)
|
||||
latency = (
|
||||
latency = instruction_data_reg['latency']
|
||||
# Add LD and ST latency
|
||||
latency += (
|
||||
self._machine_model.get_load_latency(reg_type)
|
||||
+ instruction_data_reg['latency']
|
||||
if INSTR_FLAGS.HAS_LD in instruction_form['flags']
|
||||
else 0
|
||||
)
|
||||
latency += (
|
||||
self._machine_model.get_store_latency(reg_type)
|
||||
if INSTR_FLAGS.HAS_ST in instruction_form['flags']
|
||||
else 0
|
||||
)
|
||||
latency_wo_load = instruction_data_reg['latency']
|
||||
instruction_form['port_pressure'] = [
|
||||
sum(x)
|
||||
for x in zip(
|
||||
load_port_pressure,
|
||||
data_port_pressure,
|
||||
self._machine_model.average_port_pressure(
|
||||
instruction_data_reg['port_pressure']
|
||||
),
|
||||
)
|
||||
]
|
||||
instruction_form['port_uops'] = list(
|
||||
chain(instruction_data_reg['port_pressure'], load_port_uops)
|
||||
chain(instruction_data_reg['port_pressure'], data_port_uops)
|
||||
)
|
||||
|
||||
if assign_unknown:
|
||||
|
||||
@@ -160,6 +160,16 @@ class MachineModel(object):
|
||||
return ld_tp[0]['port_pressure']
|
||||
return self._data['load_throughput_default']
|
||||
|
||||
def get_store_latency(self, reg_type):
|
||||
# assume 0 for now, since load-store-dependencies currently not detectable
|
||||
return 0
|
||||
|
||||
def get_store_throughput(self, memory):
|
||||
st_tp = [m for m in self._data['store_throughput'] if self._match_mem_entries(memory, m)]
|
||||
if len(st_tp) > 0:
|
||||
return st_tp[0]['port_pressure']
|
||||
return self._data['store_throughput_default']
|
||||
|
||||
def _match_mem_entries(self, mem, i_mem):
|
||||
if self._data['isa'].lower() == 'aarch64':
|
||||
return self._is_AArch64_mem_type(i_mem, mem)
|
||||
|
||||
Reference in New Issue
Block a user