enhanced for dynamic ST throughput combination

This commit is contained in:
JanLJL
2020-01-14 10:49:47 +01:00
parent 917146a7df
commit a3cc742a87
10 changed files with 98 additions and 35 deletions

View File

@@ -20,6 +20,10 @@ load_throughput:
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
store_throughput:
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
store_throughput_default: [[1, '23'], [1, '4']]
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
port_model_scheme: |
┌------------------------------------------------------------------------┐

View File

@@ -8,19 +8,21 @@ scheduler_size: 97
hidden_loads: false
load_latency: {gpr: 4.0, mm: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0}
load_throughput:
- {base: gpr, offset: ~, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: ~, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: gpr, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: ~, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: ~, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: ~, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
store_throughput: []
store_throughput:
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
store_throughput_default: [[1, '23'], [1, '4']]
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
port_model_scheme: |

View File

@@ -20,6 +20,10 @@ load_throughput:
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
store_throughput:
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
store_throughput_default: [[1, '23'], [1, '4']]
ports: ['0', 0DV, '1', '2', '2D', '3', '3D', '4', '5', '6', '7']
port_model_scheme: |
┌------------------------------------------------------------------------┐

View File

@@ -20,6 +20,10 @@ load_throughput:
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
store_throughput:
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
store_throughput_default: [[1, '23'], [1, '4']]
ports: ['0', '0DV', '1', '2', '2D', '3', '3D', '4', '5']
port_model_scheme: |
┌-----------------------------------------------------┐

View File

@@ -20,6 +20,10 @@ load_throughput:
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
store_throughput:
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '237'], [1, '4']]}
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
store_throughput_default: [[1, '23'], [1, '4']]
ports: ['0', 0DV, '1', '2', 2D, '3', 3D, '4', '5', '6', '7']
port_model_scheme: |
┌------------------------------------------------------------------------┐

View File

@@ -20,6 +20,10 @@ load_throughput:
- {base: ~, offset: imd, index: gpr, scale: 1, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
- {base: ~, offset: imd, index: gpr, scale: 8, port_pressure: [[1, '23'], [1, ['2D', '3D']]]}
load_throughput_default: [[1, '23'], [1, ['2D', '3D']]]
store_throughput:
- {base: '*', offset: '*', index: ~, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
- {base: '*', offset: '*', index: gpr, scale: '*', port_pressure: [[1, '23'], [1, '4']]}
store_throughput_default: [[1, '23'], [1, '4']]
ports: ['0', '0DV', '1', '2', '2D', '3', '3D', '4', '5']
port_model_scheme: |
┌-----------------------------------------------------┐

View File

@@ -41,6 +41,8 @@ load_throughput:
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [[1, '34']]}
- {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [[1, '34']]}
load_throughput_default: [[1, '34']]
store_throughput: []
store_throughput_default: [[1, '34'], [1, '5']]
ports: ['0', 0DV, '1', 1DV, '2', '3', '4', '5']
port_model_scheme: |
┌-----------------------------------------------------------┐

View File

@@ -14,6 +14,8 @@ load_throughput:
- {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
- {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [[1, '89'], [1, ['8D','9D']]]}
load_throughput_default: [[1, '89'], [1, ['8D', '9D']]]
store_throughput: []
store_throughput_default: [[1, '89'], [1,[ST]]]
hidden_loads: false
ports: ['0', '1', '2', '3', 3DV, '4', '5', '6', '7', '8', '9', 8D, 9D, ST]
port_model_scheme: |

View File

@@ -150,8 +150,12 @@ class ArchSemantics(ISASemantics):
# instruction could not be found in DB
assign_unknown = True
# check for equivalent register-operands DB entry if LD or ST
if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
# --> combine LD and reg form of instruction form
if (
INSTR_FLAGS.HAS_LD in instruction_form['flags']
or INSTR_FLAGS.HAS_ST in instruction_form['flags']
):
# dynamically combine LD/ST and reg form of instruction form
# substitute mem and look for reg-only variant
operands = self.substitute_mem_address(instruction_form['operands'])
instruction_data_reg = self._machine_model.get_instruction(
instruction_form['instruction'], operands
@@ -172,41 +176,64 @@ class ArchSemantics(ISASemantics):
operands.index(self._create_reg_wildcard())
]
)
load_port_uops = self._machine_model.get_load_throughput(
[
x['memory']
for x in instruction_form['semantic_operands']['source']
+ instruction_form['semantic_operands']['src_dst']
if 'memory' in x
][0]
)
load_port_pressure = self._machine_model.average_port_pressure(
load_port_uops
)
if 'load_throughput_multiplier' in self._machine_model:
multiplier = self._machine_model['load_throughput_multiplier'][
reg_type
]
load_port_pressure = [pp * multiplier for pp in load_port_pressure]
if INSTR_FLAGS.HAS_LD in instruction_form['flags']:
# LOAD performance data
data_port_uops = self._machine_model.get_load_throughput(
[
x['memory']
for x in instruction_form['semantic_operands']['source']
+ instruction_form['semantic_operands']['src_dst']
if 'memory' in x
][0]
)
data_port_pressure = self._machine_model.average_port_pressure(
data_port_uops
)
if 'load_throughput_multiplier' in self._machine_model:
multiplier = self._machine_model['load_throughput_multiplier'][
reg_type
]
data_port_pressure = [pp * multiplier for pp in data_port_pressure]
if INSTR_FLAGS.HAS_ST in instruction_form['flags']:
# STORE performance data
data_port_uops = self._machine_model.get_store_throughput(
[
x['memory']
for x in instruction_form['semantic_operands']['destination']
+ instruction_form['semantic_operands']['src_dst']
if 'memory' in x
][0]
)
data_port_pressure = self._machine_model.average_port_pressure(
data_port_uops
)
throughput = max(
max(load_port_pressure), instruction_data_reg['throughput']
max(data_port_pressure), instruction_data_reg['throughput']
)
latency = (
latency = instruction_data_reg['latency']
# Add LD and ST latency
latency += (
self._machine_model.get_load_latency(reg_type)
+ instruction_data_reg['latency']
if INSTR_FLAGS.HAS_LD in instruction_form['flags']
else 0
)
latency += (
self._machine_model.get_store_latency(reg_type)
if INSTR_FLAGS.HAS_ST in instruction_form['flags']
else 0
)
latency_wo_load = instruction_data_reg['latency']
instruction_form['port_pressure'] = [
sum(x)
for x in zip(
load_port_pressure,
data_port_pressure,
self._machine_model.average_port_pressure(
instruction_data_reg['port_pressure']
),
)
]
instruction_form['port_uops'] = list(
chain(instruction_data_reg['port_pressure'], load_port_uops)
chain(instruction_data_reg['port_pressure'], data_port_uops)
)
if assign_unknown:

View File

@@ -160,6 +160,16 @@ class MachineModel(object):
return ld_tp[0]['port_pressure']
return self._data['load_throughput_default']
def get_store_latency(self, reg_type):
    """Return the store latency for registers of type ``reg_type``.

    The value is fixed at 0 for now, because load-store dependencies are
    currently not detectable; ``reg_type`` is therefore not evaluated.
    """
    store_latency = 0
    return store_latency
def get_store_throughput(self, memory):
    """Return the port pressure of a store to the memory operand ``memory``.

    The entries of the ``store_throughput`` section are checked in order and
    the ``port_pressure`` of the first entry matching ``memory`` is returned.
    If no entry matches (or the section is empty), the value of
    ``store_throughput_default`` is returned instead.
    """
    # Stop at the first match instead of collecting every matching entry.
    first_match = next(
        (
            entry
            for entry in self._data['store_throughput']
            if self._match_mem_entries(memory, entry)
        ),
        None,
    )
    if first_match is not None:
        return first_match['port_pressure']
    return self._data['store_throughput_default']
def _match_mem_entries(self, mem, i_mem):
if self._data['isa'].lower() == 'aarch64':
return self._is_AArch64_mem_type(i_mem, mem)