mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-04 18:20:09 +01:00
Implemented new critical path (CP) calculation for x86
This commit is contained in:
@@ -6,6 +6,7 @@ ROB_size: 224
|
||||
retired_uOps_per_cycle: 4
|
||||
scheduler_size: 97
|
||||
hidden_loads: false
|
||||
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0}
|
||||
ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
|
||||
port_model_scheme: |
|
||||
┌------------------------------------------------------------------------┐
|
||||
@@ -212,7 +213,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vaddpd
|
||||
operands:
|
||||
@@ -226,7 +227,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vaddpd
|
||||
operands:
|
||||
@@ -240,7 +241,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
@@ -254,7 +255,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
@@ -429,7 +430,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
@@ -443,7 +444,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -455,7 +456,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -479,7 +480,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -491,7 +492,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -551,7 +552,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
@@ -563,7 +564,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
@@ -575,7 +576,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
|
||||
@@ -2,6 +2,7 @@ osaca_version: 0.3.0
|
||||
micro_architecture: "AMD Zen (family 17h)"
|
||||
arch_code: "ZEN1"
|
||||
isa: "x86"
|
||||
# Load latencies must be numeric: they are compared with max() and added to instruction latencies.
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 5.0}
|
||||
hidden_loads: false
|
||||
ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "9", "8D", "9D", "ST"]
|
||||
port_model_scheme: |
|
||||
@@ -138,7 +139,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: mulsd
|
||||
operands:
|
||||
@@ -265,7 +266,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
@@ -279,7 +280,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
@@ -293,7 +294,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vaddsd
|
||||
operands:
|
||||
@@ -307,7 +308,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vaddss
|
||||
operands:
|
||||
@@ -460,7 +461,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
@@ -474,7 +475,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 8.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
@@ -499,7 +500,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 1.0
|
||||
latency: 9.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
|
||||
- name: vmulpd
|
||||
operands:
|
||||
@@ -513,7 +514,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 1.0
|
||||
latency: 9.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -525,7 +526,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -549,7 +550,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -561,7 +562,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 1.0
|
||||
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
|
||||
- name: vmovapd
|
||||
operands:
|
||||
@@ -645,7 +646,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 2.0
|
||||
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0]
|
||||
- name: vmovupd
|
||||
operands:
|
||||
@@ -657,7 +658,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "ymm"
|
||||
throughput: 2.0
|
||||
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
@@ -669,7 +670,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
@@ -681,7 +682,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
throughput: 0.5
|
||||
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
|
||||
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
|
||||
- name: vmovsd
|
||||
operands:
|
||||
|
||||
@@ -50,6 +50,9 @@ class BaseParser(object):
|
||||
# Abstract hook of the parser base class: this base version always raises
# NotImplementedError.
# NOTE(review): presumably each ISA-specific parser overrides it to report
# whether *register* is a vector register -- confirm against the subclasses.
def is_vector_register(self, register):
|
||||
raise NotImplementedError
|
||||
|
||||
# Abstract hook of the parser base class: this base version always raises
# NotImplementedError. The AArch64 and x86 parsers derived from BaseParser
# each define their own get_reg_type(register).
def get_reg_type(self, register):
|
||||
raise NotImplementedError
|
||||
|
||||
def construct_parser(self):
|
||||
return
|
||||
# raise NotImplementedError
|
||||
|
||||
@@ -408,3 +408,6 @@ class ParserAArch64v81(BaseParser):
|
||||
if reg_a['prefix'].lower() in prefixes_vec and reg_b['prefix'].lower() in prefixes_vec:
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_reg_type(self, register):
    """
    Return the type identifier of an AArch64 register.

    :param register: register operand dict; only its ``'prefix'`` entry is read
    :returns: the register prefix in lower case
    """
    # Lower-case the prefix, as the vector-register comparison in this class
    # and the x86 parser's get_reg_type already do, so differently-cased
    # spellings of the same register map to one type.
    return register['prefix'].lower()
|
||||
|
||||
@@ -331,3 +331,10 @@ class ParserX86ATT(BaseParser):
|
||||
if len(register['name']) > 2 and register['name'][1:3].lower() == 'mm':
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_reg_type(self, register):
    """
    Return the type identifier of an x86 register.

    :param register: register operand dict; its ``'name'`` entry is read
    :returns: ``'gpr'`` for general purpose registers, otherwise the first three
              characters of the register name in lower case for vector registers
    :raises ValueError: if the register is neither a GPR nor a vector register
    """
    if self.is_gpr(register):
        return 'gpr'
    if self.is_vector_register(register):
        return register['name'][:3].lower()
    # Name the offending register so an unsupported operand can be tracked down.
    raise ValueError('Unsupported register type: {!r}'.format(register))
|
||||
|
||||
@@ -16,6 +16,9 @@ class KernelDG(nx.DiGraph):
|
||||
# Keep the hardware model, then derive the dependency graph and the
# loop-carried dependencies from the kernel.
self.model = hw_model
self.dg = self.create_DG(self.kernel)
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
||||
|
||||
def create_DG(self, kernel):
|
||||
# 1. go through kernel instruction forms and add them as node attribute
|
||||
@@ -25,7 +28,32 @@ class KernelDG(nx.DiGraph):
|
||||
for i, instruction_form in enumerate(kernel):
|
||||
dg.add_node(instruction_form['line_number'])
|
||||
dg.nodes[instruction_form['line_number']]['instruction_form'] = instruction_form
|
||||
for dep in self.find_depending(instruction_form, kernel[i + 1:]):
|
||||
# add load as separate node if existent
|
||||
if 'performs_load' in instruction_form['flags']:
|
||||
regs = [
|
||||
op for op in instruction_form['operands']['destination'] if 'register' in op
|
||||
]
|
||||
if (
|
||||
len(regs) > 1
|
||||
and len(set([self.parser.get_reg_type(x['register']) for x in regs])) != 1
|
||||
):
|
||||
load_lat = max(self.model['load_latency'].values())
|
||||
else:
|
||||
load_lat = self.model['load_latency'][
|
||||
self.parser.get_reg_type(regs[0]['register'])
|
||||
]
|
||||
# add new node
|
||||
dg.add_node(instruction_form['line_number'] + 0.1)
|
||||
dg.nodes[instruction_form['line_number'] + 0.1][
|
||||
'instruction_form'
|
||||
] = instruction_form
|
||||
# and set LD latency as edge weight
|
||||
dg.add_edge(
|
||||
instruction_form['line_number'] + 0.1,
|
||||
instruction_form['line_number'],
|
||||
latency=load_lat,
|
||||
)
|
||||
for dep in self.find_depending(instruction_form, kernel[i + 1 :]):
|
||||
dg.add_edge(
|
||||
instruction_form['line_number'],
|
||||
dep['line_number'],
|
||||
@@ -50,7 +78,7 @@ class KernelDG(nx.DiGraph):
|
||||
loopcarried_deps = [
|
||||
(node, list(nx.algorithms.simple_paths.all_simple_paths(dg, node, node * multiplier)))
|
||||
for node in dg.nodes
|
||||
if node < first_line_no * multiplier
|
||||
if node < first_line_no * multiplier and node == int(node)
|
||||
]
|
||||
# filter others and create graph
|
||||
loopcarried_deps = list(
|
||||
@@ -71,7 +99,7 @@ class KernelDG(nx.DiGraph):
|
||||
if set(dep[1]).issubset(set(other_dep[1])) and dep[0] in other_dep[1]:
|
||||
is_subset = True
|
||||
if not is_subset:
|
||||
tmp_list.append(dep)
|
||||
tmp_list.append(dep)
|
||||
loopcarried_deps = tmp_list
|
||||
for dep in loopcarried_deps:
|
||||
nodes = [self._get_node_by_lineno(n) for n in dep[1]]
|
||||
@@ -88,6 +116,12 @@ class KernelDG(nx.DiGraph):
|
||||
def get_critical_path(self):
|
||||
if nx.algorithms.dag.is_directed_acyclic_graph(self.dg):
|
||||
longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight='latency')
|
||||
# add LD latency to instruction
|
||||
for line_number in longest_path:
|
||||
if line_number != int(line_number) and int(line_number) in longest_path:
|
||||
self._get_node_by_lineno(int(line_number))['latency'] += self.dg.edges[
|
||||
(line_number, int(line_number))
|
||||
]['latency']
|
||||
return [x for x in self.kernel if x['line_number'] in longest_path]
|
||||
else:
|
||||
# split to DAG
|
||||
|
||||
Reference in New Issue
Block a user