implemented new CP calculation for x86

This commit is contained in:
JanLJL
2019-09-24 19:00:12 +02:00
parent 88de0fc3d9
commit 8dd5b2eb0f
6 changed files with 80 additions and 31 deletions

View File

@@ -6,6 +6,7 @@ ROB_size: 224
retired_uOps_per_cycle: 4
scheduler_size: 97
hidden_loads: false
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0}
ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
port_model_scheme: |
┌------------------------------------------------------------------------┐
@@ -212,7 +213,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vaddpd
operands:
@@ -226,7 +227,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vaddpd
operands:
@@ -240,7 +241,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vaddsd
operands:
@@ -254,7 +255,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vaddsd
operands:
@@ -429,7 +430,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmulpd
operands:
@@ -443,7 +444,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 8.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovapd
operands:
@@ -455,7 +456,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 3.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovapd
operands:
@@ -479,7 +480,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovapd
operands:
@@ -491,7 +492,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovapd
operands:
@@ -551,7 +552,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovupd
operands:
@@ -563,7 +564,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 0.5
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovsd
operands:
@@ -575,7 +576,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7
port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
- name: vmovsd
operands:

View File

@@ -2,6 +2,7 @@ osaca_version: 0.3.0
micro_architecture: "AMD Zen (family 17h)"
arch_code: "ZEN1"
isa: "x86"
# load latency per destination register type; values must be numeric (they are compared and summed)
load_latency: {gpr: 4.0, xmm: 4.0, ymm: 5.0}
hidden_loads: false
ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "9", "8D", "9D", "ST"]
port_model_scheme: |
@@ -138,7 +139,7 @@ instruction_forms:
- class: "register"
name: "gpr"
throughput: 0.5
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8LD 9LD ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8LD 9LD ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: mulsd
operands:
@@ -265,7 +266,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vaddsd
operands:
@@ -279,7 +280,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vaddsd
operands:
@@ -293,7 +294,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vaddsd
operands:
@@ -307,7 +308,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 7.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vaddss
operands:
@@ -460,7 +461,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 8.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vmulpd
operands:
@@ -474,7 +475,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 8.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vmulpd
operands:
@@ -499,7 +500,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 1.0
latency: 9.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
- name: vmulpd
operands:
@@ -513,7 +514,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 1.0
latency: 9.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
- name: vmovapd
operands:
@@ -525,7 +526,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vmovapd
operands:
@@ -549,7 +550,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 1.0
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
- name: vmovapd
operands:
@@ -561,7 +562,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 1.0
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]
- name: vmovapd
operands:
@@ -645,7 +646,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 2.0
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0]
- name: vmovupd
operands:
@@ -657,7 +658,7 @@ instruction_forms:
- class: "register"
name: "ymm"
throughput: 2.0
latency: 5.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0]
- name: vmovsd
operands:
@@ -669,7 +670,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vmovsd
operands:
@@ -681,7 +682,7 @@ instruction_forms:
- class: "register"
name: "xmm"
throughput: 0.5
latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST
port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0]
- name: vmovsd
operands:

View File

@@ -50,6 +50,9 @@ class BaseParser(object):
# Abstract hook taking a parsed `register`; the base class has no implementation
# and always raises NotImplementedError.
def is_vector_register(self, register):
raise NotImplementedError
# Abstract hook taking a parsed `register`; the base class has no implementation
# and always raises NotImplementedError.
def get_reg_type(self, register):
raise NotImplementedError
# Base implementation is a no-op that returns None.
# NOTE(review): the raise below is commented out, presumably so subclasses are
# not required to override this method - confirm before re-enabling it.
def construct_parser(self):
return
# raise NotImplementedError

View File

@@ -408,3 +408,6 @@ class ParserAArch64v81(BaseParser):
if reg_a['prefix'].lower() in prefixes_vec and reg_b['prefix'].lower() in prefixes_vec:
return True
return False
# Return the register type of `register`: the value stored under its 'prefix'
# key, passed through unchanged (no case normalisation is applied here).
def get_reg_type(self, register):
return register['prefix']

View File

@@ -331,3 +331,10 @@ class ParserX86ATT(BaseParser):
if len(register['name']) > 2 and register['name'][1:3].lower() == 'mm':
return True
return False
def get_reg_type(self, register):
    """Return the register-type key for a parsed x86 register.

    :param register: parsed register mapping; its ``'name'`` entry is read
    :returns: ``'gpr'`` if ``self.is_gpr(register)`` is true, otherwise the
        lower-cased first three characters of the register name when
        ``self.is_vector_register(register)`` is true (e.g. ``'xmm'``, ``'ymm'``)
    :raises ValueError: if the register is neither a GPR nor a vector register
    """
    if self.is_gpr(register):
        return 'gpr'
    if self.is_vector_register(register):
        return register['name'][:3].lower()
    # Name the offending register so the failure can be diagnosed by the caller.
    raise ValueError('Unsupported register type for register {!r}'.format(register))

View File

@@ -16,6 +16,9 @@ class KernelDG(nx.DiGraph):
# hardware model; create_DG reads its 'load_latency' table
self.model = hw_model
# dependency graph built from the kernel's instruction forms
self.dg = self.create_DG(self.kernel)
# loop-carried dependencies found for the kernel
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
def create_DG(self, kernel):
# 1. go through kernel instruction forms and add them as node attribute
@@ -25,7 +28,32 @@ class KernelDG(nx.DiGraph):
for i, instruction_form in enumerate(kernel):
dg.add_node(instruction_form['line_number'])
dg.nodes[instruction_form['line_number']]['instruction_form'] = instruction_form
for dep in self.find_depending(instruction_form, kernel[i + 1:]):
# add load as separate node if existent
if 'performs_load' in instruction_form['flags']:
regs = [
op for op in instruction_form['operands']['destination'] if 'register' in op
]
if (
len(regs) > 1
and len(set([self.parser.get_reg_type(x['register']) for x in regs])) != 1
):
load_lat = max(self.model['load_latency'].values())
else:
load_lat = self.model['load_latency'][
self.parser.get_reg_type(regs[0]['register'])
]
# add new node
dg.add_node(instruction_form['line_number'] + 0.1)
dg.nodes[instruction_form['line_number'] + 0.1][
'instruction_form'
] = instruction_form
# and set LD latency as edge weight
dg.add_edge(
instruction_form['line_number'] + 0.1,
instruction_form['line_number'],
latency=load_lat,
)
for dep in self.find_depending(instruction_form, kernel[i + 1 :]):
dg.add_edge(
instruction_form['line_number'],
dep['line_number'],
@@ -50,7 +78,7 @@ class KernelDG(nx.DiGraph):
loopcarried_deps = [
(node, list(nx.algorithms.simple_paths.all_simple_paths(dg, node, node * multiplier)))
for node in dg.nodes
if node < first_line_no * multiplier
if node < first_line_no * multiplier and node == int(node)
]
# filter others and create graph
loopcarried_deps = list(
@@ -71,7 +99,7 @@ class KernelDG(nx.DiGraph):
if set(dep[1]).issubset(set(other_dep[1])) and dep[0] in other_dep[1]:
is_subset = True
if not is_subset:
tmp_list.append(dep)
tmp_list.append(dep)
loopcarried_deps = tmp_list
for dep in loopcarried_deps:
nodes = [self._get_node_by_lineno(n) for n in dep[1]]
@@ -88,6 +116,12 @@ class KernelDG(nx.DiGraph):
def get_critical_path(self):
if nx.algorithms.dag.is_directed_acyclic_graph(self.dg):
longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight='latency')
# add LD latency to instruction
for line_number in longest_path:
if line_number != int(line_number) and int(line_number) in longest_path:
self._get_node_by_lineno(int(line_number))['latency'] += self.dg.edges[
(line_number, int(line_number))
]['latency']
return [x for x in self.kernel if x['line_number'] in longest_path]
else:
# split to DAG