mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-05 18:50:08 +01:00
fixed loop-carried dependency check and minor fixes in DBs
This commit is contained in:
@@ -5,6 +5,16 @@ isa: "x86"
|
|||||||
# mnemonic op1 ... opN
|
# mnemonic op1 ... opN
|
||||||
# means that opN is the only destination operand and op1 to op(N-1) are source operands.
|
# means that opN is the only destination operand and op1 to op(N-1) are source operands.
|
||||||
instruction_forms:
|
instruction_forms:
|
||||||
|
- name: addq
|
||||||
|
operands:
|
||||||
|
- class: "immediate"
|
||||||
|
imd: "int"
|
||||||
|
source: true
|
||||||
|
destination: false
|
||||||
|
- class: "register"
|
||||||
|
name: "gpr"
|
||||||
|
source: true
|
||||||
|
destination: true
|
||||||
- name: addsd
|
- name: addsd
|
||||||
operands:
|
operands:
|
||||||
- class: "register"
|
- class: "register"
|
||||||
@@ -83,6 +93,16 @@ instruction_forms:
|
|||||||
name: "xmm"
|
name: "xmm"
|
||||||
source: true
|
source: true
|
||||||
destination: false
|
destination: false
|
||||||
|
- name: subq
|
||||||
|
operands:
|
||||||
|
- class: "immediate"
|
||||||
|
imd: "int"
|
||||||
|
source: true
|
||||||
|
destination: false
|
||||||
|
- class: "register"
|
||||||
|
name: "gpr"
|
||||||
|
source: true
|
||||||
|
destination: true
|
||||||
- name: vfmadd132pd
|
- name: vfmadd132pd
|
||||||
operands:
|
operands:
|
||||||
- class: "memory"
|
- class: "memory"
|
||||||
@@ -151,7 +171,6 @@ instruction_forms:
|
|||||||
name: "ymm"
|
name: "ymm"
|
||||||
source: true
|
source: true
|
||||||
destination: true
|
destination: true
|
||||||
|
|
||||||
- name: vfmadd231pd
|
- name: vfmadd231pd
|
||||||
operands:
|
operands:
|
||||||
- class: "memory"
|
- class: "memory"
|
||||||
|
|||||||
@@ -2,6 +2,10 @@ osaca_version: 0.3.0
|
|||||||
micro_architecture: "Cavium Vulcan"
|
micro_architecture: "Cavium Vulcan"
|
||||||
arch_code: "Vulcan"
|
arch_code: "Vulcan"
|
||||||
isa: "AArch64"
|
isa: "AArch64"
|
||||||
|
ROB_size: 180
|
||||||
|
retired_uOps_per_cycle: 4
|
||||||
|
scheduler_size: 60
|
||||||
|
ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"]
|
||||||
port_model_scheme: |
|
port_model_scheme: |
|
||||||
┌-----------------------------------------------------------┐
|
┌-----------------------------------------------------------┐
|
||||||
| 60 entry unified scheduler |
|
| 60 entry unified scheduler |
|
||||||
@@ -23,7 +27,6 @@ port_model_scheme: |
|
|||||||
┌------┐
|
┌------┐
|
||||||
|CRYPTO|
|
|CRYPTO|
|
||||||
└------┘
|
└------┘
|
||||||
ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"]
|
|
||||||
instruction_forms:
|
instruction_forms:
|
||||||
- name: "add"
|
- name: "add"
|
||||||
operands:
|
operands:
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
osaca_version: 0.~
|
osaca_version: 0.3.0
|
||||||
micro_architecture: "AMD Zen (family 17h)"
|
micro_architecture: "AMD Zen (family 17h)"
|
||||||
|
arch_code: "ZEN1"
|
||||||
isa: "x86"
|
isa: "x86"
|
||||||
port_model_scheme: |
|
port_model_scheme: |
|
||||||
┌--------------------------------------┐ ┌-----------------------------------------------┐
|
┌--------------------------------------┐ ┌-----------------------------------------------┐
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ class Frontend(object):
|
|||||||
print('\n\n------------------------')
|
print('\n\n------------------------')
|
||||||
for instruction_form in cp_kernel:
|
for instruction_form in cp_kernel:
|
||||||
print(
|
print(
|
||||||
'{} {} {} {}{}{} {}'.format(
|
'{:4d} {} {:4.1f} {}{}{} {}'.format(
|
||||||
instruction_form['line_number'],
|
instruction_form['line_number'],
|
||||||
separator,
|
separator,
|
||||||
instruction_form['latency'],
|
instruction_form['latency'],
|
||||||
@@ -133,13 +133,28 @@ class Frontend(object):
|
|||||||
instruction_form['line'],
|
instruction_form['line'],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
print(
|
||||||
|
'\n{:4} {} {:4.1f}'.format(
|
||||||
|
' ' * max([len(str(instr_form['line_number'])) for instr_form in cp_kernel]),
|
||||||
|
' ' * len(separator),
|
||||||
|
sum([instr_form['latency'] for instr_form in cp_kernel]),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
def print_loopcarried_dependencies(self, dep_tuplelist, separator='|'):
|
def print_loopcarried_dependencies(self, dep_tuplelist, separator='|'):
|
||||||
print('\n\n------------------------')
|
print('\n\n------------------------')
|
||||||
for tup in dep_tuplelist:
|
for tup in dep_tuplelist:
|
||||||
print(
|
print(
|
||||||
'{}: {} {} {}'.format(
|
'{:4d} {} {:4.1f} {} {:36}{} {}'.format(
|
||||||
tup[0]['line_number'],
|
tup[0]['line_number'],
|
||||||
|
separator,
|
||||||
|
sum(
|
||||||
|
[
|
||||||
|
instr_form['latency'] if instr_form['latency'] is not None else 0
|
||||||
|
for instr_form in tup[1]
|
||||||
|
]
|
||||||
|
),
|
||||||
|
separator,
|
||||||
tup[0]['line'],
|
tup[0]['line'],
|
||||||
separator,
|
separator,
|
||||||
[node['line_number'] for node in tup[1]],
|
[node['line_number'] for node in tup[1]],
|
||||||
|
|||||||
@@ -3,10 +3,10 @@
|
|||||||
import copy
|
import copy
|
||||||
|
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
|
from itertools import chain, product
|
||||||
|
|
||||||
from osaca.parser import AttrDict
|
from osaca.parser import AttrDict
|
||||||
|
from osaca.semantics import MachineModel
|
||||||
from .hw_model import MachineModel
|
|
||||||
|
|
||||||
|
|
||||||
class KernelDG(nx.DiGraph):
|
class KernelDG(nx.DiGraph):
|
||||||
@@ -18,47 +18,37 @@ class KernelDG(nx.DiGraph):
|
|||||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
||||||
|
|
||||||
def check_for_loopcarried_dep(self, kernel):
|
def check_for_loopcarried_dep(self, kernel):
|
||||||
|
multiplier = len(kernel) + 1
|
||||||
# increase line number for second kernel loop
|
# increase line number for second kernel loop
|
||||||
kernel_length = len(kernel)
|
kernel_length = len(kernel)
|
||||||
first_line_no = kernel[0].line_number
|
first_line_no = kernel[0].line_number
|
||||||
kernel_copy = [AttrDict.convert_dict(d) for d in copy.deepcopy(kernel)]
|
kernel_copy = [AttrDict.convert_dict(d) for d in copy.deepcopy(kernel)]
|
||||||
tmp_kernel = kernel + kernel_copy
|
tmp_kernel = kernel + kernel_copy
|
||||||
for i, instruction_form in enumerate(tmp_kernel[kernel_length:]):
|
for i, instruction_form in enumerate(tmp_kernel[kernel_length:]):
|
||||||
tmp_kernel[i + kernel_length].line_number = instruction_form.line_number * 10
|
tmp_kernel[i + kernel_length].line_number = instruction_form.line_number * multiplier
|
||||||
# get dependency graph
|
# get dependency graph
|
||||||
dg = self.create_DG(tmp_kernel)
|
dg = self.create_DG(tmp_kernel)
|
||||||
descendants = [
|
|
||||||
(x, sorted([x for x in nx.algorithms.dag.descendants(dg, x)]))
|
|
||||||
for x in range(first_line_no, first_line_no + kernel_length)
|
|
||||||
if x in dg
|
|
||||||
]
|
|
||||||
loopcarried_deps = [
|
|
||||||
x for x in descendants if len(x[1]) > 0 and x[1][-1] >= first_line_no * 10
|
|
||||||
]
|
|
||||||
|
|
||||||
|
# build cyclic loop-carried dependencies
|
||||||
|
loopcarried_deps = [
|
||||||
|
(node, list(nx.algorithms.simple_paths.all_simple_paths(dg, node, node * multiplier)))
|
||||||
|
for node in dg.nodes
|
||||||
|
if node < first_line_no * multiplier
|
||||||
|
]
|
||||||
|
# filter others and create graph
|
||||||
|
loopcarried_deps = list(
|
||||||
|
chain.from_iterable(
|
||||||
|
[list(product([dep_chain[0]], dep_chain[1])) for dep_chain in loopcarried_deps]
|
||||||
|
)
|
||||||
|
)
|
||||||
# adjust line numbers
|
# adjust line numbers
|
||||||
# and add reference to kernel again
|
# and add reference to kernel again
|
||||||
for i, dep in enumerate(loopcarried_deps):
|
for i, dep in enumerate(loopcarried_deps):
|
||||||
nodes = [int(n / 10) for n in dep[1] if n >= first_line_no * 10]
|
nodes = [int(n / multiplier) for n in dep[1] if n >= first_line_no * multiplier]
|
||||||
nodes = [self._get_node_by_lineno(x) for x in nodes]
|
nodes = [self._get_node_by_lineno(x) for x in nodes]
|
||||||
loopcarried_deps[i] = (self._get_node_by_lineno(dep[0]), nodes)
|
loopcarried_deps[i] = (self._get_node_by_lineno(dep[0]), nodes)
|
||||||
# check if dependency is cyclic
|
|
||||||
cyclic_lc_deps = []
|
return loopcarried_deps
|
||||||
for dep in loopcarried_deps:
|
|
||||||
write_back = list(
|
|
||||||
self.find_depending(
|
|
||||||
dep[0],
|
|
||||||
tmp_kernel[dep[0].line_number - first_line_no + 1:],
|
|
||||||
include_write=True,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if (
|
|
||||||
write_back is not None
|
|
||||||
and len(write_back) > 0
|
|
||||||
and int(write_back[-1].line_number / 10) == dep[0].line_number
|
|
||||||
):
|
|
||||||
cyclic_lc_deps.append(dep)
|
|
||||||
return cyclic_lc_deps
|
|
||||||
|
|
||||||
def _get_node_by_lineno(self, lineno):
|
def _get_node_by_lineno(self, lineno):
|
||||||
return [instr for instr in self.kernel if instr.line_number == lineno][0]
|
return [instr for instr in self.kernel if instr.line_number == lineno][0]
|
||||||
@@ -70,6 +60,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
# 4. add instr forms as node attribute
|
# 4. add instr forms as node attribute
|
||||||
dg = nx.DiGraph()
|
dg = nx.DiGraph()
|
||||||
for i, instruction_form in enumerate(kernel):
|
for i, instruction_form in enumerate(kernel):
|
||||||
|
dg.add_node(instruction_form['line_number'])
|
||||||
for dep in self.find_depending(instruction_form, kernel[i + 1:]):
|
for dep in self.find_depending(instruction_form, kernel[i + 1:]):
|
||||||
dg.add_edge(
|
dg.add_edge(
|
||||||
instruction_form['line_number'],
|
instruction_form['line_number'],
|
||||||
|
|||||||
Reference in New Issue
Block a user