mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-09 04:30:05 +01:00
Fixed the loop-carried dependency check and made minor fixes in the DBs
This commit is contained in:
@@ -5,6 +5,16 @@ isa: "x86"
|
||||
# mnemonic op1 ... opN
|
||||
# means that opN is the only destination operand and op1 to op(N-1) are source operands.
|
||||
instruction_forms:
|
||||
- name: addq
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: addsd
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -83,6 +93,16 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- name: subq
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
- name: vfmadd132pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
@@ -151,7 +171,6 @@ instruction_forms:
|
||||
name: "ymm"
|
||||
source: true
|
||||
destination: true
|
||||
|
||||
- name: vfmadd231pd
|
||||
operands:
|
||||
- class: "memory"
|
||||
|
||||
@@ -2,6 +2,10 @@ osaca_version: 0.3.0
|
||||
micro_architecture: "Cavium Vulcan"
|
||||
arch_code: "Vulcan"
|
||||
isa: "AArch64"
|
||||
ROB_size: 180
|
||||
retired_uOps_per_cycle: 4
|
||||
scheduler_size: 60
|
||||
ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"]
|
||||
port_model_scheme: |
|
||||
┌-----------------------------------------------------------┐
|
||||
| 60 entry unified scheduler |
|
||||
@@ -23,7 +27,6 @@ port_model_scheme: |
|
||||
┌------┐
|
||||
|CRYPTO|
|
||||
└------┘
|
||||
ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"]
|
||||
instruction_forms:
|
||||
- name: "add"
|
||||
operands:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
osaca_version: 0.~
|
||||
osaca_version: 0.3.0
|
||||
micro_architecture: "AMD Zen (family 17h)"
|
||||
arch_code: "ZEN1"
|
||||
isa: "x86"
|
||||
port_model_scheme: |
|
||||
┌--------------------------------------┐ ┌-----------------------------------------------┐
|
||||
|
||||
@@ -123,7 +123,7 @@ class Frontend(object):
|
||||
print('\n\n------------------------')
|
||||
for instruction_form in cp_kernel:
|
||||
print(
|
||||
'{} {} {} {}{}{} {}'.format(
|
||||
'{:4d} {} {:4.1f} {}{}{} {}'.format(
|
||||
instruction_form['line_number'],
|
||||
separator,
|
||||
instruction_form['latency'],
|
||||
@@ -133,13 +133,28 @@ class Frontend(object):
|
||||
instruction_form['line'],
|
||||
)
|
||||
)
|
||||
print(
|
||||
'\n{:4} {} {:4.1f}'.format(
|
||||
' ' * max([len(str(instr_form['line_number'])) for instr_form in cp_kernel]),
|
||||
' ' * len(separator),
|
||||
sum([instr_form['latency'] for instr_form in cp_kernel]),
|
||||
)
|
||||
)
|
||||
|
||||
def print_loopcarried_dependencies(self, dep_tuplelist, separator='|'):
|
||||
print('\n\n------------------------')
|
||||
for tup in dep_tuplelist:
|
||||
print(
|
||||
'{}: {} {} {}'.format(
|
||||
'{:4d} {} {:4.1f} {} {:36}{} {}'.format(
|
||||
tup[0]['line_number'],
|
||||
separator,
|
||||
sum(
|
||||
[
|
||||
instr_form['latency'] if instr_form['latency'] is not None else 0
|
||||
for instr_form in tup[1]
|
||||
]
|
||||
),
|
||||
separator,
|
||||
tup[0]['line'],
|
||||
separator,
|
||||
[node['line_number'] for node in tup[1]],
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
import copy
|
||||
|
||||
import networkx as nx
|
||||
from itertools import chain, product
|
||||
|
||||
from osaca.parser import AttrDict
|
||||
|
||||
from .hw_model import MachineModel
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
class KernelDG(nx.DiGraph):
|
||||
@@ -18,47 +18,37 @@ class KernelDG(nx.DiGraph):
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel)
|
||||
|
||||
def check_for_loopcarried_dep(self, kernel):
|
||||
multiplier = len(kernel) + 1
|
||||
# increase line number for second kernel loop
|
||||
kernel_length = len(kernel)
|
||||
first_line_no = kernel[0].line_number
|
||||
kernel_copy = [AttrDict.convert_dict(d) for d in copy.deepcopy(kernel)]
|
||||
tmp_kernel = kernel + kernel_copy
|
||||
for i, instruction_form in enumerate(tmp_kernel[kernel_length:]):
|
||||
tmp_kernel[i + kernel_length].line_number = instruction_form.line_number * 10
|
||||
tmp_kernel[i + kernel_length].line_number = instruction_form.line_number * multiplier
|
||||
# get dependency graph
|
||||
dg = self.create_DG(tmp_kernel)
|
||||
descendants = [
|
||||
(x, sorted([x for x in nx.algorithms.dag.descendants(dg, x)]))
|
||||
for x in range(first_line_no, first_line_no + kernel_length)
|
||||
if x in dg
|
||||
]
|
||||
loopcarried_deps = [
|
||||
x for x in descendants if len(x[1]) > 0 and x[1][-1] >= first_line_no * 10
|
||||
]
|
||||
|
||||
# build cyclic loop-carried dependencies
|
||||
loopcarried_deps = [
|
||||
(node, list(nx.algorithms.simple_paths.all_simple_paths(dg, node, node * multiplier)))
|
||||
for node in dg.nodes
|
||||
if node < first_line_no * multiplier
|
||||
]
|
||||
# filter others and create graph
|
||||
loopcarried_deps = list(
|
||||
chain.from_iterable(
|
||||
[list(product([dep_chain[0]], dep_chain[1])) for dep_chain in loopcarried_deps]
|
||||
)
|
||||
)
|
||||
# adjust line numbers
|
||||
# and add reference to kernel again
|
||||
for i, dep in enumerate(loopcarried_deps):
|
||||
nodes = [int(n / 10) for n in dep[1] if n >= first_line_no * 10]
|
||||
nodes = [int(n / multiplier) for n in dep[1] if n >= first_line_no * multiplier]
|
||||
nodes = [self._get_node_by_lineno(x) for x in nodes]
|
||||
loopcarried_deps[i] = (self._get_node_by_lineno(dep[0]), nodes)
|
||||
# check if dependency is cyclic
|
||||
cyclic_lc_deps = []
|
||||
for dep in loopcarried_deps:
|
||||
write_back = list(
|
||||
self.find_depending(
|
||||
dep[0],
|
||||
tmp_kernel[dep[0].line_number - first_line_no + 1:],
|
||||
include_write=True,
|
||||
)
|
||||
)
|
||||
if (
|
||||
write_back is not None
|
||||
and len(write_back) > 0
|
||||
and int(write_back[-1].line_number / 10) == dep[0].line_number
|
||||
):
|
||||
cyclic_lc_deps.append(dep)
|
||||
return cyclic_lc_deps
|
||||
|
||||
return loopcarried_deps
|
||||
|
||||
def _get_node_by_lineno(self, lineno):
    """
    Return the first instruction form in ``self.kernel`` with the given line number.

    :param lineno: line number to look up; compared against each entry's
                   ``line_number`` attribute
    :returns: the first entry of ``self.kernel`` whose ``line_number`` equals *lineno*
    :raises IndexError: if no entry of ``self.kernel`` carries *lineno*
    """
    # Stop at the first hit instead of materialising every match just to take element 0.
    for instr in self.kernel:
        if instr.line_number == lineno:
            return instr
    # Keep the exception type callers would have seen from ``[...][0]``, but say what was missing.
    raise IndexError('no instruction form with line number {} in kernel'.format(lineno))
|
||||
@@ -70,6 +60,7 @@ class KernelDG(nx.DiGraph):
|
||||
# 4. add instr forms as node attribute
|
||||
dg = nx.DiGraph()
|
||||
for i, instruction_form in enumerate(kernel):
|
||||
dg.add_node(instruction_form['line_number'])
|
||||
for dep in self.find_depending(instruction_form, kernel[i + 1:]):
|
||||
dg.add_edge(
|
||||
instruction_form['line_number'],
|
||||
|
||||
Reference in New Issue
Block a user