mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-15 16:40:05 +01:00
Merge pull request #92 from dgazzoni/aarch64-conditions-codes
Support for flags and conditional ops on AArch64
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
osaca_version: 0.3.3
|
||||
osaca_version: 0.5.0
|
||||
micro_architecture: Fujitsu A64FX
|
||||
arch_code: a64fx
|
||||
isa: AArch64
|
||||
@@ -25,6 +25,7 @@ load_throughput:
|
||||
load_throughput_default: [[1, '56'], [1, ['5D', '6D']]]
|
||||
store_throughput: []
|
||||
store_throughput_default: [[1, '5'], [1, '6']]
|
||||
p_index_latency: 1
|
||||
#store_throughput_multiplier: {w: 1.0, x: 1.0, b: 1.0, h: 1.0, s: 1.0, d: 1.0, q: 1.0, v: 2.0, z: 2.0}
|
||||
ports: ['0', 0DV, '1', '2', '3', '4', '5', 5D, '6', 6D, '7']
|
||||
port_model_scheme: |
|
||||
@@ -186,14 +187,25 @@ instruction_forms:
|
||||
- name: adds
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: x
|
||||
prefix: "*"
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p34
|
||||
port_pressure: [[1, '34']]
|
||||
- name: adds
|
||||
operands:
|
||||
- class: register
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: "*"
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p34
|
||||
port_pressure: [[1, '34']]
|
||||
- name: and
|
||||
operands:
|
||||
- class: register
|
||||
@@ -356,27 +368,62 @@ instruction_forms:
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p34
|
||||
port_pressure: [[1, '34']]
|
||||
- name: csel
|
||||
- name: [ccmp, ccmn]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
prefix: "*"
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: condition
|
||||
ccode: "*"
|
||||
throughput: 1.0
|
||||
latency: 2.0 # 2*p3 | 2*p4
|
||||
port_pressure: [[2, '34']]
|
||||
- name: [ccmp, ccmn]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: identifier
|
||||
prefix: "*"
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: condition
|
||||
ccode: "*"
|
||||
throughput: 1.0
|
||||
latency: 2.0 # 2*p3 | 2*p4
|
||||
port_pressure: [[2, '34']]
|
||||
- name: [csel, csinc, csinv, csneg]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: "*"
|
||||
- class: condition
|
||||
ccode: "*"
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p34
|
||||
port_pressure: [[1, '34']]
|
||||
- name: csel
|
||||
- name: [cinc, cinv, cneg]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: w
|
||||
prefix: "*"
|
||||
- class: register
|
||||
prefix: w
|
||||
prefix: "*"
|
||||
- class: condition
|
||||
ccode: "*"
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p34
|
||||
port_pressure: [[1, '34']]
|
||||
- name: [cset, csetm]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: w
|
||||
- class: identifier
|
||||
prefix: "*"
|
||||
- class: condition
|
||||
ccode: "*"
|
||||
throughput: 0.5
|
||||
latency: 1.0 # 1*p34
|
||||
port_pressure: [[1, '34']]
|
||||
@@ -2163,6 +2210,22 @@ instruction_forms:
|
||||
throughput: 2.0
|
||||
latency: 0 # 2*p56+2*p0
|
||||
port_pressure: [[2, '5'], [2,'6'], [2, '0']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: register
|
||||
prefix: q
|
||||
- class: memory
|
||||
base: x
|
||||
offset: '*'
|
||||
index: '*'
|
||||
scale: '*'
|
||||
pre-indexed: true
|
||||
post-indexed: false
|
||||
throughput: 2.0
|
||||
latency: 0 # 2*p56+2*p0+1*0234
|
||||
port_pressure: [[2, '5'], [2,'6'], [2, '0'], [1, '0234']]
|
||||
- name: stp
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -144,9 +144,9 @@ class Frontend(object):
|
||||
+ "-----------------------------------------\n"
|
||||
)
|
||||
# TODO find a way to overcome padding for different tab-lengths
|
||||
for dep in dep_dict:
|
||||
for dep in sorted(dep_dict.keys()):
|
||||
s += "{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
|
||||
dep,
|
||||
int(dep.split("-")[0]),
|
||||
separator,
|
||||
dep_dict[dep]["latency"],
|
||||
separator,
|
||||
|
||||
@@ -170,6 +170,14 @@ def create_parser(parser=None):
|
||||
" its analysis with the dependency paths found up to this point. Defaults to 10."
|
||||
" Set to -1 for no timeout.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--consider-flag-deps",
|
||||
"-f",
|
||||
dest="consider_flag_deps",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Consider flag dependencies (carry, zero, ...)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="count", default=0, help="Increases verbosity level."
|
||||
)
|
||||
@@ -333,7 +341,9 @@ def inspect(args, output_file=sys.stdout):
|
||||
semantics.assign_optimal_throughput(kernel)
|
||||
|
||||
# Create DiGraphs
|
||||
kernel_graph = KernelDG(kernel, parser, machine_model, semantics, args.lcd_timeout)
|
||||
kernel_graph = KernelDG(
|
||||
kernel, parser, machine_model, semantics, args.lcd_timeout, args.consider_flag_deps
|
||||
)
|
||||
if args.dotpath is not None:
|
||||
kernel_graph.export_graph(args.dotpath if args.dotpath != "." else None)
|
||||
# Print analysis
|
||||
|
||||
@@ -196,11 +196,34 @@ class ParserAArch64(BaseParser):
|
||||
"policy"
|
||||
)
|
||||
).setResultsName("prfop")
|
||||
# Condition codes, based on http://tiny.cc/armcc
|
||||
condition = (
|
||||
pp.CaselessLiteral("EQ") # z set
|
||||
^ pp.CaselessLiteral("NE") # z clear
|
||||
^ pp.CaselessLiteral("CS") # c set
|
||||
^ pp.CaselessLiteral("HS") # c set
|
||||
^ pp.CaselessLiteral("CC") # c clear
|
||||
^ pp.CaselessLiteral("LO") # c clear
|
||||
^ pp.CaselessLiteral("MI") # n set
|
||||
^ pp.CaselessLiteral("PL") # n clear
|
||||
^ pp.CaselessLiteral("VS") # v set
|
||||
^ pp.CaselessLiteral("VC") # v clear
|
||||
^ pp.CaselessLiteral("HI") # c set and z clear
|
||||
^ pp.CaselessLiteral("LS") # c clear or z set
|
||||
^ pp.CaselessLiteral("GE") # n and v the same
|
||||
^ pp.CaselessLiteral("LT") # n and v different
|
||||
^ pp.CaselessLiteral("GT") # z clear, and n and v the same
|
||||
^ pp.CaselessLiteral("LE") # z set, or n and v different
|
||||
^ pp.CaselessLiteral("AL") # any
|
||||
).setResultsName("condition")
|
||||
self.condition = condition
|
||||
# Combine to instruction form
|
||||
operand_first = pp.Group(
|
||||
register ^ (prefetch_op | immediate) ^ memory ^ arith_immediate ^ identifier
|
||||
)
|
||||
operand_rest = pp.Group((register ^ immediate ^ memory ^ arith_immediate) | identifier)
|
||||
operand_rest = pp.Group(
|
||||
(register ^ condition ^ immediate ^ memory ^ arith_immediate) | identifier
|
||||
)
|
||||
self.instruction_parser = (
|
||||
mnemonic
|
||||
+ pp.Optional(operand_first.setResultsName("operand1"))
|
||||
@@ -558,7 +581,7 @@ class ParserAArch64(BaseParser):
|
||||
"""Check if ``flag_a`` is dependent on ``flag_b``"""
|
||||
# we assume flags are independent of each other, e.g., CF can be read while ZF gets written
|
||||
# TODO validate this assumption
|
||||
if flag_a.name == flag_b.name:
|
||||
if flag_a["name"] == flag_b["name"]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -581,6 +581,15 @@ class MachineModel(object):
|
||||
# prefetch option
|
||||
if "prfop" in operand:
|
||||
return i_operand["class"] == "prfop"
|
||||
# condition
|
||||
if "condition" in operand:
|
||||
if i_operand["ccode"] == self.WILDCARD:
|
||||
return True
|
||||
return i_operand["class"] == "condition" and (
|
||||
operand.get("condition", None) == i_operand.get("ccode", None).upper()
|
||||
if isinstance(i_operand.get("ccode", None), str)
|
||||
else i_operand.get("ccode", None)
|
||||
)
|
||||
# no match
|
||||
return False
|
||||
|
||||
|
||||
@@ -148,7 +148,6 @@ class ISASemantics(object):
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
# store operand list in dict and reassign operand key/value pair
|
||||
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
||||
# assign LD/ST flags
|
||||
|
||||
@@ -22,14 +22,17 @@ class KernelDG(nx.DiGraph):
|
||||
hw_model: MachineModel,
|
||||
semantics: ArchSemantics,
|
||||
timeout=10,
|
||||
flag_dependencies=False,
|
||||
):
|
||||
self.timed_out = False
|
||||
self.kernel = parsed_kernel
|
||||
self.parser = parser
|
||||
self.model = hw_model
|
||||
self.arch_sem = semantics
|
||||
self.dg = self.create_DG(self.kernel)
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel, timeout)
|
||||
self.dg = self.create_DG(self.kernel, flag_dependencies)
|
||||
self.loopcarried_deps = self.check_for_loopcarried_dep(
|
||||
self.kernel, timeout, flag_dependencies
|
||||
)
|
||||
|
||||
def _extend_path(self, dst_list, kernel, dg, offset):
|
||||
for instr in kernel:
|
||||
@@ -40,12 +43,15 @@ class KernelDG(nx.DiGraph):
|
||||
dst_list.extend(tmp_list)
|
||||
# print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
|
||||
|
||||
def create_DG(self, kernel):
|
||||
def create_DG(self, kernel, flag_dependencies=False):
|
||||
"""
|
||||
Create directed graph from given kernel
|
||||
|
||||
:param kernel: Parsed asm kernel with assigned semantic information
|
||||
:type kernel: list
|
||||
:param flag_dependencies: indicating if dependencies of flags should be considered,
|
||||
defaults to `False`
|
||||
:type flag_dependencies: boolean, optional
|
||||
:returns: :class:`~nx.DiGraph` -- directed graph object
|
||||
"""
|
||||
# 1. go through kernel instruction forms and add them as node attribute
|
||||
@@ -71,23 +77,28 @@ class KernelDG(nx.DiGraph):
|
||||
instruction_form["line_number"],
|
||||
latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
|
||||
)
|
||||
for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
|
||||
for dep, dep_flags in self.find_depending(
|
||||
instruction_form, kernel[i + 1 :], flag_dependencies
|
||||
):
|
||||
edge_weight = (
|
||||
instruction_form["latency"]
|
||||
if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
|
||||
else instruction_form["latency_wo_load"]
|
||||
)
|
||||
if "storeload_dep" in dep_flags:
|
||||
if "storeload_dep" in dep_flags and self.model is not None:
|
||||
edge_weight += self.model.get("store_to_load_forward_latency", 0)
|
||||
if "p_indexed" in dep_flags and self.model is not None:
|
||||
edge_weight = self.model.get("p_index_latency", 1)
|
||||
dg.add_edge(
|
||||
instruction_form["line_number"],
|
||||
dep["line_number"],
|
||||
latency=edge_weight,
|
||||
)
|
||||
|
||||
dg.nodes[dep["line_number"]]["instruction_form"] = dep
|
||||
return dg
|
||||
|
||||
def check_for_loopcarried_dep(self, kernel, timeout=10):
|
||||
def check_for_loopcarried_dep(self, kernel, timeout=10, flag_dependencies=False):
|
||||
"""
|
||||
Try to find loop-carried dependencies in given kernel.
|
||||
|
||||
@@ -106,7 +117,7 @@ class KernelDG(nx.DiGraph):
|
||||
temp_iform["line_number"] += offset
|
||||
tmp_kernel.append(temp_iform)
|
||||
# get dependency graph
|
||||
dg = self.create_DG(tmp_kernel)
|
||||
dg = self.create_DG(tmp_kernel, flag_dependencies)
|
||||
|
||||
# build cyclic loop-carried dependencies
|
||||
loopcarried_deps = []
|
||||
@@ -191,7 +202,8 @@ class KernelDG(nx.DiGraph):
|
||||
# map lcd back to nodes
|
||||
loopcarried_deps_dict = {}
|
||||
for lat_sum, involved_lines in loopcarried_deps:
|
||||
loopcarried_deps_dict[involved_lines[0][0]] = {
|
||||
dict_key = "-".join([str(il[0]) for il in involved_lines])
|
||||
loopcarried_deps_dict[dict_key] = {
|
||||
"root": self._get_node_by_lineno(involved_lines[0][0]),
|
||||
"dependencies": [
|
||||
(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
|
||||
@@ -273,7 +285,10 @@ class KernelDG(nx.DiGraph):
|
||||
if "register" in dst:
|
||||
# read of register
|
||||
if self.is_read(dst.register, instr_form):
|
||||
yield instr_form, []
|
||||
if dst.get("pre_indexed", False) or dst.get("post_indexed", False):
|
||||
yield instr_form, ["p_indexed"]
|
||||
else:
|
||||
yield instr_form, []
|
||||
# write to register -> abort
|
||||
if self.is_written(dst.register, instr_form):
|
||||
break
|
||||
|
||||
14
tests/test_files/kernel_aarch64_deps.s
Normal file
14
tests/test_files/kernel_aarch64_deps.s
Normal file
@@ -0,0 +1,14 @@
|
||||
// OSACA-BEGIN
|
||||
.LBB0_32:
|
||||
ldp q4, q5, [x9, #-32]
|
||||
ldp q6, q7, [x9], #64
|
||||
add x9, x9, x9
|
||||
add x10, x9, #64 // =64
|
||||
fmul v4.2d, v4.2d, v6.2d
|
||||
fmul v5.2d, v4.2d, v7.2d
|
||||
adds x10, x10, x10
|
||||
csel, x9, x1, x9, eq
|
||||
stp q14, q15, [x9, #-32]!
|
||||
stp q14, q15, [x9], #64
|
||||
b.ne .LBB0_32
|
||||
// OSACA-END
|
||||
@@ -73,6 +73,14 @@ class TestParserAArch64(unittest.TestCase):
|
||||
"IACA START",
|
||||
)
|
||||
|
||||
def test_condition_parser(self):
|
||||
self.assertEqual(self._get_condition(self.parser, "EQ"), "EQ")
|
||||
self.assertEqual(self._get_condition(self.parser, "ne"), "NE")
|
||||
self.assertEqual(self._get_condition(self.parser, "Lt"), "LT")
|
||||
self.assertEqual(self._get_condition(self.parser, "Gt"), "GT")
|
||||
with self.assertRaises(ParseException):
|
||||
self._get_condition(self.parser, "LOcondition")
|
||||
|
||||
def test_parse_instruction(self):
|
||||
instr1 = "\t\tvcvt.F32.S32 w1, w2\t\t\t//12.27"
|
||||
instr2 = "b.lo ..B1.4 \t"
|
||||
@@ -82,6 +90,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
instr6 = "adrp x0, :got:visited"
|
||||
instr7 = "fadd v17.2d, v16.2d, v1.2d"
|
||||
instr8 = "mov.d x0, v16.d[1]"
|
||||
instr9 = "ccmp x0, x1, #4, cc"
|
||||
|
||||
parsed_1 = self.parser.parse_instruction(instr1)
|
||||
parsed_2 = self.parser.parse_instruction(instr2)
|
||||
@@ -91,6 +100,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
parsed_6 = self.parser.parse_instruction(instr6)
|
||||
parsed_7 = self.parser.parse_instruction(instr7)
|
||||
parsed_8 = self.parser.parse_instruction(instr8)
|
||||
parsed_9 = self.parser.parse_instruction(instr9)
|
||||
|
||||
self.assertEqual(parsed_1.instruction, "vcvt.F32.S32")
|
||||
self.assertEqual(parsed_1.operands[0].register.name, "1")
|
||||
@@ -152,6 +162,11 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(parsed_8.operands[1].register.index, "1")
|
||||
self.assertEqual(self.parser.get_full_reg_name(parsed_8.operands[1].register), "v16.d[1]")
|
||||
|
||||
self.assertEqual(parsed_9.instruction, "ccmp")
|
||||
self.assertEqual(parsed_9.operands[0].register.name, "0")
|
||||
self.assertEqual(parsed_9.operands[0].register.prefix, "x")
|
||||
self.assertEqual(parsed_9.operands[3].condition, "CC")
|
||||
|
||||
def test_parse_line(self):
|
||||
line_comment = "// -- Begin main"
|
||||
line_label = ".LBB0_1: // =>This Inner Loop Header: Depth=1"
|
||||
@@ -161,6 +176,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
line_preindexed = "stp x29, x30, [sp, #-16]!"
|
||||
line_postindexed = "ldp q2, q3, [x11], #64"
|
||||
line_5_operands = "fcmla z26.d, p0/m, z29.d, z21.d, #90"
|
||||
line_conditions = "ccmn x11, #1, #3, eq"
|
||||
|
||||
instruction_form_1 = {
|
||||
"instruction": None,
|
||||
@@ -291,6 +307,20 @@ class TestParserAArch64(unittest.TestCase):
|
||||
"line": "fcmla z26.d, p0/m, z29.d, z21.d, #90",
|
||||
"line_number": 8,
|
||||
}
|
||||
instruction_form_9 = {
|
||||
"instruction": "ccmn",
|
||||
"operands": [
|
||||
{"register": {"prefix": "x", "name": "11"}},
|
||||
{"immediate": {"value": 1, "type": "int"}},
|
||||
{"immediate": {"value": 3, "type": "int"}},
|
||||
{"condition": "EQ"},
|
||||
],
|
||||
"directive": None,
|
||||
"comment": None,
|
||||
"label": None,
|
||||
"line": "ccmn x11, #1, #3, eq",
|
||||
"line_number": 9,
|
||||
}
|
||||
|
||||
parsed_1 = self.parser.parse_line(line_comment, 1)
|
||||
parsed_2 = self.parser.parse_line(line_label, 2)
|
||||
@@ -300,6 +330,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
parsed_6 = self.parser.parse_line(line_preindexed, 6)
|
||||
parsed_7 = self.parser.parse_line(line_postindexed, 7)
|
||||
parsed_8 = self.parser.parse_line(line_5_operands, 8)
|
||||
parsed_9 = self.parser.parse_line(line_conditions, 9)
|
||||
|
||||
self.assertEqual(parsed_1, instruction_form_1)
|
||||
self.assertEqual(parsed_2, instruction_form_2)
|
||||
@@ -309,6 +340,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(parsed_6, instruction_form_6)
|
||||
self.assertEqual(parsed_7, instruction_form_7)
|
||||
self.assertEqual(parsed_8, instruction_form_8)
|
||||
self.assertEqual(parsed_9, instruction_form_9)
|
||||
|
||||
def test_parse_file(self):
|
||||
parsed = self.parser.parse_file(self.triad_code)
|
||||
@@ -435,6 +467,11 @@ class TestParserAArch64(unittest.TestCase):
|
||||
parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict())
|
||||
).directive
|
||||
|
||||
def _get_condition(self, parser, condition):
|
||||
return AttrDict.convert_dict(
|
||||
parser.process_operand(parser.condition.parseString(condition, parseAll=True).asDict())
|
||||
).condition
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
|
||||
@@ -43,6 +43,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.code_AArch64 = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
|
||||
cls.code_AArch64_SVE = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_deps.s")) as f:
|
||||
cls.code_AArch64_deps = f.read()
|
||||
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
|
||||
cls.kernel_x86_memdep = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
|
||||
@@ -59,6 +61,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.kernel_aarch64_SVE = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
|
||||
)
|
||||
cls.kernel_aarch64_deps = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64"
|
||||
)
|
||||
|
||||
# set up machine models
|
||||
cls.machine_model_csx = MachineModel(
|
||||
@@ -104,6 +109,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
for i in range(len(cls.kernel_aarch64_SVE)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
|
||||
for i in range(len(cls.kernel_aarch64_deps)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
@@ -440,38 +448,71 @@ class TestSemanticTools(unittest.TestCase):
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 4)
|
||||
# based on line 6
|
||||
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
||||
dep_path = "6-10-11-12-13-14"
|
||||
self.assertEqual(lc_deps[dep_path]["latency"], 29.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 1.0)],
|
||||
)
|
||||
dg = KernelDG(
|
||||
self.kernel_aarch64_deps,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_a64fx,
|
||||
self.semantics_a64fx,
|
||||
flag_dependencies=True,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
# based on line 4
|
||||
dep_path = "4-5-6-9-10-11-12"
|
||||
self.assertEqual(lc_deps[dep_path]["latency"], 7.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
|
||||
[(4, 1.0), (5, 1.0), (6, 1.0), (9, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
|
||||
)
|
||||
dg = KernelDG(
|
||||
self.kernel_aarch64_deps,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_a64fx,
|
||||
self.semantics_a64fx,
|
||||
flag_dependencies=False,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 1)
|
||||
# based on line 4
|
||||
dep_path = "4-5-10-11-12"
|
||||
self.assertEqual(lc_deps[dep_path]["latency"], 5.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
|
||||
[(4, 1.0), (5, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
|
||||
)
|
||||
|
||||
def test_loop_carried_dependency_x86(self):
|
||||
lcd_id = 8
|
||||
lcd_id2 = 5
|
||||
lcd_id = "8"
|
||||
lcd_id2 = "5"
|
||||
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
# ID 8
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
|
||||
lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"]
|
||||
)
|
||||
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id]["dependencies"][0][0],
|
||||
dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
|
||||
dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"],
|
||||
)
|
||||
# w/ flag dependencies: ID 9 w/ len=2
|
||||
# w/o flag dependencies: ID 5 w/ len=1
|
||||
# TODO discuss
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id2]["root"],
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
|
||||
)
|
||||
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id2]["dependencies"][0][0],
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
|
||||
)
|
||||
|
||||
def test_timeout_during_loop_carried_dependency(self):
|
||||
|
||||
Reference in New Issue
Block a user