added more dependency analysis for post/pre indexing and condition flags

This commit is contained in:
JanLJL
2023-03-14 17:00:02 +01:00
parent d6569a0f23
commit d1201ace11
8 changed files with 161 additions and 47 deletions

View File

@@ -144,9 +144,9 @@ class Frontend(object):
+ "-----------------------------------------\n"
)
# TODO find a way to overcome padding for different tab-lengths
for dep in dep_dict:
for dep in sorted(dep_dict.keys()):
s += "{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
dep,
int(dep.split("-")[0]),
separator,
dep_dict[dep]["latency"],
separator,

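The loop-carried dependency dictionary handed to the frontend is now keyed by the full dash-joined path of line numbers instead of the root line alone, so the keys are sorted for stable output and the root line is recovered with `int(dep.split("-")[0])` for the padded column. A minimal sketch of the new key handling (the dict shape is inferred from this commit's tests and is illustrative only):

```python
# Illustrative: after this commit, dep_dict keys are dash-joined line-number paths.
dep_dict = {"6-10-11-12-13-14": {"latency": 29.0}}

for dep in sorted(dep_dict.keys()):
    root_line = int(dep.split("-")[0])  # 6 -- the line number printed in the report
    print(root_line, dep_dict[dep]["latency"], dep)
```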
View File

@@ -196,27 +196,27 @@ class ParserAArch64(BaseParser):
"policy"
)
).setResultsName("prfop")
# Condition codes
condition = pp.Group(
(
pp.CaselessLiteral("EQ")
^ pp.CaselessLiteral("NE")
^ pp.CaselessLiteral("CS")
^ pp.CaselessLiteral("HS")
^ pp.CaselessLiteral("CC")
^ pp.CaselessLiteral("LO")
^ pp.CaselessLiteral("HI")
^ pp.CaselessLiteral("LS")
^ pp.CaselessLiteral("GE")
^ pp.CaselessLiteral("LT")
^ pp.CaselessLiteral("GT")
^ pp.CaselessLiteral("LE")
^ pp.CaselessLiteral("MI")
^ pp.CaselessLiteral("PL")
^ pp.CaselessLiteral("VS")
^ pp.CaselessLiteral("VC")
).setResultsName("code")
# Condition codes, based on http://tiny.cc/armcc
condition = (
pp.CaselessLiteral("EQ") # z set
^ pp.CaselessLiteral("NE") # z clear
^ pp.CaselessLiteral("CS") # c set
^ pp.CaselessLiteral("HS") # c set
^ pp.CaselessLiteral("CC") # c clear
^ pp.CaselessLiteral("LO") # c clear
^ pp.CaselessLiteral("MI") # n set
^ pp.CaselessLiteral("PL") # n clear
^ pp.CaselessLiteral("VS") # v set
^ pp.CaselessLiteral("VC") # v clear
^ pp.CaselessLiteral("HI") # c set and z clear
^ pp.CaselessLiteral("LS") # c clear or z set
^ pp.CaselessLiteral("GE") # n and v the same
^ pp.CaselessLiteral("LT") # n and v different
^ pp.CaselessLiteral("GT") # z clear, and n and v the same
^ pp.CaselessLiteral("LE") # z set, or n and v different
^ pp.CaselessLiteral("AL") # any
).setResultsName("condition")
self.condition = condition
# Combine to instruction form
operand_first = pp.Group(
register ^ (prefetch_op | immediate) ^ memory ^ arith_immediate ^ identifier

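The condition-code element is now a flat alternation with its own `condition` result name, annotated with the flag semantics of each code, and exposed as `self.condition` so tests can parse codes directly. A minimal standalone sketch of the parsing behaviour, using only a subset of the codes (the full element above covers all of them plus `AL`):

```python
import pyparsing as pp

# CaselessLiteral matches regardless of input case but yields the canonical
# upper-case spelling, so "ne" parses to "NE".
condition = (
    pp.CaselessLiteral("EQ") ^ pp.CaselessLiteral("NE") ^ pp.CaselessLiteral("CC")
).setResultsName("condition")

print(condition.parseString("ne", parseAll=True).asDict())  # {'condition': 'NE'}
```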
View File

@@ -581,8 +581,15 @@ class MachineModel(object):
# prefetch option
if "prfop" in operand:
return i_operand["class"] == "prfop"
# condition
if "condition" in operand:
return i_operand["class"] == "condition"
if i_operand["ccode"] == self.WILDCARD:
return True
return i_operand["class"] == "condition" and (
operand.get("condition", None) == i_operand.get("ccode", None).upper()
if isinstance(i_operand.get("ccode", None), str)
else i_operand.get("ccode", None)
)
# no match
return False

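Machine-model operands of class `condition` are now matched against the parsed condition code via their `ccode` field, with the usual wildcard short-circuit. A simplified standalone re-statement of the rule (the helper name is hypothetical and the wildcard marker is assumed to be `"*"`; the real check lives in `MachineModel`'s operand matching):

```python
WILDCARD = "*"  # assumed wildcard marker of the machine model

def condition_matches(operand, i_operand):
    """Hypothetical helper mirroring the check added above."""
    if i_operand.get("ccode") == WILDCARD:
        return True  # model entry accepts any condition code
    ccode = i_operand.get("ccode")
    return i_operand.get("class") == "condition" and (
        operand.get("condition") == ccode.upper() if isinstance(ccode, str) else ccode
    )
```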
View File

@@ -127,7 +127,6 @@ class ISASemantics(object):
}
)
)
# store operand list in dict and reassign operand key/value pair
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
# assign LD/ST flags

View File

@@ -22,14 +22,17 @@ class KernelDG(nx.DiGraph):
hw_model: MachineModel,
semantics: ArchSemantics,
timeout=10,
flag_dependencies=False,
):
self.timed_out = False
self.kernel = parsed_kernel
self.parser = parser
self.model = hw_model
self.arch_sem = semantics
self.dg = self.create_DG(self.kernel)
self.loopcarried_deps = self.check_for_loopcarried_dep(self.kernel, timeout)
self.dg = self.create_DG(self.kernel, flag_dependencies)
self.loopcarried_deps = self.check_for_loopcarried_dep(
self.kernel, timeout, flag_dependencies
)
def _extend_path(self, dst_list, kernel, dg, offset):
for instr in kernel:
@@ -40,12 +43,15 @@ class KernelDG(nx.DiGraph):
dst_list.extend(tmp_list)
# print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
def create_DG(self, kernel):
def create_DG(self, kernel, flag_dependencies=False):
"""
Create directed graph from given kernel
:param kernel: Parsed asm kernel with assigned semantic information
:type kernel: list
:param flag_dependencies: whether dependencies through status flags should be considered,
defaults to `False`
:type flag_dependencies: boolean, optional
:returns: :class:`~nx.DiGraph` -- directed graph object
"""
# 1. go through kernel instruction forms and add them as node attribute
@@ -71,23 +77,28 @@ class KernelDG(nx.DiGraph):
instruction_form["line_number"],
latency=instruction_form["latency"] - instruction_form["latency_wo_load"],
)
for dep, dep_flags in self.find_depending(instruction_form, kernel[i + 1 :]):
for dep, dep_flags in self.find_depending(
instruction_form, kernel[i + 1 :], flag_dependencies
):
edge_weight = (
instruction_form["latency"]
if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
else instruction_form["latency_wo_load"]
)
if "storeload_dep" in dep_flags:
if "storeload_dep" in dep_flags and self.model is not None:
edge_weight += self.model.get("store_to_load_forward_latency", 0)
if "p_indexed" in dep_flags and self.model is not None:
edge_weight = self.model.get("p_index_latency", 1)
dg.add_edge(
instruction_form["line_number"],
dep["line_number"],
latency=edge_weight,
)
dg.nodes[dep["line_number"]]["instruction_form"] = dep
return dg
def check_for_loopcarried_dep(self, kernel, timeout=10):
def check_for_loopcarried_dep(self, kernel, timeout=10, flag_dependencies=False):
"""
Try to find loop-carried dependencies in given kernel.
@@ -106,7 +117,7 @@ class KernelDG(nx.DiGraph):
temp_iform["line_number"] += offset
tmp_kernel.append(temp_iform)
# get dependency graph
dg = self.create_DG(tmp_kernel)
dg = self.create_DG(tmp_kernel, flag_dependencies)
# build cyclic loop-carried dependencies
loopcarried_deps = []
@@ -191,7 +202,8 @@ class KernelDG(nx.DiGraph):
# map lcd back to nodes
loopcarried_deps_dict = {}
for lat_sum, involved_lines in loopcarried_deps:
loopcarried_deps_dict[involved_lines[0][0]] = {
dict_key = "-".join([str(il[0]) for il in involved_lines])
loopcarried_deps_dict[dict_key] = {
"root": self._get_node_by_lineno(involved_lines[0][0]),
"dependencies": [
(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
@@ -272,10 +284,11 @@ class KernelDG(nx.DiGraph):
# print(" TO", instr_form.line, register_changes)
if "register" in dst:
# read of register
if self.is_read(dst.register, instr_form) and not (
dst.get("pre_indexed", False) or dst.get("post_indexed", False)
):
yield instr_form, []
if self.is_read(dst.register, instr_form):
if dst.get("pre_indexed", False) or dst.get("post_indexed", False):
yield instr_form, ["p_indexed"]
else:
yield instr_form, []
# write to register -> abort
if self.is_written(dst.register, instr_form):
break

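Two new edge-weight cases are handled while building the graph: store-to-load forwarding latency is only added when a machine model is present, and the address write-back of a pre-/post-indexed access is charged as a cheap pointer update (`p_index_latency`, defaulting to 1) instead of the instruction's full latency. A simplified re-statement of that selection as a standalone function (the function itself is hypothetical; the names and defaults come from the diff above):

```python
def edge_latency(instruction_form, dep_flags, model):
    """Hypothetical standalone mirror of the edge-weight logic in create_DG."""
    latency = (
        instruction_form["latency"]
        if "mem_dep" in dep_flags or "latency_wo_load" not in instruction_form
        else instruction_form["latency_wo_load"]
    )
    if "storeload_dep" in dep_flags and model is not None:
        latency += model.get("store_to_load_forward_latency", 0)
    if "p_indexed" in dep_flags and model is not None:
        # only the base-register update is on the critical path, not the memory op
        latency = model.get("p_index_latency", 1)
    return latency
```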
View File

@@ -0,0 +1,14 @@
// OSACA-BEGIN
.LBB0_32:
ldp q4, q5, [x9, #-32]
ldp q6, q7, [x9], #64
add x9, x9, x9
add x10, x9, #64 // =64
fmul v4.2d, v4.2d, v6.2d
fmul v5.2d, v4.2d, v7.2d
adds x10, x10, x10
csel x9, x1, x9, eq
stp q14, q15, [x9, #-32]!
stp q14, q15, [x9], #64
b.ne .LBB0_32
// OSACA-END

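This kernel exercises all three new dependency kinds: post-indexed accesses that write back the base register (`[x9], #64`), a pre-indexed store (`[x9, #-32]!`), and a flag chain from `adds` into `csel ... eq` and `b.ne`. A hedged usage sketch of how such a kernel is analyzed, mirroring the semantics tests later in this commit (the setup objects are assumed to be built as in those tests):

```python
# Assumes parsed_kernel, parser, machine_model and semantics were prepared as in
# the tests below (parse_file + reduce_to_section + assign_src_dst/assign_tp_lt).
dg = KernelDG(
    parsed_kernel,
    parser,
    machine_model,
    semantics,
    flag_dependencies=True,  # also follow edges through the NZCV flags
)
lc_deps = dg.get_loopcarried_dependencies()
# Keys are dash-joined line-number paths, e.g. "4-5-6-9-10-11-12"; with
# flag_dependencies=False the adds->csel flag edge is dropped and only the
# shorter register chain remains.
for path, dep in lc_deps.items():
    print(path, dep["latency"])
```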
View File

@@ -73,6 +73,14 @@ class TestParserAArch64(unittest.TestCase):
"IACA START",
)
def test_condition_parser(self):
self.assertEqual(self._get_condition(self.parser, "EQ"), "EQ")
self.assertEqual(self._get_condition(self.parser, "ne"), "NE")
self.assertEqual(self._get_condition(self.parser, "Lt"), "LT")
self.assertEqual(self._get_condition(self.parser, "Gt"), "GT")
with self.assertRaises(ParseException):
self._get_condition(self.parser, "LOcondition")
def test_parse_instruction(self):
instr1 = "\t\tvcvt.F32.S32 w1, w2\t\t\t//12.27"
instr2 = "b.lo ..B1.4 \t"
@@ -81,6 +89,7 @@ class TestParserAArch64(unittest.TestCase):
instr5 = "ldr x0, [x0, #:got_lo12:q2c]"
instr6 = "adrp x0, :got:visited"
instr7 = "fadd v17.2d, v16.2d, v1.2d"
instr8 = "ccmp x0, x1, #4, cc"
parsed_1 = self.parser.parse_instruction(instr1)
parsed_2 = self.parser.parse_instruction(instr2)
@@ -89,6 +98,7 @@ class TestParserAArch64(unittest.TestCase):
parsed_5 = self.parser.parse_instruction(instr5)
parsed_6 = self.parser.parse_instruction(instr6)
parsed_7 = self.parser.parse_instruction(instr7)
parsed_8 = self.parser.parse_instruction(instr8)
self.assertEqual(parsed_1.instruction, "vcvt.F32.S32")
self.assertEqual(parsed_1.operands[0].register.name, "1")
@@ -142,6 +152,11 @@ class TestParserAArch64(unittest.TestCase):
self.assertEqual(parsed_7.operands[0].register.shape, "d")
self.assertEqual(self.parser.get_full_reg_name(parsed_7.operands[2].register), "v1.2d")
self.assertEqual(parsed_8.instruction, "ccmp")
self.assertEqual(parsed_8.operands[0].register.name, "0")
self.assertEqual(parsed_8.operands[0].register.prefix, "x")
self.assertEqual(parsed_8.operands[3].condition, "CC")
def test_parse_line(self):
line_comment = "// -- Begin main"
line_label = ".LBB0_1: // =>This Inner Loop Header: Depth=1"
@@ -151,6 +166,7 @@ class TestParserAArch64(unittest.TestCase):
line_preindexed = "stp x29, x30, [sp, #-16]!"
line_postindexed = "ldp q2, q3, [x11], #64"
line_5_operands = "fcmla z26.d, p0/m, z29.d, z21.d, #90"
line_conditions = "ccmn x11, #1, #3, eq"
instruction_form_1 = {
"instruction": None,
@@ -281,6 +297,20 @@ class TestParserAArch64(unittest.TestCase):
"line": "fcmla z26.d, p0/m, z29.d, z21.d, #90",
"line_number": 8,
}
instruction_form_9 = {
"instruction": "ccmn",
"operands": [
{"register": {"prefix": "x", "name": "11"}},
{"immediate": {"value": 1, "type": "int"}},
{"immediate": {"value": 3, "type": "int"}},
{"condition": "EQ"}
],
"directive": None,
"comment": None,
"label": None,
"line": "ccmn x11, #1, #3, eq",
"line_number": 9,
}
parsed_1 = self.parser.parse_line(line_comment, 1)
parsed_2 = self.parser.parse_line(line_label, 2)
@@ -290,6 +320,7 @@ class TestParserAArch64(unittest.TestCase):
parsed_6 = self.parser.parse_line(line_preindexed, 6)
parsed_7 = self.parser.parse_line(line_postindexed, 7)
parsed_8 = self.parser.parse_line(line_5_operands, 8)
parsed_9 = self.parser.parse_line(line_conditions, 9)
self.assertEqual(parsed_1, instruction_form_1)
self.assertEqual(parsed_2, instruction_form_2)
@@ -299,6 +330,7 @@ class TestParserAArch64(unittest.TestCase):
self.assertEqual(parsed_6, instruction_form_6)
self.assertEqual(parsed_7, instruction_form_7)
self.assertEqual(parsed_8, instruction_form_8)
self.assertEqual(parsed_9, instruction_form_9)
def test_parse_file(self):
parsed = self.parser.parse_file(self.triad_code)
@@ -425,6 +457,11 @@ class TestParserAArch64(unittest.TestCase):
parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict())
).directive
def _get_condition(self, parser, condition):
return AttrDict.convert_dict(
parser.process_operand(parser.condition.parseString(condition, parseAll=True).asDict())
).condition
@staticmethod
def _find_file(name):
testdir = os.path.dirname(__file__)

View File

@@ -43,6 +43,8 @@ class TestSemanticTools(unittest.TestCase):
cls.code_AArch64 = f.read()
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
cls.code_AArch64_SVE = f.read()
with open(cls._find_file("kernel_aarch64_deps.s")) as f:
cls.code_AArch64_deps = f.read()
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
cls.kernel_x86_memdep = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
@@ -59,6 +61,9 @@ class TestSemanticTools(unittest.TestCase):
cls.kernel_aarch64_SVE = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
)
cls.kernel_aarch64_deps = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64"
)
# set up machine models
cls.machine_model_csx = MachineModel(
@@ -104,6 +109,9 @@ class TestSemanticTools(unittest.TestCase):
for i in range(len(cls.kernel_aarch64_SVE)):
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
for i in range(len(cls.kernel_aarch64_deps)):
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i])
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i])
###########
# Tests
@@ -365,7 +373,7 @@ class TestSemanticTools(unittest.TestCase):
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {6, 7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=6)), {9, 10})
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=7)), 13)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=8)), 14)
@@ -434,40 +442,76 @@ class TestSemanticTools(unittest.TestCase):
self.semantics_tx2,
)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 2)
self.assertEqual(len(lc_deps), 4)
# based on line 6
self.assertEqual(lc_deps[6]["latency"], 28.0)
dep_path = "6-10-11-12-13-14"
self.assertEqual(lc_deps[dep_path]["latency"], 29.0)
self.assertEqual(
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
[
(iform.line_number, lat)
for iform, lat in lc_deps[dep_path]["dependencies"]
],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 1.0)],
)
dg = KernelDG(
self.kernel_aarch64_deps,
self.parser_AArch64,
self.machine_model_a64fx,
self.semantics_a64fx,
flag_dependencies=True,
)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 2)
# based on line 4
dep_path = "4-5-6-9-10-11-12"
self.assertEqual(lc_deps[dep_path]["latency"], 7.0)
self.assertEqual(
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
[(4, 1.0), (5, 1.0), (6, 1.0), (9, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
)
dg = KernelDG(
self.kernel_aarch64_deps,
self.parser_AArch64,
self.machine_model_a64fx,
self.semantics_a64fx,
flag_dependencies=False,
)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 1)
# based on line 4
dep_path = "4-5-10-11-12"
self.assertEqual(lc_deps[dep_path]["latency"], 5.0)
self.assertEqual(
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
[(4, 1.0), (5, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
)
def test_loop_carried_dependency_x86(self):
lcd_id = 8
lcd_id2 = 5
lcd_id = "8"
lcd_id2 = "5"
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 2)
# ID 8
self.assertEqual(
lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"]
)
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
self.assertEqual(
lc_deps[lcd_id]["dependencies"][0][0],
dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"],
)
# w/ flag dependencies: ID 9 w/ len=2
# w/o flag dependencies: ID 5 w/ len=1
# TODO discuss
self.assertEqual(
lc_deps[lcd_id2]["root"],
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
)
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
self.assertEqual(
lc_deps[lcd_id2]["dependencies"][0][0],
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
)
def test_timeout_during_loop_carried_dependency(self):