mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 00:50:06 +01:00
Added more dependency analysis for post-/pre-indexing and condition flags
This commit is contained in:
14
tests/test_files/kernel_aarch64_deps.s
Normal file
14
tests/test_files/kernel_aarch64_deps.s
Normal file
@@ -0,0 +1,14 @@
|
||||
// OSACA-BEGIN
|
||||
.LBB0_32:
|
||||
ldp q4, q5, [x9, #-32]
|
||||
ldp q6, q7, [x9], #64
|
||||
add x9, x9, x9
|
||||
add x10, x9, #64 // =64
|
||||
fmul v4.2d, v4.2d, v6.2d
|
||||
fmul v5.2d, v4.2d, v7.2d
|
||||
adds x10, x10, x10
|
||||
csel, x9, x1, x9, eq
|
||||
stp q14, q15, [x9, #-32]!
|
||||
stp q14, q15, [x9], #64
|
||||
b.ne .LBB0_32
|
||||
// OSACA-END
|
||||
@@ -73,6 +73,14 @@ class TestParserAArch64(unittest.TestCase):
|
||||
"IACA START",
|
||||
)
|
||||
|
||||
def test_condition_parser(self):
    """Check condition parsing: mixed-case inputs yield upper-case results."""
    # (input string, expected parsed condition) pairs, checked in order.
    cases = (
        ("EQ", "EQ"),
        ("ne", "NE"),
        ("Lt", "LT"),
        ("Gt", "GT"),
    )
    for raw, expected in cases:
        self.assertEqual(self._get_condition(self.parser, raw), expected)
    # A string that is not a valid condition must raise ParseException.
    self.assertRaises(ParseException, self._get_condition, self.parser, "LOcondition")
|
||||
|
||||
def test_parse_instruction(self):
|
||||
instr1 = "\t\tvcvt.F32.S32 w1, w2\t\t\t//12.27"
|
||||
instr2 = "b.lo ..B1.4 \t"
|
||||
@@ -81,6 +89,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
instr5 = "ldr x0, [x0, #:got_lo12:q2c]"
|
||||
instr6 = "adrp x0, :got:visited"
|
||||
instr7 = "fadd v17.2d, v16.2d, v1.2d"
|
||||
instr8 = "ccmp x0, x1, #4, cc"
|
||||
|
||||
parsed_1 = self.parser.parse_instruction(instr1)
|
||||
parsed_2 = self.parser.parse_instruction(instr2)
|
||||
@@ -89,6 +98,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
parsed_5 = self.parser.parse_instruction(instr5)
|
||||
parsed_6 = self.parser.parse_instruction(instr6)
|
||||
parsed_7 = self.parser.parse_instruction(instr7)
|
||||
parsed_8 = self.parser.parse_instruction(instr8)
|
||||
|
||||
self.assertEqual(parsed_1.instruction, "vcvt.F32.S32")
|
||||
self.assertEqual(parsed_1.operands[0].register.name, "1")
|
||||
@@ -142,6 +152,11 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(parsed_7.operands[0].register.shape, "d")
|
||||
self.assertEqual(self.parser.get_full_reg_name(parsed_7.operands[2].register), "v1.2d")
|
||||
|
||||
self.assertEqual(parsed_8.instruction, "ccmp")
|
||||
self.assertEqual(parsed_8.operands[0].register.name, "0")
|
||||
self.assertEqual(parsed_8.operands[0].register.prefix, "x")
|
||||
self.assertEqual(parsed_8.operands[3].condition, "CC")
|
||||
|
||||
def test_parse_line(self):
|
||||
line_comment = "// -- Begin main"
|
||||
line_label = ".LBB0_1: // =>This Inner Loop Header: Depth=1"
|
||||
@@ -151,6 +166,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
line_preindexed = "stp x29, x30, [sp, #-16]!"
|
||||
line_postindexed = "ldp q2, q3, [x11], #64"
|
||||
line_5_operands = "fcmla z26.d, p0/m, z29.d, z21.d, #90"
|
||||
line_conditions = "ccmn x11, #1, #3, eq"
|
||||
|
||||
instruction_form_1 = {
|
||||
"instruction": None,
|
||||
@@ -281,6 +297,20 @@ class TestParserAArch64(unittest.TestCase):
|
||||
"line": "fcmla z26.d, p0/m, z29.d, z21.d, #90",
|
||||
"line_number": 8,
|
||||
}
|
||||
instruction_form_9 = {
|
||||
"instruction": "ccmn",
|
||||
"operands": [
|
||||
{"register": {"prefix": "x", "name": "11"}},
|
||||
{"immediate": {"value": 1, "type": "int"}},
|
||||
{"immediate": {"value": 3, "type": "int"}},
|
||||
{"condition": "EQ"}
|
||||
],
|
||||
"directive": None,
|
||||
"comment": None,
|
||||
"label": None,
|
||||
"line": "ccmn x11, #1, #3, eq",
|
||||
"line_number": 9,
|
||||
}
|
||||
|
||||
parsed_1 = self.parser.parse_line(line_comment, 1)
|
||||
parsed_2 = self.parser.parse_line(line_label, 2)
|
||||
@@ -290,6 +320,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
parsed_6 = self.parser.parse_line(line_preindexed, 6)
|
||||
parsed_7 = self.parser.parse_line(line_postindexed, 7)
|
||||
parsed_8 = self.parser.parse_line(line_5_operands, 8)
|
||||
parsed_9 = self.parser.parse_line(line_conditions, 9)
|
||||
|
||||
self.assertEqual(parsed_1, instruction_form_1)
|
||||
self.assertEqual(parsed_2, instruction_form_2)
|
||||
@@ -299,6 +330,7 @@ class TestParserAArch64(unittest.TestCase):
|
||||
self.assertEqual(parsed_6, instruction_form_6)
|
||||
self.assertEqual(parsed_7, instruction_form_7)
|
||||
self.assertEqual(parsed_8, instruction_form_8)
|
||||
self.assertEqual(parsed_9, instruction_form_9)
|
||||
|
||||
def test_parse_file(self):
|
||||
parsed = self.parser.parse_file(self.triad_code)
|
||||
@@ -425,6 +457,11 @@ class TestParserAArch64(unittest.TestCase):
|
||||
parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict())
|
||||
).directive
|
||||
|
||||
def _get_condition(self, parser, condition):
    """Parse ``condition`` via ``parser.condition`` and return the processed ``condition`` field.

    The whole input string must match (``parseAll=True``); the parse result is
    passed through ``parser.process_operand`` before the field is read.
    """
    parse_result = parser.condition.parseString(condition, parseAll=True)
    processed = parser.process_operand(parse_result.asDict())
    return AttrDict.convert_dict(processed).condition
|
||||
|
||||
@staticmethod
|
||||
def _find_file(name):
|
||||
testdir = os.path.dirname(__file__)
|
||||
|
||||
@@ -43,6 +43,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.code_AArch64 = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_sve.s")) as f:
|
||||
cls.code_AArch64_SVE = f.read()
|
||||
with open(cls._find_file("kernel_aarch64_deps.s")) as f:
|
||||
cls.code_AArch64_deps = f.read()
|
||||
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
|
||||
cls.kernel_x86_memdep = reduce_to_section(
|
||||
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86"
|
||||
@@ -59,6 +61,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.kernel_aarch64_SVE = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64"
|
||||
)
|
||||
cls.kernel_aarch64_deps = reduce_to_section(
|
||||
cls.parser_AArch64.parse_file(cls.code_AArch64_deps), "aarch64"
|
||||
)
|
||||
|
||||
# set up machine models
|
||||
cls.machine_model_csx = MachineModel(
|
||||
@@ -104,6 +109,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
for i in range(len(cls.kernel_aarch64_SVE)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_SVE[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_SVE[i])
|
||||
for i in range(len(cls.kernel_aarch64_deps)):
|
||||
cls.semantics_a64fx.assign_src_dst(cls.kernel_aarch64_deps[i])
|
||||
cls.semantics_a64fx.assign_tp_lt(cls.kernel_aarch64_deps[i])
|
||||
|
||||
###########
|
||||
# Tests
|
||||
@@ -365,7 +373,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {7, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {6, 7, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=6)), {9, 10})
|
||||
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=7)), 13)
|
||||
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=8)), 14)
|
||||
@@ -434,40 +442,76 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.semantics_tx2,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
self.assertEqual(len(lc_deps), 4)
|
||||
# based on line 6
|
||||
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
||||
dep_path = "6-10-11-12-13-14"
|
||||
self.assertEqual(lc_deps[dep_path]["latency"], 29.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
||||
[
|
||||
(iform.line_number, lat)
|
||||
for iform, lat in lc_deps[dep_path]["dependencies"]
|
||||
],
|
||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 1.0)],
|
||||
)
|
||||
dg = KernelDG(
|
||||
self.kernel_aarch64_deps,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_a64fx,
|
||||
self.semantics_a64fx,
|
||||
flag_dependencies=True,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
# based on line 4
|
||||
dep_path = "4-5-6-9-10-11-12"
|
||||
self.assertEqual(lc_deps[dep_path]["latency"], 7.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
|
||||
[(4, 1.0), (5, 1.0), (6, 1.0), (9, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
|
||||
)
|
||||
dg = KernelDG(
|
||||
self.kernel_aarch64_deps,
|
||||
self.parser_AArch64,
|
||||
self.machine_model_a64fx,
|
||||
self.semantics_a64fx,
|
||||
flag_dependencies=False,
|
||||
)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 1)
|
||||
# based on line 4
|
||||
dep_path = "4-5-10-11-12"
|
||||
self.assertEqual(lc_deps[dep_path]["latency"], 5.0)
|
||||
self.assertEqual(
|
||||
[(iform.line_number, lat) for iform, lat in lc_deps[dep_path]["dependencies"]],
|
||||
[(4, 1.0), (5, 1.0), (10, 1.0), (11, 1.0), (12, 1.0)],
|
||||
)
|
||||
|
||||
def test_loop_carried_dependency_x86(self):
|
||||
lcd_id = 8
|
||||
lcd_id2 = 5
|
||||
lcd_id = "8"
|
||||
lcd_id2 = "5"
|
||||
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
|
||||
lc_deps = dg.get_loopcarried_dependencies()
|
||||
self.assertEqual(len(lc_deps), 2)
|
||||
# ID 8
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
|
||||
lc_deps[lcd_id]["root"], dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"]
|
||||
)
|
||||
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id]["dependencies"][0][0],
|
||||
dg.dg.nodes(data=True)[lcd_id]["instruction_form"],
|
||||
dg.dg.nodes(data=True)[int(lcd_id)]["instruction_form"],
|
||||
)
|
||||
# w/ flag dependencies: ID 9 w/ len=2
|
||||
# w/o flag dependencies: ID 5 w/ len=1
|
||||
# TODO discuss
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id2]["root"],
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
|
||||
)
|
||||
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
|
||||
self.assertEqual(
|
||||
lc_deps[lcd_id2]["dependencies"][0][0],
|
||||
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||
dg.dg.nodes(data=True)[int(lcd_id2)]["instruction_form"],
|
||||
)
|
||||
|
||||
def test_timeout_during_loop_carried_dependency(self):
|
||||
|
||||
Reference in New Issue
Block a user