Validation (#71)

Validating of OSACA predictions for IVB, SKX, ZEN1, ZEN2, A64FX and TX2 with different kernels.

build_and_run.py contains the configuration used at RRZE's testcluster and UR's qpace4, Analysis.ipynb contains the analysis script and results. Raw data from measurements (122MB) will be attached to next OSACA release.

For now, find the raw data here: https://hawo.net/~sijuhamm/d/UPIhBOtz/validation-data.tar.gz

The analysis report can be viewed at https://nbviewer.jupyter.org/github/RRZE-HPC/OSACA/blob/validation/validation/Analysis.ipynb

Quite a few changes on OSACA included:

Feature: register change tracking via semantic understanding of operations
Feature: recording LCD latency along path and exposing this to frontend
Feature: support for memory reference aliases
Feature: store throughput scaling (similar to load throughput scaling)
Fix: model importer works with latest uops.info export
Fix: immediate type tracking on ARM now preserves type in internal representaion
Removed unused KerncraftAPI
This commit is contained in:
Julian
2021-04-15 14:42:37 +02:00
committed by GitHub
parent 5990cdc2b4
commit 04836cf3f9
44 changed files with 72388 additions and 149982 deletions

View File

@@ -11,7 +11,9 @@ import networkx as nx
from osaca.osaca import get_unmatched_instruction_ratio
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section
from osaca.semantics import (
INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section, ISASemantics
)
class TestSemanticTools(unittest.TestCase):
@@ -26,12 +28,19 @@ class TestSemanticTools(unittest.TestCase):
cls.parser_AArch64 = ParserAArch64()
with open(cls._find_file("kernel_x86.s")) as f:
cls.code_x86 = f.read()
with open(cls._find_file("kernel_x86_memdep.s")) as f:
cls.code_x86_memdep = f.read()
with open(cls._find_file("kernel_aarch64_memdep.s")) as f:
cls.code_aarch64_memdep = f.read()
with open(cls._find_file("kernel_aarch64.s")) as f:
cls.code_AArch64 = f.read()
cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86")
cls.kernel_x86_memdep = reduce_to_section(
cls.parser_x86.parse_file(cls.code_x86_memdep), "x86")
cls.kernel_AArch64 = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64"
)
cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64")
cls.kernel_aarch64_memdep = reduce_to_section(
cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64")
# set up machine models
cls.machine_model_csx = MachineModel(
@@ -40,9 +49,11 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
)
cls.semantics_x86 = ISASemantics("x86")
cls.semantics_csx = ArchSemantics(
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
)
cls.semantics_aarch64 = ISASemantics("aarch64")
cls.semantics_tx2 = ArchSemantics(
cls.machine_model_tx2,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/aarch64.yml"),
@@ -52,9 +63,15 @@ class TestSemanticTools(unittest.TestCase):
for i in range(len(cls.kernel_x86)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86[i])
for i in range(len(cls.kernel_x86_memdep)):
cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i])
cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i])
for i in range(len(cls.kernel_AArch64)):
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i])
for i in range(len(cls.kernel_aarch64_memdep)):
cls.semantics_tx2.assign_src_dst(cls.kernel_aarch64_memdep[i])
cls.semantics_tx2.assign_tp_lt(cls.kernel_aarch64_memdep[i])
###########
# Tests
@@ -249,7 +266,7 @@ class TestSemanticTools(unittest.TestCase):
# 3
# 5_______>9
#
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=3))), 1)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=3)), 6)
@@ -259,19 +276,31 @@ class TestSemanticTools(unittest.TestCase):
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5)), 9)
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=6))), 1)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=6)), 7)
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=7))), 0)
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=8))), 0)
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=7)), [])
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=8)), [])
with self.assertRaises(ValueError):
dg.get_dependent_instruction_forms()
# test dot creation
dg.export_graph(filepath="/dev/null")
def test_memdependency_x86(self):
dg = KernelDG(self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx,
self.semantics_csx)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12})
with self.assertRaises(ValueError):
dg.get_dependent_instruction_forms()
# test dot creation
dg.export_graph(filepath="/dev/null")
def test_kernelDG_AArch64(self):
dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2)
dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2,
self.semantics_tx2)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {6, 7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {7, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=6)), {9, 10})
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=7)), 13)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=8)), 14)
@@ -314,7 +343,7 @@ class TestSemanticTools(unittest.TestCase):
self.assertEqual(num_hidden_loads_3, 1)
def test_cyclic_dag(self):
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
dg.dg.add_edge(100, 101, latency=1.0)
dg.dg.add_edge(101, 102, latency=2.0)
dg.dg.add_edge(102, 100, latency=3.0)
@@ -323,10 +352,20 @@ class TestSemanticTools(unittest.TestCase):
with self.assertRaises(NotImplementedError):
dg.get_loopcarried_dependencies()
def test_loop_carried_dependency_aarch64(self):
dg = KernelDG(self.kernel_aarch64_memdep, self.parser_AArch64, self.machine_model_tx2,
self.semantics_tx2)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 2)
# based on line 6
self.assertEqual(lc_deps[6]["latency"], 28.0)
self.assertEqual([(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)])
def test_loop_carried_dependency_x86(self):
lcd_id = 8
lcd_id2 = 5
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx)
dg = KernelDG(self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx)
lc_deps = dg.get_loopcarried_dependencies()
self.assertEqual(len(lc_deps), 2)
# ID 8
@@ -335,7 +374,8 @@ class TestSemanticTools(unittest.TestCase):
)
self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1)
self.assertEqual(
lc_deps[lcd_id]["dependencies"][0], dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
lc_deps[lcd_id]["dependencies"][0][0],
dg.dg.nodes(data=True)[lcd_id]["instruction_form"]
)
# w/ flag dependencies: ID 9 w/ len=2
# w/o flag dependencies: ID 5 w/ len=1
@@ -345,13 +385,13 @@ class TestSemanticTools(unittest.TestCase):
)
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
self.assertEqual(
lc_deps[lcd_id2]["dependencies"][0],
lc_deps[lcd_id2]["dependencies"][0][0],
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
)
def test_is_read_is_written_x86(self):
# independent form HW model
dag = KernelDG(self.kernel_x86, self.parser_x86, None)
dag = KernelDG(self.kernel_x86, self.parser_x86, None, None)
reg_rcx = AttrDict({"name": "rcx"})
reg_ymm1 = AttrDict({"name": "ymm1"})
@@ -384,7 +424,7 @@ class TestSemanticTools(unittest.TestCase):
def test_is_read_is_written_AArch64(self):
# independent form HW model
dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None)
dag = KernelDG(self.kernel_AArch64, self.parser_AArch64, None, None)
reg_x1 = AttrDict({"prefix": "x", "name": "1"})
reg_w1 = AttrDict({"prefix": "w", "name": "1"})
reg_d1 = AttrDict({"prefix": "d", "name": "1"})