diff --git a/tests/test_cli.py b/tests/test_cli.py index e32cce1..10a449c 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -183,14 +183,38 @@ class TestCLI(unittest.TestCase): output = StringIO() osaca.run(args, output_file=output) # WARNING for length - self.assertTrue(output.getvalue().count("WARNING") == 1) + self.assertTrue( + output.getvalue().count( + "WARNING: You are analyzing a large amount of instruction forms" + ) + == 1 + ) + # WARNING for arch args = parser.parse_args( ["--lines", "100-199", "--ignore-unknown", self._find_test_file(kernel)] ) output = StringIO() osaca.run(args, output_file=output) - # WARNING for arch - self.assertTrue(output.getvalue().count("WARNING") == 1) + self.assertTrue( + output.getvalue().count("WARNING: No micro-architecture was specified") == 1 + ) + # WARNING for timeout + args = parser.parse_args( + ["--ignore-unknown", "--lcd-timeout", "0", self._find_test_file(kernel)] + ) + output = StringIO() + osaca.run(args, output_file=output) + self.assertTrue( + output.getvalue().count("WARNING: LCD analysis timed out") == 1 + ) + args = parser.parse_args( + ["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)] + ) + output = StringIO() + osaca.run(args, output_file=output) + self.assertTrue( + output.getvalue().count("WARNING: LCD analysis timed out") == 0 + ) def test_lines_arg(self): # Run tests with --lines option diff --git a/tests/test_files/kernel_x86_long_LCD.s b/tests/test_files/kernel_x86_long_LCD.s new file mode 100644 index 0000000..1993bcf --- /dev/null +++ b/tests/test_files/kernel_x86_long_LCD.s @@ -0,0 +1,192 @@ +# OSACA-BEGIN + push %r12 + push %r13 + push %r14 + push %r15 + push %rbp + mov %ecx,%r12d + mov %esi,%r14d + mov %r12d,%ecx + mov %r14d,%esi + mov %rdx,%r13 + mov %rdi,%rbp + callq 0x4210d0 + mov %rdx,%r8 + movzbl (%rdi),%r9d + movslq %esi,%rsi + movslq %ecx,%rcx + movzbl (%r8),%r10d + vmovd %r9d,%xmm13 + movzbl 0x4(%r8),%r9d + vpinsrb $0x1,(%rsi,%rdi,1),%xmm13,%xmm14 + lea (%rsi,%rsi,2),%rdx + vmovd %r10d,%xmm1 + vpinsrb $0x1,(%rcx,%r8,1),%xmm1,%xmm0 + vmovd %r9d,%xmm7 + vpinsrb $0x1,0x4(%rcx,%r8,1),%xmm7,%xmm5 + vpinsrb $0x2,(%rdi,%rsi,2),%xmm14,%xmm15 + vpinsrb $0x2,(%r8,%rcx,2),%xmm0,%xmm6 + vpinsrb $0x2,0x4(%r8,%rcx,2),%xmm5,%xmm9 + vpinsrb $0x3,(%rdx,%rdi,1),%xmm15,%xmm4 + movzbl 0x4(%rdi),%r11d + lea (%rcx,%rcx,2),%rax + vpinsrb $0x3,(%rax,%r8,1),%xmm6,%xmm10 + vpinsrb $0x3,0x4(%rax,%r8,1),%xmm9,%xmm11 + vmovd %r11d,%xmm2 + vpinsrb $0x1,0x4(%rsi,%rdi,1),%xmm2,%xmm8 + vpinsrb $0x2,0x4(%rdi,%rsi,2),%xmm8,%xmm3 + movzbl 0x1(%rdi),%r10d + movzbl 0x5(%rdi),%r9d + movzbl 0x1(%r8),%r11d + vmovd %r10d,%xmm1 + movzbl 0x5(%r8),%r10d + vmovd %r9d,%xmm7 + vpmovzxbd %xmm4,%xmm4 + vmovd %r11d,%xmm2 + vpmovzxbd %xmm10,%xmm10 + vpinsrb $0x3,0x4(%rdx,%rdi,1),%xmm3,%xmm12 + vpsubd %xmm10,%xmm4,%xmm14 + vpinsrb $0x1,0x5(%rsi,%rdi,1),%xmm7,%xmm5 + vmovd %r10d,%xmm4 + vpinsrb $0x1,0x5(%rcx,%r8,1),%xmm4,%xmm10 + vpinsrb $0x1,0x1(%rcx,%r8,1),%xmm2,%xmm8 + vpinsrb $0x1,0x1(%rsi,%rdi,1),%xmm1,%xmm0 + vpinsrb $0x2,0x5(%rdi,%rsi,2),%xmm5,%xmm9 + vpinsrb $0x2,0x1(%r8,%rcx,2),%xmm8,%xmm3 + vpinsrb $0x2,0x1(%rdi,%rsi,2),%xmm0,%xmm6 + vpmovzxbd %xmm12,%xmm12 + vpmovzxbd %xmm11,%xmm11 + vpsubd %xmm11,%xmm12,%xmm13 + vpinsrb $0x2,0x5(%r8,%rcx,2),%xmm10,%xmm11 + vpslld $0x10,%xmm13,%xmm15 + vpinsrb $0x3,0x1(%rdx,%rdi,1),%xmm6,%xmm13 + vpaddd %xmm15,%xmm14,%xmm12 + vpinsrb $0x3,0x5(%rdx,%rdi,1),%xmm9,%xmm15 + vpinsrb $0x3,0x1(%rax,%r8,1),%xmm3,%xmm14 + vpinsrb $0x3,0x5(%rax,%r8,1),%xmm11,%xmm1 + movzbl 0x2(%rdi),%r11d + movzbl 0x2(%r8),%r9d + vpmovzxbd %xmm15,%xmm15 + vmovd %r11d,%xmm8 + vmovd %r9d,%xmm5 + vpinsrb $0x1,0x2(%rsi,%rdi,1),%xmm8,%xmm3 + vpinsrb $0x1,0x2(%rcx,%r8,1),%xmm5,%xmm9 + vpinsrb $0x2,0x2(%rdi,%rsi,2),%xmm3,%xmm7 + vpinsrb $0x2,0x2(%r8,%rcx,2),%xmm9,%xmm4 + vpinsrb $0x3,0x2(%rdx,%rdi,1),%xmm7,%xmm3 + vpinsrb $0x3,0x2(%rax,%r8,1),%xmm4,%xmm7 + vpmovzxbd %xmm1,%xmm1 + movzbl 0x6(%r8),%r11d + vpsubd %xmm1,%xmm15,%xmm0 + vpmovzxbd %xmm13,%xmm13 + vpslld $0x10,%xmm0,%xmm2 + vpmovzxbd %xmm14,%xmm14 + vpsubd %xmm14,%xmm13,%xmm6 + vpaddd %xmm2,%xmm6,%xmm11 + vmovd %r11d,%xmm6 + vpinsrb $0x1,0x6(%rcx,%r8,1),%xmm6,%xmm2 + movzbl 0x6(%rdi),%r10d + vpinsrb $0x2,0x6(%r8,%rcx,2),%xmm2,%xmm8 + vmovd %r10d,%xmm10 + vpinsrb $0x1,0x6(%rsi,%rdi,1),%xmm10,%xmm1 + vpinsrb $0x3,0x6(%rax,%r8,1),%xmm8,%xmm9 + vpinsrb $0x2,0x6(%rdi,%rsi,2),%xmm1,%xmm0 + movzbl 0x3(%rdi),%r9d + movzbl 0x7(%rdi),%r11d + vpmovzxbd %xmm3,%xmm3 + vpmovzxbd %xmm7,%xmm7 + vmovd %r9d,%xmm14 + vmovd %r11d,%xmm8 + vpsubd %xmm7,%xmm3,%xmm10 + vpinsrb $0x1,0x3(%rsi,%rdi,1),%xmm14,%xmm15 + vpinsrb $0x1,0x7(%rsi,%rdi,1),%xmm8,%xmm3 + vpinsrb $0x3,0x6(%rdx,%rdi,1),%xmm0,%xmm5 + vpinsrb $0x2,0x3(%rdi,%rsi,2),%xmm15,%xmm1 + vpinsrb $0x2,0x7(%rdi,%rsi,2),%xmm3,%xmm7 + vpaddd %xmm11,%xmm12,%xmm3 + vpmovzxbd %xmm5,%xmm5 + vpmovzxbd %xmm9,%xmm9 + vpsubd %xmm9,%xmm5,%xmm4 + vpslld $0x10,%xmm4,%xmm13 + vpinsrb $0x3,0x7(%rdx,%rdi,1),%xmm7,%xmm15 + vpaddd %xmm13,%xmm10,%xmm10 + vpinsrb $0x3,0x3(%rdx,%rdi,1),%xmm1,%xmm13 + movzbl 0x7(%r8),%edx + movzbl 0x3(%r8),%r10d + vpmovzxbd %xmm15,%xmm15 + vmovd %edx,%xmm5 + vpinsrb $0x1,0x7(%rcx,%r8,1),%xmm5,%xmm9 + vmovd %r10d,%xmm0 + vpinsrb $0x1,0x3(%rcx,%r8,1),%xmm0,%xmm6 + vpinsrb $0x2,0x7(%r8,%rcx,2),%xmm9,%xmm4 + vpinsrb $0x2,0x3(%r8,%rcx,2),%xmm6,%xmm2 + vpinsrb $0x3,0x7(%rax,%r8,1),%xmm4,%xmm1 + vpinsrb $0x3,0x3(%rax,%r8,1),%xmm2,%xmm14 + vpmovzxbd %xmm1,%xmm1 + vpmovzxbd %xmm13,%xmm13 + vpsubd %xmm1,%xmm15,%xmm0 + vpmovzxbd %xmm14,%xmm14 + vpslld $0x10,%xmm0,%xmm2 + vpsubd %xmm14,%xmm13,%xmm6 + vpsubd %xmm11,%xmm12,%xmm1 + vpaddd %xmm2,%xmm6,%xmm8 + vpaddd %xmm8,%xmm10,%xmm12 + vpsubd %xmm8,%xmm10,%xmm0 + vpaddd %xmm12,%xmm3,%xmm8 + vpaddd %xmm0,%xmm1,%xmm7 + vpsubd %xmm12,%xmm3,%xmm3 + vpsubd %xmm0,%xmm1,%xmm5 + vunpcklps %xmm7,%xmm8,%xmm6 + vunpcklps %xmm5,%xmm3,%xmm2 + vunpckhps %xmm7,%xmm8,%xmm9 + vunpckhps %xmm5,%xmm3,%xmm4 + vunpcklpd %xmm2,%xmm6,%xmm10 + vunpckhpd %xmm2,%xmm6,%xmm11 + vunpcklpd %xmm4,%xmm9,%xmm12 + vpaddd %xmm11,%xmm10,%xmm14 + vunpckhpd %xmm4,%xmm9,%xmm13 + vpsubd %xmm11,%xmm10,%xmm1 + vpaddd %xmm13,%xmm12,%xmm15 + vpsubd %xmm13,%xmm12,%xmm0 + vpaddd %xmm15,%xmm14,%xmm9 + vpaddd %xmm0,%xmm1,%xmm7 + vpsubd %xmm15,%xmm14,%xmm8 + vpsubd %xmm0,%xmm1,%xmm6 + vmovdqu 0x279d68(%rip),%xmm15 + vpsrld $0xf,%xmm9,%xmm2 + vpsrld $0xf,%xmm7,%xmm10 + vpand %xmm15,%xmm2,%xmm3 + vmovdqu 0x279d40(%rip),%xmm4 + vpand %xmm15,%xmm10,%xmm11 + vpsrld $0xf,%xmm8,%xmm12 + vpsrld $0xf,%xmm6,%xmm14 + vpmulld %xmm3,%xmm4,%xmm5 + vpand %xmm15,%xmm12,%xmm13 + vpmulld %xmm11,%xmm4,%xmm3 + vpand %xmm15,%xmm14,%xmm1 + vpmulld %xmm13,%xmm4,%xmm2 + vpaddd %xmm3,%xmm7,%xmm7 + vpmulld %xmm1,%xmm4,%xmm0 + vpaddd %xmm5,%xmm9,%xmm4 + vpxor %xmm5,%xmm4,%xmm5 + vpxor %xmm3,%xmm7,%xmm9 + vpaddd %xmm2,%xmm8,%xmm8 + vpaddd %xmm9,%xmm5,%xmm3 + vpxor %xmm2,%xmm8,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpaddd %xmm2,%xmm3,%xmm4 + vpxor %xmm0,%xmm6,%xmm0 + vpaddd %xmm0,%xmm4,%xmm2 + vpxor %xmm1,%xmm1,%xmm1 + vpaddd %xmm2,%xmm1,%xmm1 + vpsrldq $0x8,%xmm1,%xmm3 + vpaddd %xmm3,%xmm1,%xmm5 + vpsrlq $0x20,%xmm5,%xmm6 + vpaddd %xmm6,%xmm5,%xmm7 + vmovd %xmm7,%ecx + movzwl %cx,%eax + shr $0x10,%ecx + add %ecx,%eax + shr %eax + retq +# OSACA-END diff --git a/tests/test_semantics.py b/tests/test_semantics.py index d3f5ef1..46c58d6 100755 --- a/tests/test_semantics.py +++ b/tests/test_semantics.py @@ -5,15 +5,14 @@ Unit tests for Semantic Analysis import os import unittest +import time from copy import deepcopy import networkx as nx - from osaca.osaca import get_unmatched_instruction_ratio from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT -from osaca.semantics import ( - INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section, ISASemantics -) +from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics, + KernelDG, MachineModel, reduce_to_section) class TestSemanticTools(unittest.TestCase): @@ -30,6 +29,8 @@ class TestSemanticTools(unittest.TestCase): cls.code_x86 = f.read() with open(cls._find_file("kernel_x86_memdep.s")) as f: cls.code_x86_memdep = f.read() + with open(cls._find_file("kernel_x86_long_LCD.s")) as f: + cls.code_x86_long_LCD = f.read() with open(cls._find_file("kernel_aarch64_memdep.s")) as f: cls.code_aarch64_memdep = f.read() with open(cls._find_file("kernel_aarch64.s")) as f: @@ -38,13 +39,20 @@ class TestSemanticTools(unittest.TestCase): cls.code_AArch64_SVE = f.read() cls.kernel_x86 = reduce_to_section(cls.parser_x86.parse_file(cls.code_x86), "x86") cls.kernel_x86_memdep = reduce_to_section( - cls.parser_x86.parse_file(cls.code_x86_memdep), "x86") + cls.parser_x86.parse_file(cls.code_x86_memdep), "x86" + ) + cls.kernel_x86_long_LCD = reduce_to_section( + cls.parser_x86.parse_file(cls.code_x86_long_LCD), "x86" + ) cls.kernel_AArch64 = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64") + cls.parser_AArch64.parse_file(cls.code_AArch64), "aarch64" + ) cls.kernel_aarch64_memdep = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64") + cls.parser_AArch64.parse_file(cls.code_aarch64_memdep), "aarch64" + ) cls.kernel_aarch64_SVE = reduce_to_section( - cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64") + cls.parser_AArch64.parse_file(cls.code_AArch64_SVE), "aarch64" + ) # set up machine models cls.machine_model_csx = MachineModel( @@ -77,6 +85,9 @@ class TestSemanticTools(unittest.TestCase): for i in range(len(cls.kernel_x86_memdep)): cls.semantics_csx.assign_src_dst(cls.kernel_x86_memdep[i]) cls.semantics_csx.assign_tp_lt(cls.kernel_x86_memdep[i]) + for i in range(len(cls.kernel_x86_long_LCD)): + cls.semantics_csx.assign_src_dst(cls.kernel_x86_long_LCD[i]) + cls.semantics_csx.assign_tp_lt(cls.kernel_x86_long_LCD[i]) for i in range(len(cls.kernel_AArch64)): cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i]) cls.semantics_tx2.assign_tp_lt(cls.kernel_AArch64[i]) @@ -298,8 +309,9 @@ class TestSemanticTools(unittest.TestCase): dg.export_graph(filepath="/dev/null") def test_memdependency_x86(self): - dg = KernelDG(self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, - self.semantics_csx) + dg = KernelDG( + self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx + ) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8}) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12}) @@ -309,8 +321,9 @@ class TestSemanticTools(unittest.TestCase): dg.export_graph(filepath="/dev/null") def test_kernelDG_AArch64(self): - dg = KernelDG(self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, - self.semantics_tx2) + dg = KernelDG( + self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2 + ) self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg)) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8}) self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=4)), {9, 10}) @@ -334,10 +347,14 @@ class TestSemanticTools(unittest.TestCase): dg.get_dependent_instruction_forms() # test dot creation dg.export_graph(filepath="/dev/null") - + def test_kernelDG_SVE(self): - dg = KernelDG(self.kernel_aarch64_SVE, self.parser_AArch64, self.machine_model_a64fx, - self.semantics_a64fx) + KernelDG( + self.kernel_aarch64_SVE, + self.parser_AArch64, + self.machine_model_a64fx, + self.semantics_a64fx, + ) # TODO check for correct analysis def test_hidden_load(self): @@ -372,14 +389,20 @@ class TestSemanticTools(unittest.TestCase): dg.get_loopcarried_dependencies() def test_loop_carried_dependency_aarch64(self): - dg = KernelDG(self.kernel_aarch64_memdep, self.parser_AArch64, self.machine_model_tx2, - self.semantics_tx2) + dg = KernelDG( + self.kernel_aarch64_memdep, + self.parser_AArch64, + self.machine_model_tx2, + self.semantics_tx2, + ) lc_deps = dg.get_loopcarried_dependencies() self.assertEqual(len(lc_deps), 2) # based on line 6 self.assertEqual(lc_deps[6]["latency"], 28.0) - self.assertEqual([(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']], - [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)]) + self.assertEqual( + [(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']], + [(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)], + ) def test_loop_carried_dependency_x86(self): lcd_id = 8 @@ -394,7 +417,7 @@ class TestSemanticTools(unittest.TestCase): self.assertEqual(len(lc_deps[lcd_id]["dependencies"]), 1) self.assertEqual( lc_deps[lcd_id]["dependencies"][0][0], - dg.dg.nodes(data=True)[lcd_id]["instruction_form"] + dg.dg.nodes(data=True)[lcd_id]["instruction_form"], ) # w/ flag dependencies: ID 9 w/ len=2 # w/o flag dependencies: ID 5 w/ len=1 @@ -408,6 +431,31 @@ class TestSemanticTools(unittest.TestCase): dg.dg.nodes(data=True)[lcd_id2]["instruction_form"], ) + def test_timeout_during_loop_carried_dependency(self): + start_time = time.perf_counter() + KernelDG( + self.kernel_x86_long_LCD, + self.parser_x86, + self.machine_model_csx, + self.semantics_x86, + timeout=10 + ) + end_time = time.perf_counter() + time_10 = end_time - start_time + start_time = time.perf_counter() + KernelDG( + self.kernel_x86_long_LCD, + self.parser_x86, + self.machine_model_csx, + self.semantics_x86, + timeout=2 + ) + end_time = time.perf_counter() + time_2 = end_time - start_time + self.assertTrue(time_10 > 10) + self.assertTrue(2 < time_2) + self.assertTrue(time_2 < (time_10 - 7)) + def test_is_read_is_written_x86(self): # independent form HW model dag = KernelDG(self.kernel_x86, self.parser_x86, None, None) @@ -440,7 +488,6 @@ class TestSemanticTools(unittest.TestCase): self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1)) self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2)) self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm)) - def test_is_read_is_written_AArch64(self): # independent form HW model