From 593dd638979bdde7aedeb8707b1c07ac62f1150f Mon Sep 17 00:00:00 2001 From: JanLJL Date: Fri, 5 Jul 2019 15:34:00 +0200 Subject: [PATCH] refactoring in preparation for explicit semanticsAppender --- osaca/parser/parser_AArch64v81.py | 35 +--- osaca/parser/parser_x86att.py | 33 ++-- osaca/semantics/__init__.py | 11 ++ .../kernel_dg.py} | 6 +- osaca/{ => semantics}/marker_utils.py | 30 +-- tests/all_tests.py | 2 +- tests/test_marker_utils.py | 2 +- tests/test_parser_AArch64v81.py | 172 ++++++++---------- tests/test_parser_x86att.py | 74 ++++---- 9 files changed, 165 insertions(+), 200 deletions(-) create mode 100644 osaca/semantics/__init__.py rename osaca/{dependency_finder.py => semantics/kernel_dg.py} (97%) rename osaca/{ => semantics}/marker_utils.py (72%) diff --git a/osaca/parser/parser_AArch64v81.py b/osaca/parser/parser_AArch64v81.py index 94962dd..ec69491 100755 --- a/osaca/parser/parser_AArch64v81.py +++ b/osaca/parser/parser_AArch64v81.py @@ -247,43 +247,20 @@ class ParserAArch64v81(BaseParser): def parse_instruction(self, instruction): result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() result = AttrDict.convert_dict(result) - operands = AttrDict({'source': [], 'destination': []}) - # ARM specific load store flags - is_store = False - is_load = False - if result.mnemonic.lower().startswith('st'): - # Store instruction --> swap source and destination - is_store = True - if result.mnemonic.lower().startswith('ld'): - # Load instruction --> keep in mind for possible multiple loads - is_load = True - - # Check from left to right + operands = [] + # Add operands to list # Check first operand if 'operand1' in result: - if is_store: - operands.source.append(self.process_operand(result['operand1'])) - else: - operands.destination.append(self.process_operand(result['operand1'])) + operands.append(self.process_operand(result['operand1'])) # Check second operand if 'operand2' in result: - if is_store and 'operand3' not in result or 
is_load and 'operand3' in result: - # destination - operands.destination.append(self.process_operand(result['operand2'])) - else: - operands.source.append(self.process_operand(result['operand2'])) + operands.append(self.process_operand(result['operand2'])) # Check third operand if 'operand3' in result: - if is_store and 'operand4' not in result or is_load and 'operand4' in result: - operands.destination.append(self.process_operand(result['operand3'])) - else: - operands.source.append(self.process_operand(result['operand3'])) + operands.append(self.process_operand(result['operand3'])) # Check fourth operand if 'operand4' in result: - if is_store: - operands.destination.append(self.process_operand(result['operand4'])) - else: - operands.source.append(self.process_operand(result['operand4'])) + operands.append(self.process_operand(result['operand4'])) return_dict = AttrDict( { diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 65d18fd..3a0d533 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -179,29 +179,20 @@ class ParserX86ATT(BaseParser): def parse_instruction(self, instruction): result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() result = AttrDict.convert_dict(result) - operands = AttrDict({'source': [], 'destination': []}) - # Check from right to left - # Check fourth operand - if 'operand4' in result: - operands['destination'].append(self.process_operand(result['operand4'])) - # Check third operand - if 'operand3' in result: - if len(operands['destination']) != 0: - operands['source'].insert(0, self.process_operand(result['operand3'])) - else: - operands['destination'].append(self.process_operand(result['operand3'])) - # Check second operand - if 'operand2' in result: - if len(operands['destination']) != 0: - operands['source'].insert(0, self.process_operand(result['operand2'])) - else: - operands['destination'].append(self.process_operand(result['operand2'])) + operands 
= [] + # Add operands to list # Check first operand if 'operand1' in result: - if len(operands['destination']) != 0: - operands['source'].insert(0, self.process_operand(result['operand1'])) - else: - operands['destination'].append(self.process_operand(result['operand1'])) + operands.append(self.process_operand(result['operand1'])) + # Check second operand + if 'operand2' in result: + operands.append(self.process_operand(result['operand2'])) + # Check third operand + if 'operand3' in result: + operands.append(self.process_operand(result['operand3'])) + # Check fourth operand + if 'operand4' in result: + operands.append(self.process_operand(result['operand4'])) return_dict = AttrDict( { self.INSTRUCTION_ID: result['mnemonic'], diff --git a/osaca/semantics/__init__.py b/osaca/semantics/__init__.py new file mode 100644 index 0000000..5e6801f --- /dev/null +++ b/osaca/semantics/__init__.py @@ -0,0 +1,11 @@ +""" +Tools for semantic analysis of parser result. + +Only the classes below will be exported, so please add new semantic tools to __all__. +""" +from .hw_model import MachineModel +from .kernel_dg import KernelDG +from .marker_utils import reduce_to_section +from .semanticsAppender import SemanticsAppender + +__all__ = ['MachineModel', 'KernelDG', 'reduce_to_section', 'SemanticsAppender'] diff --git a/osaca/dependency_finder.py b/osaca/semantics/kernel_dg.py similarity index 97% rename from osaca/dependency_finder.py rename to osaca/semantics/kernel_dg.py index 3ee654e..13006cd 100755 --- a/osaca/dependency_finder.py +++ b/osaca/semantics/kernel_dg.py @@ -3,18 +3,18 @@ import networkx as nx -class KernelDAG(nx.DiGraph): +class KernelDG(nx.DiGraph): def __init__(self, parsed_kernel, parser, hw_model): self.kernel = parsed_kernel self.parser = parser self.model = hw_model - # self.dag = self.create_DAG() + # self.dag = self.create_DG() def check_for_loop(self, kernel): raise NotImplementedError - def create_DAG(self): + def create_DG(self): # 1. 
go through kernel instruction forms (as vertices) # 2. find edges (to dependend further instruction) # 3. get LT/TP value and set as edge weight diff --git a/osaca/marker_utils.py b/osaca/semantics/marker_utils.py similarity index 72% rename from osaca/marker_utils.py rename to osaca/semantics/marker_utils.py index e388d6e..c2144ee 100755 --- a/osaca/marker_utils.py +++ b/osaca/semantics/marker_utils.py @@ -5,7 +5,7 @@ from osaca.parser import ParserAArch64v81, ParserX86ATT def reduce_to_section(kernel, isa): if isa == 'x86': - start, end = find_marked_kernel_x86(kernel) + start, end = find_marked_kernel_x86ATT(kernel) elif isa == 'AArch64': start, end = find_marked_kernel_AArch64(kernel) else: @@ -19,28 +19,30 @@ def reduce_to_section(kernel, isa): def find_marked_kernel_AArch64(lines): nop_bytes = ['213', '3', '32', '31'] - return find_marked_kernel(lines, ParserAArch64v81(), ['mov'], 'x1', [111, 222], nop_bytes) + return find_marked_kernel( + lines, ParserAArch64v81(), ['mov'], 'x1', [111, 222], nop_bytes, reverse=True + ) -def find_marked_kernel_x86(lines): +def find_marked_kernel_x86ATT(lines): nop_bytes = ['100', '103', '144'] return find_marked_kernel(lines, ParserX86ATT(), ['mov', 'movl'], 'ebx', [111, 222], nop_bytes) -def find_marked_kernel(lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes): +def find_marked_kernel(lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False): index_start = -1 index_end = -1 for i, line in enumerate(lines): try: if line.instruction in mov_instr and lines[i + 1].directive is not None: - source = line.operands.source - destination = line.operands.destination + source = line.operands[0 if not reverse else 1] + destination = line.operands[1 if not reverse else 0] # instruction pair matches, check for operands if ( - 'immediate' in source[0] - and parser.normalize_imd(source[0].immediate) == mov_vals[0] - and 'register' in destination[0] - and parser.get_full_reg_name(destination[0].register) == mov_reg + 
'immediate' in source + and parser.normalize_imd(source.immediate) == mov_vals[0] + and 'register' in destination + and parser.get_full_reg_name(destination.register) == mov_reg ): # operands of first instruction match start, check for second one match, line_count = match_bytes(lines, i + 1, nop_bytes) @@ -48,10 +50,10 @@ def find_marked_kernel(lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes): # return first line after the marker index_start = i + 1 + line_count elif ( - 'immediate' in source[0] - and parser.normalize_imd(source[0].immediate) == mov_vals[1] - and 'register' in destination[0] - and parser.get_full_reg_name(destination[0].register) == mov_reg + 'immediate' in source + and parser.normalize_imd(source.immediate) == mov_vals[1] + and 'register' in destination + and parser.get_full_reg_name(destination.register) == mov_reg ): # operand of first instruction match end, check for second one match, line_count = match_bytes(lines, i + 1, nop_bytes) diff --git a/tests/all_tests.py b/tests/all_tests.py index 44d6392..10737c9 100755 --- a/tests/all_tests.py +++ b/tests/all_tests.py @@ -9,7 +9,7 @@ suite = unittest.TestLoader().loadTestsFromNames( 'test_parser_x86att', 'test_parser_AArch64v81', 'test_marker_utils', - 'test_dependency_finder' +# 'test_dependency_finder' ] ) diff --git a/tests/test_marker_utils.py b/tests/test_marker_utils.py index 9cb2eed..0d38c0e 100755 --- a/tests/test_marker_utils.py +++ b/tests/test_marker_utils.py @@ -5,7 +5,7 @@ Unit tests for IACA/OSACA marker utilities import os import unittest -from osaca.marker_utils import reduce_to_section +from osaca.semantics import reduce_to_section from osaca.parser import ParserAArch64v81, ParserX86ATT diff --git a/tests/test_parser_AArch64v81.py b/tests/test_parser_AArch64v81.py index 4276b01..c98d926 100755 --- a/tests/test_parser_AArch64v81.py +++ b/tests/test_parser_AArch64v81.py @@ -87,51 +87,49 @@ class TestParserAArch64v81(unittest.TestCase): parsed_6 = 
self.parser.parse_instruction(instr6) self.assertEqual(parsed_1.instruction, 'vcvt.F32.S32') - self.assertEqual(parsed_1.operands.destination[0].register.name, '1') - self.assertEqual(parsed_1.operands.destination[0].register.prefix, 'w') - self.assertEqual(parsed_1.operands.source[0].register.name, '2') - self.assertEqual(parsed_1.operands.source[0].register.prefix, 'w') + self.assertEqual(parsed_1.operands[0].register.name, '1') + self.assertEqual(parsed_1.operands[0].register.prefix, 'w') + self.assertEqual(parsed_1.operands[1].register.name, '2') + self.assertEqual(parsed_1.operands[1].register.prefix, 'w') self.assertEqual(parsed_1.comment, '12.27') self.assertEqual(parsed_2.instruction, 'b.lo') - self.assertEqual(parsed_2.operands.destination[0].identifier.name, '..B1.4') - self.assertEqual(len(parsed_2.operands.source), 0) + self.assertEqual(parsed_2.operands[0].identifier.name, '..B1.4') + self.assertEqual(len(parsed_2.operands), 1) self.assertIsNone(parsed_2.comment) self.assertEqual(parsed_3.instruction, 'mov') - self.assertEqual(parsed_3.operands.destination[0].register.name, '2') - self.assertEqual(parsed_3.operands.destination[0].register.prefix, 'x') - self.assertEqual(parsed_3.operands.source[0].immediate.value, '0x222') + self.assertEqual(parsed_3.operands[0].register.name, '2') + self.assertEqual(parsed_3.operands[0].register.prefix, 'x') + self.assertEqual(parsed_3.operands[1].immediate.value, '0x222') self.assertEqual(parsed_3.comment, 'NOT IACA END') self.assertEqual(parsed_4.instruction, 'str') - self.assertIsNone(parsed_4.operands.destination[0].memory.offset) - self.assertEqual(parsed_4.operands.destination[0].memory.base.name, 'sp') - self.assertEqual(parsed_4.operands.destination[0].memory.base.prefix, 'x') - self.assertEqual(parsed_4.operands.destination[0].memory.index.name, '1') - self.assertEqual(parsed_4.operands.destination[0].memory.index.prefix, 'x') - self.assertEqual(parsed_4.operands.destination[0].memory.scale, 16) - 
self.assertEqual(parsed_4.operands.source[0].register.name, '28') - self.assertEqual(parsed_4.operands.source[0].register.prefix, 'x') + self.assertIsNone(parsed_4.operands[1].memory.offset) + self.assertEqual(parsed_4.operands[1].memory.base.name, 'sp') + self.assertEqual(parsed_4.operands[1].memory.base.prefix, 'x') + self.assertEqual(parsed_4.operands[1].memory.index.name, '1') + self.assertEqual(parsed_4.operands[1].memory.index.prefix, 'x') + self.assertEqual(parsed_4.operands[1].memory.scale, 16) + self.assertEqual(parsed_4.operands[0].register.name, '28') + self.assertEqual(parsed_4.operands[0].register.prefix, 'x') self.assertEqual(parsed_4.comment, '12.9') self.assertEqual(parsed_5.instruction, 'ldr') - self.assertEqual(parsed_5.operands.destination[0].register.name, '0') - self.assertEqual(parsed_5.operands.destination[0].register.prefix, 'x') - self.assertEqual(parsed_5.operands.source[0].memory.offset.identifier.name, 'q2c') - self.assertEqual( - parsed_5.operands.source[0].memory.offset.identifier.relocation, ':got_lo12:' - ) - self.assertEqual(parsed_5.operands.source[0].memory.base.name, '0') - self.assertEqual(parsed_5.operands.source[0].memory.base.prefix, 'x') - self.assertIsNone(parsed_5.operands.source[0].memory.index) - self.assertEqual(parsed_5.operands.source[0].memory.scale, 1) + self.assertEqual(parsed_5.operands[0].register.name, '0') + self.assertEqual(parsed_5.operands[0].register.prefix, 'x') + self.assertEqual(parsed_5.operands[1].memory.offset.identifier.name, 'q2c') + self.assertEqual(parsed_5.operands[1].memory.offset.identifier.relocation, ':got_lo12:') + self.assertEqual(parsed_5.operands[1].memory.base.name, '0') + self.assertEqual(parsed_5.operands[1].memory.base.prefix, 'x') + self.assertIsNone(parsed_5.operands[1].memory.index) + self.assertEqual(parsed_5.operands[1].memory.scale, 1) self.assertEqual(parsed_6.instruction, 'adrp') - self.assertEqual(parsed_6.operands.destination[0].register.name, '0') - 
self.assertEqual(parsed_6.operands.destination[0].register.prefix, 'x') - self.assertEqual(parsed_6.operands.source[0].identifier.relocation, ':got:') - self.assertEqual(parsed_6.operands.source[0].identifier.name, 'visited') + self.assertEqual(parsed_6.operands[0].register.name, '0') + self.assertEqual(parsed_6.operands[0].register.prefix, 'x') + self.assertEqual(parsed_6.operands[1].identifier.relocation, ':got:') + self.assertEqual(parsed_6.operands[1].identifier.name, 'visited') def test_parse_line(self): line_comment = '// -- Begin main' @@ -169,24 +167,22 @@ class TestParserAArch64v81(unittest.TestCase): } instruction_form_4 = { 'instruction': 'ldr', - 'operands': { - 'source': [ - { - 'memory': { - 'offset': None, - 'base': {'prefix': 'x', 'name': '11'}, - 'index': { - 'prefix': 'w', - 'name': '10', - 'shift_op': 'sxtw', - 'shift': {'value': '2'}, - }, - 'scale': 4, - } + 'operands': [ + {'register': {'prefix': 's', 'name': '0'}}, + { + 'memory': { + 'offset': None, + 'base': {'prefix': 'x', 'name': '11'}, + 'index': { + 'prefix': 'w', + 'name': '10', + 'shift_op': 'sxtw', + 'shift': {'value': '2'}, + }, + 'scale': 4, } - ], - 'destination': [{'register': {'prefix': 's', 'name': '0'}}], - }, + }, + ], 'directive': None, 'comment': '= <<2', 'label': None, @@ -194,21 +190,17 @@ class TestParserAArch64v81(unittest.TestCase): } instruction_form_5 = { 'instruction': 'prfm', - 'operands': { - 'source': [ - { - 'memory': { - 'offset': {'value': '2048'}, - 'base': {'prefix': 'x', 'name': '26'}, - 'index': None, - 'scale': 1, - } + 'operands': [ + {'prfop': {'type': ['PLD'], 'target': ['L1'], 'policy': ['KEEP']}}, + { + 'memory': { + 'offset': {'value': '2048'}, + 'base': {'prefix': 'x', 'name': '26'}, + 'index': None, + 'scale': 1, } - ], - 'destination': [ - {'prfop': {'type': ['PLD'], 'target': ['L1'], 'policy': ['KEEP']}} - ], - }, + }, + ], 'directive': None, 'comment': 'HPL', 'label': None, @@ -216,23 +208,19 @@ class TestParserAArch64v81(unittest.TestCase): } 
instruction_form_6 = { 'instruction': 'stp', - 'operands': { - 'source': [ - {'register': {'prefix': 'x', 'name': '29'}}, - {'register': {'prefix': 'x', 'name': '30'}}, - ], - 'destination': [ - { - 'memory': { - 'offset': {'value': '-16'}, - 'base': {'name': 'sp', 'prefix': 'x'}, - 'index': None, - 'scale': 1, - 'pre_indexed': True, - } + 'operands': [ + {'register': {'prefix': 'x', 'name': '29'}}, + {'register': {'prefix': 'x', 'name': '30'}}, + { + 'memory': { + 'offset': {'value': '-16'}, + 'base': {'name': 'sp', 'prefix': 'x'}, + 'index': None, + 'scale': 1, + 'pre_indexed': True, } - ], - }, + }, + ], 'directive': None, 'comment': None, 'label': None, @@ -240,23 +228,19 @@ class TestParserAArch64v81(unittest.TestCase): } instruction_form_7 = { 'instruction': 'ldp', - 'operands': { - 'source': [ - { - 'memory': { - 'offset': None, - 'base': {'prefix': 'x', 'name': '11'}, - 'index': None, - 'scale': 1, - 'post_indexed': {'value': '64'}, - } + 'operands': [ + {'register': {'prefix': 'q', 'name': '2'}}, + {'register': {'prefix': 'q', 'name': '3'}}, + { + 'memory': { + 'offset': None, + 'base': {'prefix': 'x', 'name': '11'}, + 'index': None, + 'scale': 1, + 'post_indexed': {'value': '64'}, } - ], - 'destination': [ - {'register': {'prefix': 'q', 'name': '2'}}, - {'register': {'prefix': 'q', 'name': '3'}}, - ], - }, + }, + ], 'directive': None, 'comment': None, 'label': None, diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index cfdeabf..cee739f 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -86,46 +86,46 @@ class TestParserX86ATT(unittest.TestCase): parsed_7 = self.parser.parse_instruction(instr7) self.assertEqual(parsed_1.instruction, 'vcvtsi2ss') - self.assertEqual(parsed_1.operands.destination[0].register.name, 'xmm2') - self.assertEqual(parsed_1.operands.source[0].register.name, 'edx') + self.assertEqual(parsed_1.operands[0].register.name, 'edx') + self.assertEqual(parsed_1.operands[1].register.name, 
'xmm2') self.assertEqual(parsed_1.comment, '12.27') self.assertEqual(parsed_2.instruction, 'jb') - self.assertEqual(parsed_2.operands.destination[0].identifier.name, '..B1.4') - self.assertEqual(len(parsed_2.operands.source), 0) + self.assertEqual(parsed_2.operands[0].identifier.name, '..B1.4') + self.assertEqual(len(parsed_2.operands), 1) self.assertIsNone(parsed_2.comment) self.assertEqual(parsed_3.instruction, 'movl') - self.assertEqual(parsed_3.operands.destination[0].register.name, 'ebx') - self.assertEqual(parsed_3.operands.source[0].immediate.value, '222') + self.assertEqual(parsed_3.operands[0].immediate.value, '222') + self.assertEqual(parsed_3.operands[1].register.name, 'ebx') self.assertEqual(parsed_3.comment, 'IACA END') self.assertEqual(parsed_4.instruction, 'vmovss') - self.assertEqual(parsed_4.operands.destination[0].memory.offset.value, '-4') - self.assertEqual(parsed_4.operands.destination[0].memory.base.name, 'rsp') - self.assertEqual(parsed_4.operands.destination[0].memory.index.name, 'rax') - self.assertEqual(parsed_4.operands.destination[0].memory.scale, 8) - self.assertEqual(parsed_4.operands.source[0].register.name, 'xmm4') + self.assertEqual(parsed_4.operands[1].memory.offset.value, '-4') + self.assertEqual(parsed_4.operands[1].memory.base.name, 'rsp') + self.assertEqual(parsed_4.operands[1].memory.index.name, 'rax') + self.assertEqual(parsed_4.operands[1].memory.scale, 8) + self.assertEqual(parsed_4.operands[0].register.name, 'xmm4') self.assertEqual(parsed_4.comment, '12.9') self.assertEqual(parsed_5.instruction, 'mov') - self.assertEqual(parsed_5.operands.destination[0].memory.offset.identifier.name, 'var') - self.assertIsNone(parsed_5.operands.destination[0].memory.base) - self.assertIsNone(parsed_5.operands.destination[0].memory.index) - self.assertEqual(parsed_5.operands.destination[0].memory.scale, 1) - self.assertEqual(parsed_5.operands.source[0].register.name, 'ebx') + 
self.assertEqual(parsed_5.operands[1].memory.offset.identifier.name, 'var') + self.assertIsNone(parsed_5.operands[1].memory.base) + self.assertIsNone(parsed_5.operands[1].memory.index) + self.assertEqual(parsed_5.operands[1].memory.scale, 1) + self.assertEqual(parsed_5.operands[0].register.name, 'ebx') self.assertEqual(parsed_6.instruction, 'lea') - self.assertIsNone(parsed_6.operands.source[0].memory.offset) - self.assertIsNone(parsed_6.operands.source[0].memory.base) - self.assertEqual(parsed_6.operands.source[0].memory.index.name, 'rax') - self.assertEqual(parsed_6.operands.source[0].memory.scale, 8) - self.assertEqual(parsed_6.operands.destination[0].register.name, 'rbx') + self.assertIsNone(parsed_6.operands[0].memory.offset) + self.assertIsNone(parsed_6.operands[0].memory.base) + self.assertEqual(parsed_6.operands[0].memory.index.name, 'rax') + self.assertEqual(parsed_6.operands[0].memory.scale, 8) + self.assertEqual(parsed_6.operands[1].register.name, 'rbx') - self.assertEqual(parsed_7.operands.source[0].immediate.value, '0x1') - self.assertEqual(parsed_7.operands.source[1].register.name, 'xmm0') - self.assertEqual(parsed_7.operands.source[2].register.name, 'ymm1') - self.assertEqual(parsed_7.operands.destination[0].register.name, 'ymm1') + self.assertEqual(parsed_7.operands[0].immediate.value, '0x1') + self.assertEqual(parsed_7.operands[1].register.name, 'xmm0') + self.assertEqual(parsed_7.operands[2].register.name, 'ymm1') + self.assertEqual(parsed_7.operands[3].register.name, 'ymm1') def test_parse_line(self): line_comment = '# -- Begin main' @@ -159,19 +159,19 @@ class TestParserX86ATT(unittest.TestCase): } instruction_form_4 = { 'instruction': 'lea', - 'operands': { - 'source': [ - { - 'memory': { - 'offset': {'value': '2'}, - 'base': {'name': 'rax'}, - 'index': {'name': 'rax'}, - 'scale': 1, - } + 'operands': [ + { + 'memory': { + 'offset': {'value': '2'}, + 'base': {'name': 'rax'}, + 'index': {'name': 'rax'}, + 'scale': 1, } - ], - 'destination': 
[{'register': {'name': 'ecx'}}], - }, + }, + { + 'register': {'name': 'ecx'} + } + ], 'directive': None, 'comment': '12.9', 'label': None,