From 0925af21a0e89d5269bc5a0ea85ccb8c97eda9be Mon Sep 17 00:00:00 2001 From: JanLJL Date: Fri, 24 May 2019 15:10:02 +0200 Subject: [PATCH] finished ARM parser and added tests --- osaca/parser/__init__.py | 4 +- osaca/parser/parser_AArch64v81.py | 364 ++++++++++ osaca/parser/parser_ARMv81.py | 11 - osaca/parser/parser_x86att.py | 65 +- tests/all_tests.py | 3 +- tests/test_files/triad-arm-iaca.s | 645 ++++++++++++++++++ .../{triad-iaca.s => triad-x86-iaca.s} | 0 tests/test_parser_AArch64v81.py | 243 +++++++ tests/test_parser_x86att.py | 81 ++- 9 files changed, 1349 insertions(+), 67 deletions(-) create mode 100755 osaca/parser/parser_AArch64v81.py delete mode 100755 osaca/parser/parser_ARMv81.py create mode 100644 tests/test_files/triad-arm-iaca.s rename tests/test_files/{triad-iaca.s => triad-x86-iaca.s} (100%) create mode 100755 tests/test_parser_AArch64v81.py diff --git a/osaca/parser/__init__.py b/osaca/parser/__init__.py index 9d550cf..a5aa87c 100644 --- a/osaca/parser/__init__.py +++ b/osaca/parser/__init__.py @@ -4,6 +4,6 @@ Collection of parsers supported by OSACA. Only the parser below will be exported, so please add new parsers to __all__. 
""" from .parser_x86att import ParserX86ATT -from .parser_ARMv81 import ParserARMv81 +from .parser_AArch64v81 import ParserAArch64v81 -__all__ = ['ParserX86ATT', 'ParserARMv81'] +__all__ = ['ParserX86ATT', 'ParserAArch64v81'] diff --git a/osaca/parser/parser_AArch64v81.py b/osaca/parser/parser_AArch64v81.py new file mode 100755 index 0000000..57cfba4 --- /dev/null +++ b/osaca/parser/parser_AArch64v81.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python3 + + +import pyparsing as pp + +from .base_parser import BaseParser + + +class ParserAArch64v81(BaseParser): + def __init__(self): + super().__init__() + + def construct_parser(self): + # Comment + symbol_comment = '//' + self.comment = pp.Literal(symbol_comment) + pp.Group( + pp.ZeroOrMore(pp.Word(pp.printables)) + ).setResultsName(self.COMMENT_ID) + # Define ARM assembly identifier + relocation = pp.Combine(pp.Literal(':') + pp.Word(pp.alphanums + '_') + pp.Literal(':')) + first = pp.Word(pp.alphas + '_.', exact=1) + rest = pp.Word(pp.alphanums + '_.') + identifier = pp.Group( + pp.Optional(relocation).setResultsName('relocation') + + pp.Combine(first + pp.Optional(rest)).setResultsName('name') + ).setResultsName('identifier') + # Label + self.label = pp.Group( + identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) + ).setResultsName(self.LABEL_ID) + # Directive + decimal_number = pp.Combine( + pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) + ).setResultsName('value') + hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') + directive_option = pp.Combine( + pp.Word(pp.alphas + '#@.%', exact=1) + + pp.Optional(pp.Word(pp.printables, excludeChars=',')) + ) + directive_parameter = ( + pp.quotedString | directive_option | identifier | hex_number | decimal_number + ) + commaSeparatedList = pp.delimitedList(pp.Optional(directive_parameter), delim=',') + self.directive = pp.Group( + pp.Literal('.') + + pp.Word(pp.alphanums + '_').setResultsName('name') + + 
commaSeparatedList.setResultsName('parameters') + + pp.Optional(self.comment) + ).setResultsName(self.DIRECTIVE_ID) + + ############################## + # Instructions + # Mnemonic + # (?P[a-zA-Z][a-zA-Z0-9]*)(?PS?)(P?.[a-zA-Z]{2}) + mnemonic = pp.Word(pp.alphanums + '.').setResultsName('mnemonic') + # Immediate: + # int: ^-?[0-9]+ | hex: ^0x[0-9a-fA-F]+ | fp: ^[0-9]{1}.[0-9]+[eE]{1}[\+-]{1}[0-9]+[fF]? + symbol_immediate = '#' + mantissa = pp.Combine( + pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) + pp.Literal('.') + pp.Word(pp.nums) + ).setResultsName('mantissa') + exponent = ( + pp.CaselessLiteral('e') + + pp.Word('+-').setResultsName('e_sign') + + pp.Word(pp.nums).setResultsName('exponent') + ) + float_ = pp.Group( + mantissa + pp.Optional(exponent) + pp.CaselessLiteral('f') + ).setResultsName('float') + double_ = pp.Group(mantissa + pp.Optional(exponent)).setResultsName('double') + immediate = pp.Group( + pp.Optional(pp.Literal(symbol_immediate)) + + (hex_number ^ decimal_number ^ float_ ^ double_) + | (pp.Optional(pp.Literal(symbol_immediate)) + identifier) + ).setResultsName(self.IMMEDIATE_ID) + shift_op = ( + pp.CaselessLiteral('lsl') + ^ pp.CaselessLiteral('lsr') + ^ pp.CaselessLiteral('asr') + ^ pp.CaselessLiteral('ror') + ^ pp.CaselessLiteral('sxtw') + ^ pp.CaselessLiteral('uxtw') + ) + arith_immediate = pp.Group( + immediate.setResultsName('base_immediate') + + pp.Suppress(pp.Literal(',')) + + shift_op.setResultsName('shift_op') + + immediate.setResultsName('shift') + ).setResultsName(self.IMMEDIATE_ID) + # Register: + # scalar: [XWBHSDQ][0-9]{1,2} | vector: V[0-9]{1,2}\.[12468]{1,2}[BHSD]()? 
+ # define SP and ZR register aliases as regex, because pyparsing does not support + # proper lookahead + alias_r31_sp = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(sp|SP))') + alias_r31_zr = pp.Regex('(?P<prefix>[a-zA-Z])?(?P<name>(zr|ZR))') + scalar = pp.Word(pp.alphas, exact=1).setResultsName('prefix') + pp.Word( + pp.nums + ).setResultsName('name') + index = pp.Literal('[') + pp.Word(pp.nums).setResultsName('index') + pp.Literal(']') + vector = ( + pp.CaselessLiteral('v').setResultsName('prefix') + + pp.Word(pp.nums).setResultsName('name') + + pp.Literal('.') + + pp.Optional(pp.Word('12468')).setResultsName('lanes') + + pp.Word(pp.alphas, exact=1).setResultsName('shape') + + pp.Optional(index) + ) + self.list_element = vector ^ scalar + register_list = ( + pp.Literal('{') + + ( + pp.delimitedList(pp.Combine(self.list_element), delim=',').setResultsName('list') + ^ pp.delimitedList(pp.Combine(self.list_element), delim='-').setResultsName( + 'range' + ) + ) + + pp.Literal('}') + + pp.Optional(index) + ) + register = pp.Group( + (alias_r31_sp | alias_r31_zr | vector | scalar | register_list) + + pp.Optional( + pp.Suppress(pp.Literal(',')) + + shift_op.setResultsName('shift_op') + + immediate.setResultsName('shift') + ) + ).setResultsName(self.REGISTER_ID) + # Memory + register_index = register.setResultsName('index') + pp.Optional( + pp.Literal(',') + pp.Word(pp.alphas) + immediate.setResultsName('scale') + ) + memory = pp.Group( + pp.Literal('[') + + pp.Optional(register.setResultsName('base')) + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(register_index ^ immediate.setResultsName('offset')) + + pp.Literal(']') + + pp.Optional( + pp.Literal('!').setResultsName('pre-indexed') + | (pp.Suppress(pp.Literal(',')) + immediate.setResultsName('post-indexed')) + ) + ).setResultsName(self.MEMORY_ID) + prefetch_op = pp.Group( + pp.Group(pp.CaselessLiteral('PLD') ^ pp.CaselessLiteral('PST')).setResultsName('type') + + pp.Group( + pp.CaselessLiteral('L1') ^ pp.CaselessLiteral('L2') ^
pp.CaselessLiteral('L3') + ).setResultsName('target') + + pp.Group(pp.CaselessLiteral('KEEP') ^ pp.CaselessLiteral('STRM')).setResultsName( + 'policy' + ) + ).setResultsName('prfop') + # Combine to instruction form + operand_first = pp.Group( + register ^ immediate ^ memory ^ arith_immediate ^ (prefetch_op | identifier) + ) + operand_rest = pp.Group((register ^ immediate ^ memory ^ arith_immediate) | identifier) + self.instruction_parser = ( + mnemonic + + pp.Optional(operand_first).setResultsName('operand1') + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(operand_rest).setResultsName('operand2') + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(operand_rest).setResultsName('operand3') + + pp.Optional(pp.Suppress(pp.Literal(','))) + + pp.Optional(operand_rest).setResultsName('operand4') + + pp.Optional(self.comment) + ) + self.opf = operand_first + self.opr = operand_rest + self.mem = memory + self.reg = register + self.idf = identifier + self.prfop = prefetch_op + self.imd = immediate + self.aimd = arith_immediate + + def parse_line(self, line, line_number=None): + """ + Parse line and return instruction form. + + :param str line: line of assembly code + :param int line_number: default None, identifier of instruction form + :return: parsed instruction form + """ + instruction_form = { + 'instruction': None, + 'operands': None, + 'directive': None, + 'comment': None, + 'label': None, + 'line_number': line_number, + } + result = None + + # 1. Parse comment + try: + result = self._process_operand(self.comment.parseString(line, parseAll=True).asDict()) + instruction_form['comment'] = ' '.join(result[self.COMMENT_ID]) + except pp.ParseException: + pass + + # 2.
Parse label + if result is None: + try: + result = self._process_operand( + self.label.parseString(line, parseAll=True).asDict() + ) + instruction_form['label'] = result[self.LABEL_ID]['name'] + if self.COMMENT_ID in result[self.LABEL_ID]: + instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID]) + except pp.ParseException: + pass + + # 3. Parse directive + if result is None: + try: + result = self._process_operand( + self.directive.parseString(line, parseAll=True).asDict() + ) + instruction_form['directive'] = { + 'name': result[self.DIRECTIVE_ID]['name'], + 'parameters': result[self.DIRECTIVE_ID]['parameters'], + } + if self.COMMENT_ID in result[self.DIRECTIVE_ID]: + instruction_form['comment'] = ' '.join( + result[self.DIRECTIVE_ID][self.COMMENT_ID] + ) + except pp.ParseException: + pass + + # 4. Parse instruction + if result is None: + try: + result = self.parse_instruction(line) + except (pp.ParseException, KeyError): + print( + '\n\n*-*-*-*-*-*-*-*-*-*-\n{}: {}\n*-*-*-*-*-*-*-*-*-*-\n\n'.format( + line_number, line + ) + ) + instruction_form['instruction'] = result['instruction'] + instruction_form['operands'] = result['operands'] + instruction_form['comment'] = result['comment'] + + return instruction_form + + def parse_instruction(self, instruction): + result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() + operands = {'source': [], 'destination': []} + # ARM specific store flags + is_store = False + store_ex = False + if result['mnemonic'].lower().startswith('st'): + # Store instruction --> swap source and destination + is_store = True + if result['mnemonic'].lower().startswith('strex'): + # Store exclusive --> first reg is used for return state + store_ex = True + + # Check from left to right + # Check first operand + if 'operand1' in result: + if is_store and not store_ex: + operands['source'].append(self._process_operand(result['operand1'])) + else: +
operands['destination'].append(self._process_operand(result['operand1'])) + # Check second operand + if 'operand2' in result: + if is_store and 'operand3' not in result: + # destination + operands['destination'].append(self._process_operand(result['operand2'])) + else: + operands['source'].append(self._process_operand(result['operand2'])) + # Check third operand + if 'operand3' in result: + if is_store and 'operand4' not in result: + operands['destination'].append(self._process_operand(result['operand3'])) + else: + operands['source'].append(self._process_operand(result['operand3'])) + # Check fourth operand + if 'operand4' in result: + if is_store: + operands['destination'].append(self._process_operand(result['operand4'])) + else: + operands['source'].append(self._process_operand(result['operand4'])) + + return_dict = { + 'instruction': result['mnemonic'], + 'operands': operands, + 'comment': ' '.join(result['comment']) if 'comment' in result else None, + } + return return_dict + + def _process_operand(self, operand): + # structure memory addresses + if 'memory' in operand: + return self.substitute_memory_address(operand['memory']) + # structure register lists + if 'register' in operand and ( + 'list' in operand['register'] or 'range' in operand['register'] + ): + # TODO: discuss if ranges should be converted to lists + return self.substitute_register_list(operand['register']) + # add value attribute to floating point immediates without exponent + if 'immediate' in operand: + return self.substitute_immediate(operand['immediate']) + if 'label' in operand: + return self.substitute_label(operand['label']) + return operand + + def substitute_memory_address(self, memory_address): + # Remove unnecessarily created dictionary entries during parsing + offset = None if 'offset' not in memory_address else memory_address['offset'] + base = None if 'base' not in memory_address else memory_address['base'] + index = None if 'index' not in memory_address else 
memory_address['index'] + scale = '1' + valid_shift_ops = ['lsl', 'uxtw', 'sxtw'] + if 'index' in memory_address: + if 'shift' in memory_address['index']: + if memory_address['index']['shift_op'].lower() in valid_shift_ops: + scale = str(2 ** int(memory_address['index']['shift']['value'])) + new_dict = {'offset': offset, 'base': base, 'index': index, 'scale': scale} + return {'memory': new_dict} + + def substitute_register_list(self, register_list): + # Remove unnecessarily created dictionary entries during parsing + vlist = [] + dict_name = '' + if 'list' in register_list: + dict_name = 'list' + if 'range' in register_list: + dict_name = 'range' + for v in register_list[dict_name]: + vlist.append(self.list_element.parseString(v, parseAll=True).asDict()) + index = None if 'index' not in register_list else register_list['index'] + new_dict = {dict_name: vlist, 'index': index} + return {'register': new_dict} + + def substitute_immediate(self, immediate): + dict_name = '' + if 'identifier' in immediate: + # actually an identifier, change declaration + return immediate + if 'value' in immediate: + # normal integer value, nothing to do + return {'immediate': immediate} + if 'base_immediate' in immediate: + # arithmetic immediate, nothing to do + return {'immediate': immediate} + if 'float' in immediate: + dict_name = 'float' + if 'double' in immediate: + dict_name = 'double' + if 'exponent' in immediate[dict_name]: + # nothing to do + return {'immediate': immediate} + else: + # change 'mantissa' key to 'value' + return {'immediate': {'value': immediate[dict_name]['mantissa']}} + + def substitute_label(self, label): + # remove duplicated 'name' level due to identifier + label['name'] = label['name']['name'] + return {'label': label} diff --git a/osaca/parser/parser_ARMv81.py b/osaca/parser/parser_ARMv81.py deleted file mode 100755 index 9c9ecb7..0000000 --- a/osaca/parser/parser_ARMv81.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 - -from .base_parser import 
BaseParser - - -class ParserARMv81(BaseParser): - # TODO - - def __init__(self): - # TODO - raise NotImplementedError diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 42df884..5ff4d73 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -18,7 +18,9 @@ class ParserX86ATT(BaseParser): # Define x86 assembly identifier first = pp.Word(pp.alphas + '_.', exact=1) rest = pp.Word(pp.alphanums + '_.') - identifier = pp.Combine(first + pp.Optional(rest)).setResultsName('identifier') + identifier = pp.Group( + pp.Combine(first + pp.Optional(rest)).setResultsName('name') + ).setResultsName('identifier') # Label self.label = pp.Group( identifier.setResultsName('name') + pp.Literal(':') + pp.Optional(self.comment) @@ -28,7 +30,9 @@ class ParserX86ATT(BaseParser): pp.Optional(pp.Literal('-')) + pp.Word(pp.nums) ).setResultsName('value') hex_number = pp.Combine(pp.Literal('0x') + pp.Word(pp.hexnums)).setResultsName('value') - directive_option = pp.Combine(pp.Word('#@.', exact=1) + pp.Word(pp.printables)) + directive_option = pp.Combine( + pp.Word('#@.', exact=1) + pp.Word(pp.printables, excludeChars=',') + ) directive_parameter = ( pp.quotedString | directive_option | identifier | hex_number | decimal_number ) @@ -63,7 +67,9 @@ class ParserX86ATT(BaseParser): pp.Literal(symbol_immediate) + (hex_number | decimal_number | identifier) ).setResultsName(self.IMMEDIATE_ID) # Memory: offset(base, index, scale) - offset = identifier | hex_number | decimal_number + offset = pp.Group(identifier | hex_number | decimal_number).setResultsName( + self.IMMEDIATE_ID + ) scale = pp.Word('1248', exact=1) memory = pp.Group( pp.Optional(offset.setResultsName('offset')) @@ -76,16 +82,15 @@ class ParserX86ATT(BaseParser): + pp.Literal(')') ).setResultsName(self.MEMORY_ID) # Combine to instruction form - operand1 = pp.Group(register ^ immediate ^ memory ^ identifier).setResultsName('operand1') - operand2 = pp.Group(register ^ immediate ^ 
memory).setResultsName('operand2') - operand3 = pp.Group(register ^ immediate ^ memory).setResultsName('operand3') + operand_first = pp.Group(register ^ immediate ^ memory ^ identifier) + operand_rest = pp.Group(register ^ immediate ^ memory) self.instruction_parser = ( mnemonic - + pp.Optional(operand1) + + pp.Optional(operand_first).setResultsName('operand1') + pp.Optional(pp.Suppress(pp.Literal(','))) - + pp.Optional(operand2) + + pp.Optional(operand_rest).setResultsName('operand2') + pp.Optional(pp.Suppress(pp.Literal(','))) - + pp.Optional(operand3) + + pp.Optional(operand_rest).setResultsName('operand3') + pp.Optional(self.comment) ) @@ -109,7 +114,7 @@ class ParserX86ATT(BaseParser): # 1. Parse comment try: - result = self.comment.parseString(line, parseAll=True).asDict() + result = self._process_operand(self.comment.parseString(line, parseAll=True).asDict()) instruction_form['comment'] = ' '.join(result[self.COMMENT_ID]) except pp.ParseException: pass @@ -117,7 +122,9 @@ class ParserX86ATT(BaseParser): # 2. Parse label if result is None: try: - result = self.label.parseString(line, parseAll=True).asDict() + result = self._process_operand( + self.label.parseString(line, parseAll=True).asDict() + ) instruction_form['label'] = result[self.LABEL_ID]['name'] if self.COMMENT_ID in result[self.LABEL_ID]: instruction_form['comment'] = ' '.join(result[self.LABEL_ID][self.COMMENT_ID]) @@ -127,7 +134,9 @@ class ParserX86ATT(BaseParser): # 3. 
Parse directive if result is None: try: - result = self.directive.parseString(line, parseAll=True).asDict() + result = self._process_operand( + self.directive.parseString(line, parseAll=True).asDict() + ) instruction_form['directive'] = { 'name': result[self.DIRECTIVE_ID]['name'], 'parameters': result[self.DIRECTIVE_ID]['parameters'], @@ -157,23 +166,23 @@ class ParserX86ATT(BaseParser): def parse_instruction(self, instruction): result = self.instruction_parser.parseString(instruction, parseAll=True).asDict() - operands = {'sources': []} + operands = {'source': [], 'destination': []} # Check from right to left # Check third operand if 'operand3' in result: - operands['destination'] = self._process_operand(result['operand3']) + operands['destination'].append(self._process_operand(result['operand3'])) # Check second operand if 'operand2' in result: - if 'destination' in operands: - operands['sources'].insert(0, self._process_operand(result['operand2'])) + if len(operands['destination']) != 0: + operands['source'].insert(0, self._process_operand(result['operand2'])) else: - operands['destination'] = self._process_operand(result['operand2']) + operands['destination'].append(self._process_operand(result['operand2'])) # Check first operand if 'operand1' in result: - if 'destination' in operands: - operands['sources'].insert(0, self._process_operand(result['operand1'])) + if len(operands['destination']) != 0: + operands['source'].insert(0, self._process_operand(result['operand1'])) else: - operands['destination'] = self._process_operand(result['operand1']) + operands['destination'].append(self._process_operand(result['operand1'])) return_dict = { 'instruction': result['mnemonic'], 'operands': operands, @@ -185,6 +194,10 @@ class ParserX86ATT(BaseParser): # For the moment, only used to structure memory addresses if 'memory' in operand: return self.substitute_memory_address(operand['memory']) + if 'immediate' in operand: + return 
self.substitue_immediate(operand['immediate']) + if 'label' in operand: + return self.substitute_label(operand['label']) return operand def substitute_memory_address(self, memory_address): @@ -195,3 +208,15 @@ class ParserX86ATT(BaseParser): scale = '1' if 'scale' not in memory_address else memory_address['scale'] new_dict = {'offset': offset, 'base': base, 'index': index, 'scale': scale} return {'memory': new_dict} + + def substitute_label(self, label): + # remove duplicated 'name' level due to identifier + label['name'] = label['name']['name'] + return {'label': label} + + def substitue_immediate(self, immediate): + if 'identifier' in immediate: + # actually an identifier, change declaration + return immediate + # otherwise nothing to do + return {'immediate': immediate} diff --git a/tests/all_tests.py b/tests/all_tests.py index e6d5186..f4ac5e1 100755 --- a/tests/all_tests.py +++ b/tests/all_tests.py @@ -6,7 +6,8 @@ import unittest sys.path[0:0] = ['.', '..'] suite = unittest.TestLoader().loadTestsFromNames( [ - 'test_parser_x86att' + 'test_parser_x86att', + 'test_parser_AArch64v81' ] ) diff --git a/tests/test_files/triad-arm-iaca.s b/tests/test_files/triad-arm-iaca.s new file mode 100644 index 0000000..ff2b692 --- /dev/null +++ b/tests/test_files/triad-arm-iaca.s @@ -0,0 +1,645 @@ + .text + .file "triad.c" + .section .rodata.cst8,"aM",@progbits,8 + .p2align 3 // -- Begin function triad +.LCPI0_0: + .xword 4596373779694328218 // double 0.20000000000000001 +.LCPI0_1: + .xword 4652007308841189376 // double 1000 +.LCPI0_2: + .xword 4517329193108106637 // double 9.9999999999999995E-7 +.LCPI0_3: + .xword 4629700416936869888 // double 32 +.LCPI0_4: + .xword 4562146422526312448 // double 9.765625E-4 + .text + .globl triad + .p2align 6 + .type triad,@function +triad: // @triad + .cfi_startproc +// %bb.0: + sub sp, sp, #224 // =224 + str d8, [sp, #112] // 8-byte Folded Spill + stp x28, x27, [sp, #128] // 16-byte Folded Spill + stp x26, x25, [sp, #144] // 16-byte Folded 
Spill + stp x24, x23, [sp, #160] // 16-byte Folded Spill + stp x22, x21, [sp, #176] // 16-byte Folded Spill + stp x20, x19, [sp, #192] // 16-byte Folded Spill + stp x29, x30, [sp, #208] // 16-byte Folded Spill + add x29, sp, #208 // =208 + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + .cfi_offset w19, -24 + .cfi_offset w20, -32 + .cfi_offset w21, -40 + .cfi_offset w22, -48 + .cfi_offset w23, -56 + .cfi_offset w24, -64 + .cfi_offset w25, -72 + .cfi_offset w26, -80 + .cfi_offset w27, -88 + .cfi_offset w28, -96 + .cfi_offset b8, -112 + mov w19, w0 + orr w0, wzr, #0x40 + sbfiz x23, x19, #3, #32 + mov x1, x23 + bl aligned_alloc + mov x20, x0 + orr w0, wzr, #0x40 + mov x1, x23 + bl aligned_alloc + str x0, [sp, #88] // 8-byte Folded Spill + orr w0, wzr, #0x40 + mov x1, x23 + bl aligned_alloc + mov x22, x0 + orr w0, wzr, #0x40 + mov x1, x23 + bl aligned_alloc + mov x23, x0 + cmp w19, #0 // =0 + b.le .LBB0_3 +// %bb.1: + mov w24, w19 + cmp w19, #7 // =7 + b.hi .LBB0_9 +// %bb.2: + mov x8, xzr + b .LBB0_17 +.LBB0_3: + adrp x8, .LCPI0_0 + orr w25, wzr, #0x1 + ldr d8, [x8, :lo12:.LCPI0_0] + .p2align 6 +.LBB0_4: // =>This Loop Header: Depth=1 + // Child Loop BB0_5 Depth 2 + sub x0, x29, #88 // =88 + add x1, sp, #96 // =96 + bl timing + mov w21, w25 + cbz w25, .LBB0_8 + .p2align 6 +.LBB0_5: // Parent Loop BB0_4 Depth=1 + // => This Inner Loop Header: Depth=2 + ldr d0, [x20] + fcmp d0, #0.0 + b.le .LBB0_7 +// %bb.6: // in Loop: Header=BB0_5 Depth=2 + mov x0, x20 + bl dummy +.LBB0_7: // in Loop: Header=BB0_5 Depth=2 + subs w21, w21, #1 // =1 + b.ne .LBB0_5 +.LBB0_8: // in Loop: Header=BB0_4 Depth=1 + add x0, sp, #104 // =104 + add x1, sp, #96 // =96 + bl timing + ldr d0, [sp, #104] + ldur d1, [x29, #-88] + fsub d1, d0, d1 + lsl w25, w25, #1 + fcmp d1, d8 + b.mi .LBB0_4 + b .LBB0_38 +.LBB0_9: + and x8, x24, #0xfffffff8 + sub x10, x8, #8 // =8 + lsr x11, x10, #3 + add w9, w11, #1 // =1 + and x9, x9, #0x3 + cmp x10, #24 // =24 + b.hs .LBB0_11 +// %bb.10: + orr 
w13, wzr, #0x20 + cbnz x9, .LBB0_14 + b .LBB0_16 +.LBB0_11: + mov x16, #28286 + movk x16, #29109, lsl #16 + ldr x15, [sp, #88] // 8-byte Folded Reload + movk x16, #34426, lsl #32 + movk x16, #16000, lsl #48 + dup v0.2d, x16 + mvn x11, x11 + mov x10, xzr + add x11, x9, x11 + add x12, x23, #128 // =128 + add x13, x20, #128 // =128 + add x14, x22, #128 // =128 + add x15, x15, #128 // =128 + .p2align 6 +.LBB0_12: // =>This Inner Loop Header: Depth=1 + stp q0, q0, [x12] + stp q0, q0, [x12, #-128] + stp q0, q0, [x12, #32] + stp q0, q0, [x12, #-96] + stp q0, q0, [x14] + add x10, x10, #32 // =32 + stp q0, q0, [x14, #-128] + stp q0, q0, [x14, #32] + stp q0, q0, [x14, #-96] + stp q0, q0, [x15] + stp q0, q0, [x15, #-128] + stp q0, q0, [x15, #32] + stp q0, q0, [x15, #-96] + stp q0, q0, [x13] + stp q0, q0, [x13, #-128] + stp q0, q0, [x13, #32] + stp q0, q0, [x13, #-96] + stp q0, q0, [x12, #64] + stp q0, q0, [x12, #-64] + stp q0, q0, [x12, #96] + stp q0, q0, [x12, #-32] + add x12, x12, #256 // =256 + stp q0, q0, [x14, #64] + stp q0, q0, [x14, #-64] + stp q0, q0, [x14, #96] + stp q0, q0, [x14, #-32] + add x14, x14, #256 // =256 + stp q0, q0, [x15, #64] + stp q0, q0, [x15, #-64] + stp q0, q0, [x15, #96] + stp q0, q0, [x15, #-32] + add x15, x15, #256 // =256 + stp q0, q0, [x13, #64] + stp q0, q0, [x13, #-64] + stp q0, q0, [x13, #96] + stp q0, q0, [x13, #-32] + add x13, x13, #256 // =256 + adds x11, x11, #4 // =4 + b.ne .LBB0_12 +// %bb.13: + lsl x10, x10, #3 + orr x13, x10, #0x20 + cbz x9, .LBB0_16 +.LBB0_14: + ldr x14, [sp, #88] // 8-byte Folded Reload + add x10, x23, x13 + add x11, x22, x13 + add x12, x20, x13 + add x13, x14, x13 + mov x14, #28286 + movk x14, #29109, lsl #16 + movk x14, #34426, lsl #32 + movk x14, #16000, lsl #48 + dup v0.2d, x14 + neg x9, x9 + .p2align 6 +.LBB0_15: // =>This Inner Loop Header: Depth=1 + stp q0, q0, [x10] + stp q0, q0, [x11] + stp q0, q0, [x10, #-32] + stp q0, q0, [x13] + stp q0, q0, [x11, #-32] + add x10, x10, #64 // =64 + stp q0, q0, [x12] + 
stp q0, q0, [x13, #-32] + add x11, x11, #64 // =64 + stp q0, q0, [x12, #-32] + add x12, x12, #64 // =64 + add x13, x13, #64 // =64 + adds x9, x9, #1 // =1 + b.ne .LBB0_15 +.LBB0_16: + cmp x8, x24 + b.eq .LBB0_19 +.LBB0_17: + ldr x10, [sp, #88] // 8-byte Folded Reload + mov x13, #28286 + movk x13, #29109, lsl #16 + lsl x12, x8, #3 + movk x13, #34426, lsl #32 + add x9, x20, x12 + movk x13, #16000, lsl #48 + add x10, x10, x12 + add x11, x22, x12 + add x12, x23, x12 + sub x8, x24, x8 + .p2align 6 +.LBB0_18: // =>This Inner Loop Header: Depth=1 + str x13, [x12], #8 + str x13, [x11], #8 + str x13, [x10], #8 + str x13, [x9], #8 + subs x8, x8, #1 // =1 + b.ne .LBB0_18 +.LBB0_19: + ldr x10, [sp, #88] // 8-byte Folded Reload + add x8, x20, #256 // =256 + and x26, x24, #0xfffffff8 + str x8, [sp, #40] // 8-byte Folded Spill + add x8, x23, #256 // =256 + sub x27, x26, #8 // =8 + str x8, [sp, #32] // 8-byte Folded Spill + add x8, x22, #256 // =256 + orr w25, wzr, #0x1 + str x8, [sp, #24] // 8-byte Folded Spill + add x8, x10, #256 // =256 + str x8, [sp, #16] // 8-byte Folded Spill + lsr x8, x27, #3 + add w9, w8, #1 // =1 + mvn x8, x8 + and x28, x9, #0x7 + add x8, x28, x8 + str x8, [sp, #8] // 8-byte Folded Spill + neg x8, x28 + str x8, [sp, #80] // 8-byte Folded Spill + add x8, x10, #32 // =32 + str x8, [sp, #72] // 8-byte Folded Spill + add x8, x22, #32 // =32 + str x8, [sp, #64] // 8-byte Folded Spill + add x8, x20, #32 // =32 + str x8, [sp, #56] // 8-byte Folded Spill + add x8, x23, #32 // =32 + str x8, [sp, #48] // 8-byte Folded Spill + adrp x8, .LCPI0_0 + ldr d8, [x8, :lo12:.LCPI0_0] + .p2align 6 +.LBB0_20: // =>This Loop Header: Depth=1 + // Child Loop BB0_22 Depth 2 + // Child Loop BB0_29 Depth 3 + // Child Loop BB0_32 Depth 3 + // Child Loop BB0_35 Depth 3 + sub x0, x29, #88 // =88 + add x1, sp, #96 // =96 + bl timing + cbz w25, .LBB0_37 +// %bb.21: // in Loop: Header=BB0_20 Depth=1 + mov w21, wzr + .p2align 6 +.LBB0_22: // Parent Loop BB0_20 Depth=1 + // => This Loop 
Header: Depth=2 + // Child Loop BB0_29 Depth 3 + // Child Loop BB0_32 Depth 3 + // Child Loop BB0_35 Depth 3 + ldr d0, [x20] + fcmp d0, #0.0 + b.le .LBB0_24 +// %bb.23: // in Loop: Header=BB0_22 Depth=2 + mov x0, x20 + bl dummy +.LBB0_24: // in Loop: Header=BB0_22 Depth=2 + cmp w19, #7 // =7 + b.hi .LBB0_26 +// %bb.25: // in Loop: Header=BB0_22 Depth=2 + mov x12, xzr + b .LBB0_34 + .p2align 6 +.LBB0_26: // in Loop: Header=BB0_22 Depth=2 + cmp x27, #56 // =56 + b.hs .LBB0_28 +// %bb.27: // in Loop: Header=BB0_22 Depth=2 + mov x8, xzr + cbnz x28, .LBB0_31 + b .LBB0_33 + .p2align 6 +.LBB0_28: // in Loop: Header=BB0_22 Depth=2 + ldp x9, x10, [sp, #16] // 8-byte Folded Reload + ldp x11, x12, [sp, #32] // 8-byte Folded Reload + ldr x13, [sp, #8] // 8-byte Folded Reload + mov x8, xzr + .p2align 6 + mov x1, #111 // OSACA START + .byte 213,3,32,31 // OSACA START +.LBB0_29: // Parent Loop BB0_20 Depth=1 + // Parent Loop BB0_22 Depth=2 + // => This Inner Loop Header: Depth=3 + ldp q2, q5, [x10, #-256] + ldp q6, q7, [x10, #-224] + ldp q16, q17, [x11, #-256] + ldp q18, q19, [x11, #-224] + fmul v2.2d, v2.2d, v16.2d + fmul v5.2d, v5.2d, v17.2d + fmul v6.2d, v6.2d, v18.2d + ldp q0, q1, [x9, #-256] + ldp q3, q4, [x9, #-224] + fmul v7.2d, v7.2d, v19.2d + fadd v0.2d, v0.2d, v2.2d + fadd v2.2d, v1.2d, v5.2d + stp q0, q2, [x12, #-256] + fadd v1.2d, v3.2d, v6.2d + ldp q6, q17, [x10, #-192] + ldp q18, q19, [x10, #-160] + ldp q20, q21, [x11, #-192] + ldp q22, q23, [x11, #-160] + fmul v6.2d, v6.2d, v20.2d + fmul v17.2d, v17.2d, v21.2d + fmul v18.2d, v18.2d, v22.2d + fadd v3.2d, v4.2d, v7.2d + stp q1, q3, [x12, #-224] + ldp q4, q5, [x9, #-192] + ldp q7, q16, [x9, #-160] + fmul v19.2d, v19.2d, v23.2d + fadd v4.2d, v4.2d, v6.2d + fadd v6.2d, v5.2d, v17.2d + stp q4, q6, [x12, #-192] + fadd v5.2d, v7.2d, v18.2d + ldp q18, q21, [x10, #-128] + ldp q22, q23, [x10, #-96] + ldp q24, q25, [x11, #-128] + ldp q26, q27, [x11, #-96] + fmul v18.2d, v18.2d, v24.2d + fmul v21.2d, v21.2d, v25.2d + fmul 
v22.2d, v22.2d, v26.2d + fadd v7.2d, v16.2d, v19.2d + stp q5, q7, [x12, #-160] + ldp q16, q17, [x9, #-128] + ldp q19, q20, [x9, #-96] + fadd v16.2d, v16.2d, v18.2d + fadd v18.2d, v17.2d, v21.2d + stp q16, q18, [x12, #-128] + fadd v17.2d, v19.2d, v22.2d + ldp q22, q25, [x10, #-64] + ldp q28, q29, [x11, #-64] + fmul v23.2d, v23.2d, v27.2d + ldp q26, q27, [x10, #-32] + fmul v22.2d, v22.2d, v28.2d + fmul v25.2d, v25.2d, v29.2d + ldp q28, q29, [x11, #-32] + fmul v26.2d, v26.2d, v28.2d + fmul v27.2d, v27.2d, v29.2d + fadd v19.2d, v20.2d, v23.2d + stp q17, q19, [x12, #-96] + ldp q20, q21, [x9, #-64] + ldp q23, q24, [x9, #-32] + fadd v20.2d, v20.2d, v22.2d + fadd v22.2d, v21.2d, v25.2d + stp q20, q22, [x12, #-64] + fadd v21.2d, v23.2d, v26.2d + fadd v23.2d, v24.2d, v27.2d + stp q21, q23, [x12, #-32] + ldp q24, q25, [x10] + ldp q28, q29, [x11] + ldp q26, q27, [x10, #32] + fmul v24.2d, v24.2d, v28.2d + fmul v25.2d, v25.2d, v29.2d + ldp q28, q29, [x11, #32] + fmul v26.2d, v26.2d, v28.2d + fmul v27.2d, v27.2d, v29.2d + ldp q28, q29, [x9] + fadd v24.2d, v28.2d, v24.2d + fadd v25.2d, v29.2d, v25.2d + stp q24, q25, [x12] + ldp q28, q29, [x9, #32] + fadd v26.2d, v28.2d, v26.2d + fadd v27.2d, v29.2d, v27.2d + stp q26, q27, [x12, #32] + ldp q24, q25, [x10, #64] + ldp q28, q29, [x11, #64] + ldp q26, q27, [x10, #96] + fmul v24.2d, v24.2d, v28.2d + fmul v25.2d, v25.2d, v29.2d + ldp q28, q29, [x11, #96] + fmul v26.2d, v26.2d, v28.2d + fmul v27.2d, v27.2d, v29.2d + ldp q28, q29, [x9, #64] + fadd v24.2d, v28.2d, v24.2d + fadd v25.2d, v29.2d, v25.2d + stp q24, q25, [x12, #64] + ldp q28, q29, [x9, #96] + fadd v26.2d, v28.2d, v26.2d + fadd v27.2d, v29.2d, v27.2d + stp q26, q27, [x12, #96] + ldp q24, q25, [x10, #128] + ldp q28, q29, [x11, #128] + ldp q26, q27, [x10, #160] + fmul v24.2d, v24.2d, v28.2d + fmul v25.2d, v25.2d, v29.2d + ldp q28, q29, [x11, #160] + fmul v26.2d, v26.2d, v28.2d + fmul v27.2d, v27.2d, v29.2d + ldp q28, q29, [x9, #128] + fadd v24.2d, v28.2d, v24.2d + fadd v25.2d, 
v29.2d, v25.2d + stp q24, q25, [x12, #128] + ldp q28, q29, [x9, #160] + fadd v26.2d, v28.2d, v26.2d + fadd v27.2d, v29.2d, v27.2d + stp q26, q27, [x12, #160] + ldp q24, q25, [x10, #192] + ldp q26, q27, [x11, #192] + fmul v24.2d, v24.2d, v26.2d + ldp q26, q28, [x10, #224] + fmul v25.2d, v25.2d, v27.2d + ldp q27, q0, [x11, #224] + fmul v2.2d, v26.2d, v27.2d + fmul v0.2d, v28.2d, v0.2d + ldp q1, q3, [x9, #192] + ldp q4, q5, [x9, #224] + fadd v1.2d, v1.2d, v24.2d + fadd v3.2d, v3.2d, v25.2d + stp q1, q3, [x12, #192] + fadd v2.2d, v4.2d, v2.2d + fadd v0.2d, v5.2d, v0.2d + stp q2, q0, [x12, #224] + add x8, x8, #64 // =64 + add x12, x12, #512 // =512 + add x11, x11, #512 // =512 + add x10, x10, #512 // =512 + add x9, x9, #512 // =512 + adds x13, x13, #8 // =8 + b.ne .LBB0_29 + mov x1, #222 // OSACA END + .byte 213,3,32,31 // OSACA END +// %bb.30: // in Loop: Header=BB0_22 Depth=2 + cbz x28, .LBB0_33 +.LBB0_31: // in Loop: Header=BB0_22 Depth=2 + lsl x11, x8, #3 + ldp x9, x8, [sp, #64] // 8-byte Folded Reload + ldp x12, x10, [sp, #48] // 8-byte Folded Reload + add x8, x8, x11 + add x9, x9, x11 + add x10, x10, x11 + add x11, x12, x11 + ldr x12, [sp, #80] // 8-byte Folded Reload + .p2align 6 +.LBB0_32: // Parent Loop BB0_20 Depth=1 + // Parent Loop BB0_22 Depth=2 + // => This Inner Loop Header: Depth=3 + ldp q4, q5, [x9, #-32] + ldp q6, q7, [x9], #64 + ldp q16, q17, [x11, #-32] + ldp q18, q19, [x11], #64 + fmul v4.2d, v4.2d, v16.2d + fmul v5.2d, v5.2d, v17.2d + fmul v6.2d, v6.2d, v18.2d + fmul v7.2d, v7.2d, v19.2d + ldp q0, q1, [x8, #-32] + ldp q2, q3, [x8], #64 + fadd v0.2d, v0.2d, v4.2d + fadd v1.2d, v1.2d, v5.2d + stp q0, q1, [x10, #-32] + fadd v2.2d, v2.2d, v6.2d + fadd v3.2d, v3.2d, v7.2d + stp q2, q3, [x10] + add x10, x10, #64 // =64 + adds x12, x12, #1 // =1 + b.ne .LBB0_32 +.LBB0_33: // in Loop: Header=BB0_22 Depth=2 + mov x12, x26 + cmp x26, x24 + b.eq .LBB0_36 +.LBB0_34: // in Loop: Header=BB0_22 Depth=2 + ldr x8, [sp, #88] // 8-byte Folded Reload + lsl x11, x12, 
#3 + sub x12, x24, x12 + add x8, x8, x11 + add x9, x22, x11 + add x10, x23, x11 + add x11, x20, x11 + .p2align 6 +.LBB0_35: // Parent Loop BB0_20 Depth=1 + // Parent Loop BB0_22 Depth=2 + // => This Inner Loop Header: Depth=3 + ldr d0, [x8], #8 + ldr d1, [x9], #8 + ldr d2, [x10], #8 + fmul d1, d1, d2 + fadd d0, d0, d1 + str d0, [x11], #8 + subs x12, x12, #1 // =1 + b.ne .LBB0_35 +.LBB0_36: // in Loop: Header=BB0_22 Depth=2 + add w21, w21, #1 // =1 + cmp w21, w25 + b.ne .LBB0_22 +.LBB0_37: // in Loop: Header=BB0_20 Depth=1 + add x0, sp, #104 // =104 + add x1, sp, #96 // =96 + bl timing + ldr d0, [sp, #104] + ldur d1, [x29, #-88] + fsub d1, d0, d1 + lsl w25, w25, #1 + fcmp d1, d8 + b.mi .LBB0_20 +.LBB0_38: + scvtf d4, w19 + lsr w1, w25, #1 + adrp x8, .LCPI0_1 + scvtf d6, w1 + fadd d2, d4, d4 + ldr d5, [x8, :lo12:.LCPI0_1] + adrp x8, .LCPI0_2 + fmov d0, #8.00000000 + fmul d2, d2, d6 + ldr d3, [x8, :lo12:.LCPI0_2] + adrp x8, .LCPI0_3 + adrp x0, .L.str + fmul d2, d2, d3 + ldr d3, [x8, :lo12:.LCPI0_3] + adrp x8, .LCPI0_4 + add x0, x0, :lo12:.L.str + fmul d3, d6, d3 + fmul d0, d4, d0 + fmul d3, d3, d4 + fmul d4, d4, d6 + fdiv d3, d3, d1 + fdiv d4, d4, d1 + fdiv d4, d4, d5 + fdiv d0, d0, d5 + fdiv d2, d2, d1 + ldr d7, [x8, :lo12:.LCPI0_4] + fmul d3, d3, d7 + fdiv d4, d4, d5 + fmul d3, d3, d7 + mov w2, w19 + bl printf + mov x0, x20 + bl free + ldr x0, [sp, #88] // 8-byte Folded Reload + bl free + mov x0, x22 + bl free + mov x0, x23 + bl free + ldp x29, x30, [sp, #208] // 16-byte Folded Reload + ldp x20, x19, [sp, #192] // 16-byte Folded Reload + ldp x22, x21, [sp, #176] // 16-byte Folded Reload + ldp x24, x23, [sp, #160] // 16-byte Folded Reload + ldp x26, x25, [sp, #144] // 16-byte Folded Reload + ldp x28, x27, [sp, #128] // 16-byte Folded Reload + ldr d8, [sp, #112] // 8-byte Folded Reload + add sp, sp, #224 // =224 + ret +.Lfunc_end0: + .size triad, .Lfunc_end0-triad + .cfi_endproc + // -- End function + .globl main // -- Begin function main + .p2align 6 + .type 
main,@function +main: // @main + .cfi_startproc +// %bb.0: + stp x29, x30, [sp, #-16]! // 16-byte Folded Spill + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + adrp x0, .Lstr + add x0, x0, :lo12:.Lstr + bl puts + adrp x0, .Lstr.3 + add x0, x0, :lo12:.Lstr.3 + bl puts + mov w0, #190 + bl triad + mov w0, #247 + bl triad + mov w0, #321 + bl triad + mov w0, #417 + bl triad + mov w0, #542 + bl triad + mov w0, #705 + bl triad + mov w0, #917 + bl triad + mov w0, #1192 + bl triad + mov w0, #1550 + bl triad + mov w0, #2015 + bl triad + mov w0, #2619 + bl triad + mov w0, #3405 + bl triad + mov w0, #4427 + bl triad + mov w0, #5756 + bl triad + mov w0, #7482 + bl triad + mov w0, #9727 + bl triad + mov w0, wzr + ldp x29, x30, [sp], #16 // 16-byte Folded Reload + ret +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + .type .L.str,@object // @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "%12.1f | %9.8f | %9.3f | %7.1f | %7.1f | %7d | %4d \n" + .size .L.str, 53 + .type .Lstr,@object // @str + .section .rodata.str1.16,"aMS",@progbits,1 + .p2align 4 +.Lstr: + .asciz "TRIAD a[i] = b[i]+c[i]*d[i], 32 byte/it, 2 Flop/it" + .size .Lstr, 51 + .type .Lstr.3,@object // @str.3 + .p2align 4 +.Lstr.3: + .asciz "Size (KByte) | runtime | MFlop/s | MB/s | MLUP/s | repeat | size" + .size .Lstr.3, 74 + .ident "Arm C/C++/Fortran Compiler version 19.0 (build number 69) (based on LLVM 7.0.2)" + .section ".note.GNU-stack","",@progbits + .addrsig diff --git a/tests/test_files/triad-iaca.s b/tests/test_files/triad-x86-iaca.s similarity index 100% rename from tests/test_files/triad-iaca.s rename to tests/test_files/triad-x86-iaca.s diff --git a/tests/test_parser_AArch64v81.py b/tests/test_parser_AArch64v81.py new file mode 100755 index 0000000..8d20876 --- /dev/null +++ b/tests/test_parser_AArch64v81.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +""" +Unit tests for ARMv8 AArch64 assembly parser +""" + +import os +import unittest + +from 
pyparsing import ParseException + +from osaca.parser import ParserAArch64v81 + + +class TestParserAArch64v81(unittest.TestCase): + def setUp(self): + self.parser = ParserAArch64v81() + with open(self._find_file('triad-arm-iaca.s')) as f: + self.triad_code = f.read() + + ################## + # Test + ################## + + def test_comment_parser(self): + self.assertEqual(self._get_comment(self.parser, '// some comments'), 'some comments') + self.assertEqual( + self._get_comment(self.parser, '\t\t//AA BB CC \t end \t'), 'AA BB CC end' + ) + self.assertEqual( + self._get_comment(self.parser, '\t//// comment //// comment'), + '// comment //// comment', + ) + + def test_label_parser(self): + self.assertEqual(self._get_label(self.parser, 'main:')['name'], 'main') + self.assertEqual(self._get_label(self.parser, '..B1.10:')['name'], '..B1.10') + self.assertEqual(self._get_label(self.parser, '.2.3_2_pack.3:')['name'], '.2.3_2_pack.3') + self.assertEqual(self._get_label(self.parser, '.L1:\t\t\t//label1')['name'], '.L1') + self.assertEqual( + ' '.join(self._get_label(self.parser, '.L1:\t\t\t//label1')['comment']), 'label1' + ) + with self.assertRaises(ParseException): + self._get_label(self.parser, '\t.cfi_startproc') + + def test_directive_parser(self): + self.assertEqual(self._get_directive(self.parser, '\t.text')['name'], 'text') + self.assertEqual(len(self._get_directive(self.parser, '\t.text')['parameters']), 0) + self.assertEqual(self._get_directive(self.parser, '\t.align\t16,0x90')['name'], 'align') + self.assertEqual( + len(self._get_directive(self.parser, '\t.align\t16,0x90')['parameters']), 2 + ) + self.assertEqual( + self._get_directive(self.parser, '\t.align\t16,0x90')['parameters'][1], '0x90' + ) + self.assertEqual( + self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[ + 'name' + ], + 'byte', + ) + self.assertEqual( + self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[ + 'parameters' + ][2], + '144', + ) + self.assertEqual( + ' 
'.join( + self._get_directive(self.parser, ' .byte 100,103,144 //IACA START')[ + 'comment' + ] + ), + 'IACA START', + ) + + def test_parse_instruction(self): + instr1 = '\t\tvcvt.F32.S32 w1, w2\t\t\t//12.27' + instr2 = 'b.lo ..B1.4 \t' + instr3 = ' mov x2,#0x222 //NOT IACA END' + instr4 = 'str x28, [sp, x1, lsl #4] //12.9' + instr5 = 'ldr x0, [x0, #:got_lo12:q2c]' + instr6 = 'adrp x0, :got:visited' + + parsed_1 = self.parser.parse_instruction(instr1) + parsed_2 = self.parser.parse_instruction(instr2) + parsed_3 = self.parser.parse_instruction(instr3) + parsed_4 = self.parser.parse_instruction(instr4) + parsed_5 = self.parser.parse_instruction(instr5) + parsed_6 = self.parser.parse_instruction(instr6) + + self.assertEqual(parsed_1['instruction'], 'vcvt.F32.S32') + self.assertEqual(parsed_1['operands']['destination'][0]['register']['name'], '1') + self.assertEqual(parsed_1['operands']['destination'][0]['register']['prefix'], 'w') + self.assertEqual(parsed_1['operands']['source'][0]['register']['name'], '2') + self.assertEqual(parsed_1['operands']['source'][0]['register']['prefix'], 'w') + self.assertEqual(parsed_1['comment'], '12.27') + + self.assertEqual(parsed_2['instruction'], 'b.lo') + self.assertEqual(parsed_2['operands']['destination'][0]['identifier']['name'], '..B1.4') + self.assertEqual(len(parsed_2['operands']['source']), 0) + self.assertIsNone(parsed_2['comment']) + + self.assertEqual(parsed_3['instruction'], 'mov') + self.assertEqual(parsed_3['operands']['destination'][0]['register']['name'], '2') + self.assertEqual(parsed_3['operands']['destination'][0]['register']['prefix'], 'x') + self.assertEqual(parsed_3['operands']['source'][0]['immediate']['value'], '0x222') + self.assertEqual(parsed_3['comment'], 'NOT IACA END') + + self.assertEqual(parsed_4['instruction'], 'str') + self.assertIsNone(parsed_4['operands']['destination'][0]['memory']['offset']) + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['base']['name'], 'sp') + 
self.assertIsNone(parsed_4['operands']['destination'][0]['memory']['base']['prefix']) + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['index']['name'], '1') + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['index']['prefix'], 'x') + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['scale'], '16') + self.assertEqual(parsed_4['operands']['source'][0]['register']['name'], '28') + self.assertEqual(parsed_4['operands']['source'][0]['register']['prefix'], 'x') + self.assertEqual(parsed_4['comment'], '12.9') + + self.assertEqual(parsed_5['instruction'], 'ldr') + self.assertEqual(parsed_5['operands']['destination'][0]['register']['name'], '0') + self.assertEqual(parsed_5['operands']['destination'][0]['register']['prefix'], 'x') + self.assertEqual( + parsed_5['operands']['source'][0]['memory']['offset']['identifier']['name'], 'q2c' + ) + self.assertEqual( + parsed_5['operands']['source'][0]['memory']['offset']['identifier']['relocation'], + ':got_lo12:', + ) + self.assertEqual(parsed_5['operands']['source'][0]['memory']['base']['name'], '0') + self.assertEqual(parsed_5['operands']['source'][0]['memory']['base']['prefix'], 'x') + self.assertIsNone(parsed_5['operands']['source'][0]['memory']['index']) + self.assertEqual(parsed_5['operands']['source'][0]['memory']['scale'], '1') + + self.assertEqual(parsed_6['instruction'], 'adrp') + self.assertEqual(parsed_6['operands']['destination'][0]['register']['name'], '0') + self.assertEqual(parsed_6['operands']['destination'][0]['register']['prefix'], 'x') + self.assertEqual(parsed_6['operands']['source'][0]['identifier']['relocation'], ':got:') + self.assertEqual(parsed_6['operands']['source'][0]['identifier']['name'], 'visited') + + def test_parse_line(self): + line_comment = '// -- Begin main' + line_label = '.LBB0_1: // =>This Inner Loop Header: Depth=1' + line_directive = '\t.cfi_def_cfa w29, -16' + line_instruction = '\tldr s0, [x11, w10, sxtw #2]\t\t// = <<2' + # 
STREXD/STREX/STRD? + + instruction_form_1 = { + 'instruction': None, + 'operands': None, + 'directive': None, + 'comment': '-- Begin main', + 'label': None, + 'line_number': 1, + } + + instruction_form_2 = { + 'instruction': None, + 'operands': None, + 'directive': None, + 'comment': '=>This Inner Loop Header: Depth=1', + 'label': '.LBB0_1', + 'line_number': 2, + } + instruction_form_3 = { + 'instruction': None, + 'operands': None, + 'directive': {'name': 'cfi_def_cfa', 'parameters': ['w29', '-16']}, + 'comment': None, + 'label': None, + 'line_number': 3, + } + instruction_form_4 = { + 'instruction': 'ldr', + 'operands': { + 'source': [ + { + 'memory': { + 'offset': None, + 'base': {'prefix': 'x', 'name': '11'}, + 'index': { + 'prefix': 'w', + 'name': '10', + 'shift_op': 'sxtw', + 'shift': {'value': '2'}, + }, + 'scale': '4', + } + } + ], + 'destination': [{'register': {'prefix': 's', 'name': '0'}}], + }, + 'directive': None, + 'comment': '= <<2', + 'label': None, + 'line_number': 4, + } + parsed_1 = self.parser.parse_line(line_comment, 1) + parsed_2 = self.parser.parse_line(line_label, 2) + parsed_3 = self.parser.parse_line(line_directive, 3) + parsed_4 = self.parser.parse_line(line_instruction, 4) + + self.assertEqual(parsed_1, instruction_form_1) + self.assertEqual(parsed_2, instruction_form_2) + self.assertEqual(parsed_3, instruction_form_3) + self.assertEqual(parsed_4['operands'], instruction_form_4['operands']) + + def test_parse_file(self): + parsed = self.parser.parse_file(self.triad_code) + self.assertEqual(parsed[0]['line_number'], 1) + self.assertEqual(len(parsed), 645) + + ################## + # Helper functions + ################## + def _get_comment(self, parser, comment): + return ' '.join( + parser._process_operand(parser.comment.parseString(comment, parseAll=True).asDict())[ + 'comment' + ] + ) + + def _get_label(self, parser, label): + return parser._process_operand(parser.label.parseString(label, parseAll=True).asDict())[ + 'label' + ] + + def 
_get_directive(self, parser, directive): + return parser._process_operand( + parser.directive.parseString(directive, parseAll=True).asDict() + )['directive'] + + @staticmethod + def _find_file(name): + testdir = os.path.dirname(__file__) + name = os.path.join(testdir, 'test_files', name) + assert os.path.exists(name) + return name + + +if __name__ == '__main__': + suite = unittest.TestLoader().loadTestsFromTestCase(TestParserAArch64v81) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 2290e0f..7bd77c0 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -14,7 +14,7 @@ from osaca.parser import ParserX86ATT class TestParserX86ATT(unittest.TestCase): def setUp(self): self.parser = ParserX86ATT() - with open(self._find_file('triad-iaca.s')) as f: + with open(self._find_file('triad-x86-iaca.s')) as f: self.triad_code = f.read() ################## @@ -75,8 +75,8 @@ class TestParserX86ATT(unittest.TestCase): instr2 = 'jb ..B1.4 \t' instr3 = ' movl $222,%ebx #IACA END' instr4 = 'vmovss %xmm4, -4(%rsp,%rax,8) #12.9' - instr5 = 'mov %ebx, var(,1)' - instr6 = 'lea (,%rax,8), %rbx' + instr5 = 'mov %ebx,var(,1)' + instr6 = 'lea (,%rax,8),%rbx' parsed_1 = self.parser.parse_instruction(instr1) parsed_2 = self.parser.parse_instruction(instr2) @@ -86,41 +86,43 @@ class TestParserX86ATT(unittest.TestCase): parsed_6 = self.parser.parse_instruction(instr6) self.assertEqual(parsed_1['instruction'], 'vcvtsi2ss') - self.assertEqual(parsed_1['operands']['destination']['register']['name'], 'xmm2') - self.assertEqual(parsed_1['operands']['sources'][0]['register']['name'], 'edx') + self.assertEqual(parsed_1['operands']['destination'][0]['register']['name'], 'xmm2') + self.assertEqual(parsed_1['operands']['source'][0]['register']['name'], 'edx') self.assertEqual(parsed_1['comment'], '12.27') self.assertEqual(parsed_2['instruction'], 'jb') - 
self.assertEqual(parsed_2['operands']['destination']['identifier'], '..B1.4') - self.assertEqual(len(parsed_2['operands']['sources']), 0) + self.assertEqual(parsed_2['operands']['destination'][0]['identifier']['name'], '..B1.4') + self.assertEqual(len(parsed_2['operands']['source']), 0) self.assertIsNone(parsed_2['comment']) self.assertEqual(parsed_3['instruction'], 'movl') - self.assertEqual(parsed_3['operands']['destination']['register']['name'], 'ebx') - self.assertEqual(parsed_3['operands']['sources'][0]['immediate']['value'], '222') + self.assertEqual(parsed_3['operands']['destination'][0]['register']['name'], 'ebx') + self.assertEqual(parsed_3['operands']['source'][0]['immediate']['value'], '222') self.assertEqual(parsed_3['comment'], 'IACA END') self.assertEqual(parsed_4['instruction'], 'vmovss') - self.assertEqual(parsed_4['operands']['destination']['memory']['offset'], '-4') - self.assertEqual(parsed_4['operands']['destination']['memory']['base']['name'], 'rsp') - self.assertEqual(parsed_4['operands']['destination']['memory']['index']['name'], 'rax') - self.assertEqual(parsed_4['operands']['destination']['memory']['scale'], '8') - self.assertEqual(parsed_4['operands']['sources'][0]['register']['name'], 'xmm4') + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['offset']['value'], '-4') + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['base']['name'], 'rsp') + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['index']['name'], 'rax') + self.assertEqual(parsed_4['operands']['destination'][0]['memory']['scale'], '8') + self.assertEqual(parsed_4['operands']['source'][0]['register']['name'], 'xmm4') self.assertEqual(parsed_4['comment'], '12.9') self.assertEqual(parsed_5['instruction'], 'mov') - self.assertEqual(parsed_5['operands']['destination']['memory']['offset'], 'var') - self.assertIsNone(parsed_5['operands']['destination']['memory']['base']) - 
self.assertIsNone(parsed_5['operands']['destination']['memory']['index']) - self.assertEqual(parsed_5['operands']['destination']['memory']['scale'], '1') - self.assertEqual(parsed_5['operands']['sources'][0]['register']['name'], 'ebx') + self.assertEqual( + parsed_5['operands']['destination'][0]['memory']['offset']['identifier']['name'], 'var' + ) + self.assertIsNone(parsed_5['operands']['destination'][0]['memory']['base']) + self.assertIsNone(parsed_5['operands']['destination'][0]['memory']['index']) + self.assertEqual(parsed_5['operands']['destination'][0]['memory']['scale'], '1') + self.assertEqual(parsed_5['operands']['source'][0]['register']['name'], 'ebx') self.assertEqual(parsed_6['instruction'], 'lea') - self.assertIsNone(parsed_6['operands']['sources'][0]['memory']['offset']) - self.assertIsNone(parsed_6['operands']['sources'][0]['memory']['base']) - self.assertEqual(parsed_6['operands']['sources'][0]['memory']['index']['name'], 'rax') - self.assertEqual(parsed_6['operands']['sources'][0]['memory']['scale'], '8') - self.assertEqual(parsed_6['operands']['destination']['register']['name'], 'rbx') + self.assertIsNone(parsed_6['operands']['source'][0]['memory']['offset']) + self.assertIsNone(parsed_6['operands']['source'][0]['memory']['base']) + self.assertEqual(parsed_6['operands']['source'][0]['memory']['index']['name'], 'rax') + self.assertEqual(parsed_6['operands']['source'][0]['memory']['scale'], '8') + self.assertEqual(parsed_6['operands']['destination'][0]['register']['name'], 'rbx') def test_parse_line(self): line_comment = '# -- Begin main' @@ -155,22 +157,22 @@ class TestParserX86ATT(unittest.TestCase): instruction_form_4 = { 'instruction': 'lea', 'operands': { - 'sources': [ + 'source': [ { 'memory': { - 'offset': '2', + 'offset': {'value': '2'}, 'base': {'name': 'rax'}, 'index': {'name': 'rax'}, 'scale': '1', } } ], - 'destination': {'register': {'name': 'ecx'}}, + 'destination': [{'register': {'name': 'ecx'}}], }, 'directive': None, - 'comment': 
'-- Begin main', + 'comment': '12.9', 'label': None, - 'line_number': 1, + 'line_number': 4, } parsed_1 = self.parser.parse_line(line_comment, 1) @@ -181,7 +183,7 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_1, instruction_form_1) self.assertEqual(parsed_2, instruction_form_2) self.assertEqual(parsed_3, instruction_form_3) - self.assertEqual(parsed_4['operands'], instruction_form_4['operands']) + self.assertEqual(parsed_4, instruction_form_4) def test_parse_file(self): parsed = self.parser.parse_file(self.triad_code) @@ -192,13 +194,21 @@ class TestParserX86ATT(unittest.TestCase): # Helper functions ################## def _get_comment(self, parser, comment): - return ' '.join(parser.comment.parseString(comment, parseAll=True).asDict()['comment']) + return ' '.join( + parser._process_operand(parser.comment.parseString(comment, parseAll=True).asDict())[ + 'comment' + ] + ) def _get_label(self, parser, label): - return parser.label.parseString(label, parseAll=True).asDict()['label'] + return parser._process_operand(parser.label.parseString(label, parseAll=True).asDict())[ + 'label' + ] def _get_directive(self, parser, directive): - return parser.directive.parseString(directive, parseAll=True).asDict()['directive'] + return parser._process_operand( + parser.directive.parseString(directive, parseAll=True).asDict() + )['directive'] @staticmethod def _find_file(name): @@ -206,3 +216,8 @@ class TestParserX86ATT(unittest.TestCase): name = os.path.join(testdir, 'test_files', name) assert os.path.exists(name) return name + + +if __name__ == '__main__': + suite = unittest.TestLoader().loadTestsFromTestCase(TestParserX86ATT) + unittest.TextTestRunner(verbosity=2).run(suite)