diff --git a/osaca/parser/parser_AArch64.py b/osaca/parser/parser_AArch64.py index fd8a681..5b0598c 100755 --- a/osaca/parser/parser_AArch64.py +++ b/osaca/parser/parser_AArch64.py @@ -256,7 +256,9 @@ class ParserAArch64(BaseParser): # 2. Parse label if result is None: try: - result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())[0] + result = self.process_operand( + self.label.parseString(line, parseAll=True).asDict() + )[0] result = AttrDict.convert_dict(result) instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name if self.COMMENT_ID in result[self.LABEL_ID]: @@ -364,6 +366,8 @@ class ParserAArch64(BaseParser): offset = memory_address.get("offset", None) if isinstance(offset, list) and len(offset) == 1: offset = offset[0] + if offset is not None and "value" in offset: + offset["value"] = int(offset["value"], 0) base = memory_address.get("base", None) index = memory_address.get("index", None) scale = 1 @@ -380,7 +384,12 @@ class ParserAArch64(BaseParser): if "pre_indexed" in memory_address: new_dict["pre_indexed"] = True if "post_indexed" in memory_address: - new_dict["post_indexed"] = memory_address["post_indexed"] + if "value" in memory_address["post_indexed"]: + new_dict["post_indexed"] = {"value": int( + memory_address["post_indexed"]["value"], 0 + )} + else: + new_dict["post_indexed"] = memory_address["post_indexed"] return AttrDict({self.MEMORY_ID: new_dict}) def process_sp_register(self, register): @@ -392,32 +401,31 @@ class ParserAArch64(BaseParser): def resolve_range_list(self, operand): """ Resolve range or list register operand to list of registers. - Returns None if neither list nor range """ if 'register' in operand: if 'list' in operand.register: index = operand.register.get('index') - l = [] + range_list = [] for reg in operand.register.list: reg = deepcopy(reg) if index is not None: - reg.index = index - l.append(AttrDict({self.REGISTER_ID: reg})) - return l + reg['index'] = int(index, 0) + range_list.append(AttrDict({self.REGISTER_ID: reg})) + return range_list elif 'range' in operand.register: base_register = operand.register.range[0] index = operand.register.get('index') - l = [] + range_list = [] start_name = base_register.name end_name = operand.register.range[1].name - for name in range(int(start_name), int(end_name)+1): + for name in range(int(start_name), int(end_name) + 1): reg = deepcopy(base_register) if index is not None: - reg['index'] = operand.register.range.index + reg['index'] = int(index, 0) reg['name'] = str(name) - l.append(AttrDict({self.REGISTER_ID: reg})) - return l + range_list.append(AttrDict({self.REGISTER_ID: reg})) + return range_list def process_register_list(self, register_list): """Post-process register lists (e.g., {r0,r3,r5}) and register ranges (e.g., {r0-r7})""" @@ -447,11 +455,13 @@ class ParserAArch64(BaseParser): if "value" in immediate: # normal integer value immediate["type"] = "int" + # convert hex/bin immediates to dec + immediate["value"] = self.normalize_imd(immediate) return AttrDict({self.IMMEDIATE_ID: immediate}) if "base_immediate" in immediate: # arithmetic immediate, add calculated value as value immediate["shift"] = immediate["shift"][0] - immediate["value"] = int(immediate["base_immediate"]["value"], 0) << int( + immediate["value"] = self.normalize_imd(immediate["base_immediate"]) << int( immediate["shift"]["value"] ) immediate["type"] = "int" @@ -499,10 +509,11 @@ class ParserAArch64(BaseParser): def normalize_imd(self, imd): """Normalize immediate to decimal based representation""" if "value" in imd: - if imd["value"].lower().startswith("0x"): - # hex, return decimal - return int(imd["value"], 16) - return int(imd["value"], 10) + if isinstance(imd["value"], str): + # hex or bin, return decimal + return int(imd["value"], 0) + else: + return imd["value"] elif "float" in imd: return self.ieee_to_float(imd["float"]) elif "double" in imd: diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 110b431..5c2a493 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -327,9 +327,14 @@ class ParserX86ATT(BaseParser): offset = memory_address.get("offset", None) base = memory_address.get("base", None) index = memory_address.get("index", None) - scale = 1 if "scale" not in memory_address else int(memory_address["scale"]) + scale = 1 if "scale" not in memory_address else int(memory_address["scale"], 0) if isinstance(offset, str) and base is None and index is None: - offset = {"value": offset} + try: + offset = {"value": int(offset, 0)} + except ValueError: + offset = {"value": offset} + elif offset is not None and "value" in offset: + offset["value"] = int(offset["value"], 0) new_dict = AttrDict({"offset": offset, "base": base, "index": index, "scale": scale}) # Add segmentation extension if existing if self.SEGMENT_EXT_ID in memory_address: @@ -347,7 +352,8 @@ class ParserX86ATT(BaseParser): if "identifier" in immediate: # actually an identifier, change declaration return immediate - # otherwise nothing to do + # otherwise just make sure the immediate is a decimal + immediate["value"] = int(immediate["value"], 0) return AttrDict({self.IMMEDIATE_ID: immediate}) def get_full_reg_name(self, register): @@ -358,10 +364,11 @@ class ParserX86ATT(BaseParser): def normalize_imd(self, imd): """Normalize immediate to decimal based representation""" if "value" in imd: - if imd["value"].lower().startswith("0x"): - # hex, return decimal - return int(imd["value"], 16) - return int(imd["value"], 10) + if isinstance(imd["value"], str): + # return decimal + return int(imd["value"], 0) + else: + return imd["value"] # identifier return imd diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index b450c17..5889eb3 100755 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -163,16 +163,16 @@ class ISASemantics(object): if only_postindexed: for o in instruction_form.operands: if 'post_indexed' in o.get('memory', {}): - base_name = o.memory.base.get('prefix', '')+o.memory.base.name + base_name = o.memory.base.get('prefix', '') + o.memory.base.name return {base_name: { - 'name': o.memory.base.get('prefix', '')+o.memory.base.name, - 'value': int(o.memory.post_indexed.value) + 'name': o.memory.base.get('prefix', '') + o.memory.base.name, + 'value': o.memory.post_indexed.value }} return {} reg_operand_names = {} # e.g., {'rax': 'op1'} operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged - + for o in instruction_form.operands: if 'pre_indexed' in o.get('memory', {}): # Assuming no isa_data.operation @@ -180,24 +180,24 @@ class ISASemantics(object): raise ValueError( "ISA information for pre-indexed instruction {!r} has operation set." "This is currently not supprted.".format(instruction_form.line)) - base_name = o.memory.base.get('prefix', '')+o.memory.base.name + base_name = o.memory.base.get('prefix', '') + o.memory.base.name reg_operand_names = {base_name: 'op1'} operand_state = {'op1': { 'name': base_name, - 'value': int(o.memory.offset.value) + 'value': o.memory.offset.value }} if isa_data is not None and 'operation' in isa_data: for i, o in enumerate(instruction_form.operands): - operand_name = "op{}".format(i+1) + operand_name = "op{}".format(i + 1) if "register" in o: - o_reg_name = o["register"].get('prefix', '')+o["register"]["name"] + o_reg_name = o["register"].get('prefix', '') + o["register"]["name"] reg_operand_names[o_reg_name] = operand_name operand_state[operand_name] = { 'name': o_reg_name, 'value': 0} elif "immediate" in o: - operand_state[operand_name] = {'value': int(o["immediate"]["value"])} + operand_state[operand_name] = {'value': o["immediate"]["value"]} elif "memory" in o: # TODO lea needs some thinking about pass @@ -212,7 +212,7 @@ class ISASemantics(object): """ Create operand dictionary containing src/dst operands out of the ISA data entry and the oeprands of an instruction form - + If breaks_pedendency_on_equal_operands is True (configuted per instruction in ISA db) and all operands are equal, place operand into destination only. diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index 97c3a62..b3a8af6 100755 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -382,9 +382,9 @@ class KernelDG(nx.DiGraph): # determine absolute address change addr_change = 0 if src.offset and "value" in src.offset: - addr_change += int(src.offset.value, 0) + addr_change += src.offset.value if mem.offset: - addr_change -= int(mem.offset.value, 0) + addr_change -= mem.offset.value if mem.base and src.base: base_change = register_changes.get( src.base.get('prefix', '') + src.base.name, diff --git a/tests/test_parser_AArch64.py b/tests/test_parser_AArch64.py index eddb725..723454f 100755 --- a/tests/test_parser_AArch64.py +++ b/tests/test_parser_AArch64.py @@ -102,7 +102,7 @@ class TestParserAArch64(unittest.TestCase): self.assertEqual(parsed_3.instruction, "mov") self.assertEqual(parsed_3.operands[0].register.name, "2") self.assertEqual(parsed_3.operands[0].register.prefix, "x") - self.assertEqual(parsed_3.operands[1].immediate.value, "0x222") + self.assertEqual(parsed_3.operands[1].immediate.value, int("0x222", 0)) self.assertEqual(parsed_3.comment, "NOT IACA END") self.assertEqual(parsed_4.instruction, "str") @@ -208,7 +208,7 @@ class TestParserAArch64(unittest.TestCase): {"prfop": {"type": ["PLD"], "target": ["L1"], "policy": ["KEEP"]}}, { "memory": { - "offset": {"value": "2048"}, + "offset": {"value": 2048}, "base": {"prefix": "x", "name": "26"}, "index": None, "scale": 1, @@ -228,7 +228,7 @@ class TestParserAArch64(unittest.TestCase): {"register": {"prefix": "x", "name": "30"}}, { "memory": { - "offset": {"value": "-16"}, + "offset": {"value": -16}, "base": {"name": "sp", "prefix": "x"}, "index": None, "scale": 1, @@ -253,7 +253,7 @@ class TestParserAArch64(unittest.TestCase): "base": {"prefix": "x", "name": "11"}, "index": None, "scale": 1, - "post_indexed": {"value": "64"}, + "post_indexed": {"value": 64}, } }, ], @@ -270,7 +270,7 @@ class TestParserAArch64(unittest.TestCase): {"register": {"prefix": "p", "name": "0", "predication": "m"}}, {"register": {"prefix": "z", "name": "29", "shape": "d"}}, {"register": {"prefix": "z", "name": "21", "shape": "d"}}, - {"immediate": {"value": "90", "type": "int"}}, + {"immediate": {"value": 90, "type": "int"}}, ], "directive": None, "comment": None, @@ -327,16 +327,28 @@ class TestParserAArch64(unittest.TestCase): def test_multiple_regs(self): instr_range = "PUSH {x5-x7}" instr_list = "POP {x5, x6, x7}" + instr_range_with_index = "ld4 {v0.S - v3.S}[2]" + instr_list_with_index = "ld4 {v0.S, v1.S, v2.S, v3.S}[2]" reg_list = [ AttrDict({"register": {"prefix": "x", "name": "5"}}), AttrDict({"register": {"prefix": "x", "name": "6"}}), AttrDict({"register": {"prefix": "x", "name": "7"}}), ] + reg_list_idx = [ + AttrDict({"register": {"prefix": "v", "name": "0", "shape": "S", "index": 2}}), + AttrDict({"register": {"prefix": "v", "name": "1", "shape": "S", "index": 2}}), + AttrDict({"register": {"prefix": "v", "name": "2", "shape": "S", "index": 2}}), + AttrDict({"register": {"prefix": "v", "name": "3", "shape": "S", "index": 2}}), + ] prange = self.parser.parse_line(instr_range) plist = self.parser.parse_line(instr_list) + p_idx_range = self.parser.parse_line(instr_range_with_index) + p_idx_list = self.parser.parse_line(instr_list_with_index) self.assertEqual(prange.operands, reg_list) self.assertEqual(plist.operands, reg_list) + self.assertEqual(p_idx_range.operands, reg_list_idx) + self.assertEqual(p_idx_list.operands, reg_list_idx) def test_reg_dependency(self): reg_1_1 = AttrDict({"prefix": "b", "name": "1"}) diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index c9f2e89..57b2e71 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -120,12 +120,12 @@ class TestParserX86ATT(unittest.TestCase): self.assertIsNone(parsed_2.comment) self.assertEqual(parsed_3.instruction, "movl") - self.assertEqual(parsed_3.operands[0].immediate.value, "222") + self.assertEqual(parsed_3.operands[0].immediate.value, 222) self.assertEqual(parsed_3.operands[1].register.name, "ebx") self.assertEqual(parsed_3.comment, "IACA END") self.assertEqual(parsed_4.instruction, "vmovss") - self.assertEqual(parsed_4.operands[1].memory.offset.value, "-4") + self.assertEqual(parsed_4.operands[1].memory.offset.value, -4) self.assertEqual(parsed_4.operands[1].memory.base.name, "rsp") self.assertEqual(parsed_4.operands[1].memory.index.name, "rax") self.assertEqual(parsed_4.operands[1].memory.scale, 8) @@ -146,7 +146,7 @@ class TestParserX86ATT(unittest.TestCase): self.assertEqual(parsed_6.operands[0].memory.scale, 8) self.assertEqual(parsed_6.operands[1].register.name, "rbx") - self.assertEqual(parsed_7.operands[0].immediate.value, "0x1") + self.assertEqual(parsed_7.operands[0].immediate.value, 0x1) self.assertEqual(parsed_7.operands[1].register.name, "xmm0") self.assertEqual(parsed_7.operands[2].register.name, "ymm1") self.assertEqual(parsed_7.operands[3].register.name, "ymm1") @@ -189,7 +189,7 @@ class TestParserX86ATT(unittest.TestCase): "operands": [ { "memory": { - "offset": {"value": "2"}, + "offset": {"value": 2}, "base": {"name": "rax"}, "index": {"name": "rax"}, "scale": 1, @@ -240,7 +240,7 @@ class TestParserX86ATT(unittest.TestCase): imd_decimal_1 = {"value": "79"} imd_hex_1 = {"value": "0x4f"} imd_decimal_2 = {"value": "8"} - imd_hex_2 = {"value": "0x8"} + imd_hex_2 = {"value": "8"} self.assertEqual( self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1) )