diff --git a/osaca/parser/parser_x86intel.py b/osaca/parser/parser_x86intel.py index 503c572..8802b59 100644 --- a/osaca/parser/parser_x86intel.py +++ b/osaca/parser/parser_x86intel.py @@ -321,9 +321,7 @@ class ParserX86Intel(ParserX86): base = base_register.setResultsName("base") displacement = pp.Group( - pp.Group( - integer_number ^ identifier - ).setResultsName(self.immediate_id) + pp.Group(integer_number ^ identifier).setResultsName(self.immediate_id) ).setResultsName("displacement") short_indexed = index_register.setResultsName("index") long_indexed = ( @@ -331,11 +329,10 @@ class ParserX86Intel(ParserX86): + pp.Literal("*") + scale.setResultsName("scale") ) - indexed = pp.Group( - short_indexed - ^ long_indexed - ).setResultsName("indexed") + indexed = pp.Group(short_indexed ^ long_indexed).setResultsName("indexed") operator = pp.Word("+-", exact=1) + operator_index = pp.Word("+-", exact=1).setResultsName("operator_idx") + operator_displacement = pp.Word("+-", exact=1).setResultsName("operator_disp") # Syntax: # `base` always preceedes `indexed`. 
@@ -347,16 +344,24 @@ class ParserX86Intel(ParserX86): pp.Literal("[") + ( base - ^ (base + operator + displacement) - ^ (base + operator + displacement + operator + indexed) - ^ (base + operator + indexed) - ^ (base + operator + indexed + operator + displacement) + ^ (base + operator_displacement + displacement) + ^ (base + operator_displacement + displacement + operator_index + indexed) + ^ (base + operator_index + indexed) + ^ (base + operator_index + indexed + operator_displacement + displacement) ^ (displacement + operator + base) - ^ (displacement + operator + base + operator + indexed) - ^ (displacement + operator + pp.Group(long_indexed).setResultsName("indexed")) + ^ (displacement + operator + base + operator_index + indexed) + ^ ( + displacement + + operator_index + + pp.Group(long_indexed).setResultsName("indexed") + ) ^ pp.Group(long_indexed).setResultsName("indexed") - ^ (pp.Group(long_indexed).setResultsName("indexed") + operator + displacement) - ) + ^ ( + pp.Group(long_indexed).setResultsName("indexed") + + operator_displacement + + displacement + ) + ) + pp.Literal("]") ).setResultsName("register_expression") @@ -373,7 +378,7 @@ class ParserX86Intel(ParserX86): self.register.setResultsName("segment") + pp.Literal(":") + immediate ^ immediate + register_expression ^ register_expression - ^ identifier + pp.Optional(pp.Literal("+") + immediate) + ^ identifier + pp.Optional(operator + immediate) ).setResultsName("address_expression") offset_expression = pp.Group( @@ -665,9 +670,11 @@ class ParserX86Intel(ParserX86): if indexed: index = indexed.get("index") scale = int(indexed.get("scale", "1"), 0) - displacement_op = ( - self.process_immediate(displacement.immediate) if displacement else None - ) + if register_expression.get("operator_idx") == "-": + scale *= -1 + displacement_op = self.process_immediate(displacement.immediate) if displacement else None + if displacement_op and register_expression.get("operator_disp") == "-": + displacement_op.value 
*= -1 base_op = RegisterOperand(name=base.name) if base else None index_op = RegisterOperand(name=index.name) if index else None new_memory = MemoryOperand( @@ -724,6 +731,8 @@ class ParserX86Intel(ParserX86): if "displacement" in offset_expression else None ) + if displacement and offset_expression.get("operator_disp") == "-": + displacement.value *= -1 identifier = self.process_identifier(offset_expression.identifier) identifier.offset = displacement return MemoryOperand(offset=identifier) diff --git a/tests/test_files/kernel_x86_intel_memdep.s b/tests/test_files/kernel_x86_intel_memdep.s index e94caeb..98ef9fc 100644 --- a/tests/test_files/kernel_x86_intel_memdep.s +++ b/tests/test_files/kernel_x86_intel_memdep.s @@ -1,15 +1,15 @@ ; Translated from kernel_x86_memdep.s L4: - vmovsd [rax+8], xmm0 - add rax, 8 - vmovsd [rax+rcx*8+8], xmm0 - vaddsd xmm0, xmm0, [rax] - sub rax, -8 - vaddsd xmm0, xmm0, [rax-8] - dec rcx - vaddsd xmm0, xmm0, [rax+rcx*8+8] - mov rdx, rcx - vaddsd xmm0, xmm0, [rax+rdx*8+8] + vmovsd [rax+8], xmm0 # line 3 <---------------------------------+ + add rax, 8 # rax=rax_orig+8 | + vmovsd [rax+rcx*8+8], xmm0 # line 5 <------------------------------------------+ + vaddsd xmm0, xmm0, [rax] # depends on line 3, rax+8;[rax] == [rax+8] --------+ | + sub rax, -8 # rax=rax_orig+16 | | + vaddsd xmm0, xmm0, [rax-8] # depends on line 3, rax+16;[rax-8] == [rax+8] -----+ | + dec rcx # rcx=rcx_orig-1 | + vaddsd xmm0, xmm0, [rax+rcx*8+8] # depends on line 5, [(rax+8)+(rcx-1)*8+8] == [rax+rcx*8+8] -+ + mov rdx, rcx # | + vaddsd xmm0, xmm0, [rax+rdx*8+8] # depends on line 5, rcx == rdx -----------------------------+ vmulsd xmm0, xmm0, xmm1 add rax, 8 cmp rsi, rax diff --git a/tests/test_parser_x86intel.py b/tests/test_parser_x86intel.py index 303a9ab..1918810 100755 --- a/tests/test_parser_x86intel.py +++ b/tests/test_parser_x86intel.py @@ -209,15 +209,17 @@ class TestParserX86Intel(unittest.TestCase): self.assertEqual(parsed_13.operands[0], IdentifierOperand(name="$LN18@operator")) 
self.assertEqual(parsed_14.mnemonic, "vaddsd") - self.assertEqual(parsed_14.operands[0], - RegisterOperand(name="XMM0")) - self.assertEqual(parsed_14.operands[1], - RegisterOperand(name="XMM0")) - self.assertEqual(parsed_14.operands[2], - MemoryOperand(base=RegisterOperand(name="RDX"), - offset=ImmediateOperand(value=8), - index=RegisterOperand(name="RAX"), - scale=8)) + self.assertEqual(parsed_14.operands[0], RegisterOperand(name="XMM0")) + self.assertEqual(parsed_14.operands[1], RegisterOperand(name="XMM0")) + self.assertEqual( + parsed_14.operands[2], + MemoryOperand( + base=RegisterOperand(name="RDX"), + offset=ImmediateOperand(value=8), + index=RegisterOperand(name="RAX"), + scale=8, + ), + ) def test_parse_line(self): line_comment = "; -- Begin main" @@ -363,21 +365,32 @@ class TestParserX86Intel(unittest.TestCase): parsed = self.parser.parse_file(self.gs_gcc_code) self.assertEqual(parsed[0].line_number, 1) # Check a few lines to make sure that we produced something reasonable. - self.assertEqual(parsed[61], - InstructionForm(mnemonic="vaddsd", - operands=[RegisterOperand("XMM0"), - RegisterOperand("XMM0"), - MemoryOperand(base=RegisterOperand("RDX"), - index=RegisterOperand("RAX"), - scale=8, - offset=ImmediateOperand(value=8))], - line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]", - line_number=62)) - self.assertEqual(parsed[101], - InstructionForm(directive_id=DirectiveOperand(name=".long", - parameters=["1072939201"]), - line=" .long 1072939201", - line_number=102)) + self.assertEqual( + parsed[61], + InstructionForm( + mnemonic="vaddsd", + operands=[ + RegisterOperand("XMM0"), + RegisterOperand("XMM0"), + MemoryOperand( + base=RegisterOperand("RDX"), + index=RegisterOperand("RAX"), + scale=8, + offset=ImmediateOperand(value=8), + ), + ], + line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]", + line_number=62, + ), + ) + self.assertEqual( + parsed[101], + InstructionForm( + directive_id=DirectiveOperand(name=".long", parameters=["1072939201"]), + line=" 
.long 1072939201", + line_number=102, + ), + ) self.assertEqual(len(parsed), 102) def test_normalize_imd(self):