diff --git a/osaca/parser/parser_x86intel.py b/osaca/parser/parser_x86intel.py index cb4af3d..503c572 100644 --- a/osaca/parser/parser_x86intel.py +++ b/osaca/parser/parser_x86intel.py @@ -318,28 +318,45 @@ class ParserX86Intel(ParserX86): base_register = self.register index_register = self.register scale = pp.Word("1248", exact=1) - post_displacement = pp.Group( - (pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign") + integer_number - | identifier - ).setResultsName(self.immediate_id) - pre_displacement = pp.Group(integer_number + pp.Literal("+")).setResultsName( - self.immediate_id + + base = base_register.setResultsName("base") + displacement = pp.Group( + pp.Group( + integer_number ^ identifier + ).setResultsName(self.immediate_id) + ).setResultsName("displacement") + short_indexed = index_register.setResultsName("index") + long_indexed = ( + index_register.setResultsName("index") + + pp.Literal("*") + + scale.setResultsName("scale") ) indexed = pp.Group( - index_register.setResultsName("index") - + pp.Optional(pp.Literal("*") + scale.setResultsName("scale")) + short_indexed + ^ long_indexed ).setResultsName("indexed") + operator = pp.Word("+-", exact=1) + + # Syntax: + # `base` always precedes `indexed`. + # `short_indexed` is only allowed if it follows `base`, not alone. + # `displacement` can go anywhere. + # It's easier to list all the alternatives than to represent these rules using complicated + # `Optional` and what not. 
register_expression = pp.Group( pp.Literal("[") - + pp.Optional(pp.Group(pre_displacement).setResultsName("pre_displacement")) - + pp.Group( - base_register.setResultsName("base") - ^ pp.Group( - base_register.setResultsName("base") + pp.Literal("+") + indexed - ).setResultsName("base_and_indexed") - ^ indexed - ).setResultsName("non_displacement") - + pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement")) + + ( + base + ^ (base + operator + displacement) + ^ (base + operator + displacement + operator + indexed) + ^ (base + operator + indexed) + ^ (base + operator + indexed + operator + displacement) + ^ (displacement + operator + base) + ^ (displacement + operator + base + operator + indexed) + ^ (displacement + operator + pp.Group(long_indexed).setResultsName("indexed")) + ^ pp.Group(long_indexed).setResultsName("indexed") + ^ (pp.Group(long_indexed).setResultsName("indexed") + operator + displacement) + ) + pp.Literal("]") ).setResultsName("register_expression") @@ -640,33 +657,16 @@ class ParserX86Intel(ParserX86): return RegisterOperand(name=operand.name) def process_register_expression(self, register_expression): - pre_displacement = register_expression.get("pre_displacement") - post_displacement = register_expression.get("post_displacement") - non_displacement = register_expression.get("non_displacement") - base = None - indexed = None - if non_displacement: - base_and_indexed = non_displacement.get("base_and_indexed") - if base_and_indexed: - base = base_and_indexed.get("base") - indexed = base_and_indexed.get("indexed") - else: - base = non_displacement.get("base") - if not base: - indexed = non_displacement.get("indexed") + base = register_expression.get("base") + displacement = register_expression.get("displacement") + indexed = register_expression.get("indexed") + index = None + scale = 1 if indexed: index = indexed.get("index") scale = int(indexed.get("scale", "1"), 0) - else: - index = None - scale = 1 displacement_op = ( - 
self.process_immediate(pre_displacement.immediate) if pre_displacement else None - ) - displacement_op = ( - self.process_immediate(post_displacement.immediate) - if post_displacement - else displacement_op + self.process_immediate(displacement.immediate) if displacement else None ) base_op = RegisterOperand(name=base.name) if base else None index_op = RegisterOperand(name=index.name) if index else None diff --git a/tests/test_files/gs_x86_gcc.s b/tests/test_files/gs_x86_gcc.s new file mode 100644 index 0000000..307e9e6 --- /dev/null +++ b/tests/test_files/gs_x86_gcc.s @@ -0,0 +1,102 @@ +# Produced with gcc 14.2 with -O3 -march=sapphirerapids -fopenmp-simd -mprefer-vector-width=512, https://godbolt.org/z/drE47x1b4. +.LC3: + .string "%f\n" +main: + push r14 + xor edi, edi + push r13 + push r12 + push rbp + push rbx + call time + mov edi, eax + call srand + mov edi, 1600 + call malloc + mov r12, rax + mov rbp, rax + lea r13, [rax+1600] + mov rbx, rax +.L2: + mov edi, 1600 + add rbx, 8 + call malloc + mov QWORD PTR [rbx-8], rax + cmp r13, rbx + jne .L2 + lea rbx, [r12+8] + lea r13, [r12+1592] +.L5: + mov r14d, 8 +.L4: + call rand + vxorpd xmm2, xmm2, xmm2 + mov rcx, QWORD PTR [rbx] + movsx rdx, eax + mov esi, eax + imul rdx, rdx, 351843721 + sar esi, 31 + sar rdx, 45 + sub edx, esi + imul edx, edx, 100000 + sub eax, edx + vcvtsi2sd xmm0, xmm2, eax + vdivsd xmm0, xmm0, QWORD PTR .LC0[rip] + vmovsd QWORD PTR [rcx+r14], xmm0 + add r14, 8 + cmp r14, 1592 + jne .L4 + add rbx, 8 + cmp r13, rbx + jne .L5 + vmovsd xmm1, QWORD PTR .LC1[rip] + lea rdi, [r12+1584] +.L6: + mov rdx, QWORD PTR [rbp+8] + mov rcx, QWORD PTR [rbp+16] + mov eax, 1 + mov rsi, QWORD PTR [rbp+0] + vmovsd xmm0, QWORD PTR [rdx] +.L7: + vaddsd xmm0, xmm0, QWORD PTR [rcx+rax*8] + vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8] + vaddsd xmm0, xmm0, QWORD PTR [rsi+rax*8] + vmulsd xmm0, xmm0, xmm1 + vmovsd QWORD PTR [rdx+rax*8], xmm0 + inc rax + cmp rax, 199 + jne .L7 + vmovsd xmm0, QWORD PTR [rdx+1592] + add rbp, 8 
+ vmovsd QWORD PTR [rcx+8], xmm0 + cmp rdi, rbp + jne .L6 + mov rax, QWORD PTR [r12+1584] + vmovsd xmm0, QWORD PTR .LC2[rip] + vucomisd xmm0, QWORD PTR [rax+1584] + jp .L9 + je .L19 +.L9: + pop rbx + xor eax, eax + pop rbp + pop r12 + pop r13 + pop r14 + ret +.L19: + mov rax, QWORD PTR [r12] + mov edi, OFFSET FLAT:.LC3 + vmovsd xmm0, QWORD PTR [rax] + mov eax, 1 + call printf + jmp .L9 +.LC0: + .long 0 + .long 1083129856 +.LC1: + .long 2061584302 + .long 1072934420 +.LC2: + .long -57724360 + .long 1072939201 diff --git a/tests/test_parser_x86intel.py b/tests/test_parser_x86intel.py index 4df441b..303a9ab 100755 --- a/tests/test_parser_x86intel.py +++ b/tests/test_parser_x86intel.py @@ -25,6 +25,8 @@ class TestParserX86Intel(unittest.TestCase): self.triad_iaca_code = f.read() with open(self._find_file("gs_x86_icc.s")) as f: self.gs_icc_code = f.read() + with open(self._find_file("gs_x86_gcc.s")) as f: + self.gs_gcc_code = f.read() ################## # Test @@ -100,6 +102,7 @@ class TestParserX86Intel(unittest.TestCase): instr11 = "\tlea\trcx, OFFSET FLAT:??_R0N@8+8" instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555" instr13 = "\tjmp\t$LN18@operator" + instr14 = "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]" parsed_1 = self.parser.parse_instruction(instr1) parsed_2 = self.parser.parse_instruction(instr2) @@ -114,6 +117,7 @@ class TestParserX86Intel(unittest.TestCase): parsed_11 = self.parser.parse_instruction(instr11) parsed_12 = self.parser.parse_instruction(instr12) parsed_13 = self.parser.parse_instruction(instr13) + parsed_14 = self.parser.parse_instruction(instr14) self.assertEqual(parsed_1.mnemonic, "sub") self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP")) @@ -204,6 +208,17 @@ class TestParserX86Intel(unittest.TestCase): self.assertEqual(parsed_13.mnemonic, "jmp") self.assertEqual(parsed_13.operands[0], IdentifierOperand(name="$LN18@operator")) + self.assertEqual(parsed_14.mnemonic, "vaddsd") + 
self.assertEqual(parsed_14.operands[0], + RegisterOperand(name="XMM0")) + self.assertEqual(parsed_14.operands[1], + RegisterOperand(name="XMM0")) + self.assertEqual(parsed_14.operands[2], + MemoryOperand(base=RegisterOperand(name="RDX"), + offset=ImmediateOperand(value=8), + index=RegisterOperand(name="RAX"), + scale=8)) + def test_parse_line(self): line_comment = "; -- Begin main" line_instruction = "\tret\t0" @@ -344,6 +359,27 @@ class TestParserX86Intel(unittest.TestCase): ) self.assertEqual(len(parsed), 227) + def test_parse_file4(self): + parsed = self.parser.parse_file(self.gs_gcc_code) + self.assertEqual(parsed[0].line_number, 1) + # Check a few lines to make sure that we produced something reasonable. + self.assertEqual(parsed[61], + InstructionForm(mnemonic="vaddsd", + operands=[RegisterOperand("XMM0"), + RegisterOperand("XMM0"), + MemoryOperand(base=RegisterOperand("RDX"), + index=RegisterOperand("RAX"), + scale=8, + offset=ImmediateOperand(value=8))], + line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]", + line_number=62)) + self.assertEqual(parsed[101], + InstructionForm(directive_id=DirectiveOperand(name=".long", + parameters=["1072939201"]), + line=" .long 1072939201", + line_number=102)) + self.assertEqual(len(parsed), 102) + def test_normalize_imd(self): imd_binary = ImmediateOperand(value="1001111B") imd_octal = ImmediateOperand(value="117O")