Compare commits

...

3 Commits

Author SHA1 Message Date
JanLJL
af94ff7a2c Merge branch 'Intel' into intel-syntax 2025-03-14 15:45:29 +01:00
JanLJL
1d3ac100f9 take +- operator of offset/index in mem-addr into account 2025-03-14 15:40:16 +01:00
pleroy
d61330404b Rewrite the parsing of register expressions. GCC, for reasons unknown, puts the displacement in the middle.
I am completely restructuring the parser definitions so that they are more explicit.  They are more verbose too, but at least I understand what they do.
2025-03-12 22:26:38 +01:00
4 changed files with 85 additions and 64 deletions

View File

@@ -318,28 +318,50 @@ class ParserX86Intel(ParserX86):
base_register = self.register
index_register = self.register
scale = pp.Word("1248", exact=1)
post_displacement = pp.Group(
(pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign") + integer_number
| identifier
).setResultsName(self.immediate_id)
pre_displacement = pp.Group(integer_number + pp.Literal("+")).setResultsName(
self.immediate_id
)
indexed = pp.Group(
base = base_register.setResultsName("base")
displacement = pp.Group(
pp.Group(integer_number ^ identifier).setResultsName(self.immediate_id)
).setResultsName("displacement")
short_indexed = index_register.setResultsName("index")
long_indexed = (
index_register.setResultsName("index")
+ pp.Optional(pp.Literal("*") + scale.setResultsName("scale"))
).setResultsName("indexed")
+ pp.Literal("*")
+ scale.setResultsName("scale")
)
indexed = pp.Group(short_indexed ^ long_indexed).setResultsName("indexed")
operator = pp.Word("+-", exact=1)
operator_index = pp.Word("+-", exact=1).setResultsName("operator_idx")
operator_displacement = pp.Word("+-", exact=1).setResultsName("operator_disp")
# Syntax:
# `base` always precedes `indexed`.
# `short_indexed` is only allowed if it follows `base`, not alone.
# `displacement` can go anywhere.
# It's easier to list all the alternatives than to represent these rules using complicated
# `Optional` and what not.
register_expression = pp.Group(
pp.Literal("[")
+ pp.Optional(pp.Group(pre_displacement).setResultsName("pre_displacement"))
+ pp.Group(
base_register.setResultsName("base")
^ pp.Group(
base_register.setResultsName("base") + pp.Literal("+") + indexed
).setResultsName("base_and_indexed")
^ indexed
).setResultsName("non_displacement")
+ pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement"))
+ (
base
^ (base + operator_displacement + displacement)
^ (base + operator_displacement + displacement + operator_index + indexed)
^ (base + operator_index + indexed)
^ (base + operator_index + indexed + operator_displacement + displacement)
^ (displacement + operator + base)
^ (displacement + operator + base + operator_index + indexed)
^ (
displacement
+ operator_index
+ pp.Group(long_indexed).setResultsName("indexed")
)
^ pp.Group(long_indexed).setResultsName("indexed")
^ (
pp.Group(long_indexed).setResultsName("indexed")
+ operator_displacement
+ displacement
)
)
+ pp.Literal("]")
).setResultsName("register_expression")
@@ -356,7 +378,7 @@ class ParserX86Intel(ParserX86):
self.register.setResultsName("segment") + pp.Literal(":") + immediate
^ immediate + register_expression
^ register_expression
^ identifier + pp.Optional(pp.Literal("+") + immediate)
^ identifier + pp.Optional(operator + immediate)
).setResultsName("address_expression")
offset_expression = pp.Group(
@@ -640,34 +662,19 @@ class ParserX86Intel(ParserX86):
return RegisterOperand(name=operand.name)
def process_register_expression(self, register_expression):
pre_displacement = register_expression.get("pre_displacement")
post_displacement = register_expression.get("post_displacement")
non_displacement = register_expression.get("non_displacement")
base = None
indexed = None
if non_displacement:
base_and_indexed = non_displacement.get("base_and_indexed")
if base_and_indexed:
base = base_and_indexed.get("base")
indexed = base_and_indexed.get("indexed")
else:
base = non_displacement.get("base")
if not base:
indexed = non_displacement.get("indexed")
base = register_expression.get("base")
displacement = register_expression.get("displacement")
indexed = register_expression.get("indexed")
index = None
scale = 1
if indexed:
index = indexed.get("index")
scale = int(indexed.get("scale", "1"), 0)
else:
index = None
scale = 1
displacement_op = (
self.process_immediate(pre_displacement.immediate) if pre_displacement else None
)
displacement_op = (
self.process_immediate(post_displacement.immediate)
if post_displacement
else displacement_op
)
if register_expression.get("operator_index") == "-":
scale *= -1
displacement_op = self.process_immediate(displacement.immediate) if displacement else None
if displacement_op and register_expression.get("operator_disp") == "-":
displacement_op.value *= -1
base_op = RegisterOperand(name=base.name) if base else None
index_op = RegisterOperand(name=index.name) if index else None
new_memory = MemoryOperand(
@@ -724,6 +731,8 @@ class ParserX86Intel(ParserX86):
if "displacement" in offset_expression
else None
)
if displacement and "operator_disp" == "-":
displacement.value *= -1
identifier = self.process_identifier(offset_expression.identifier)
identifier.offset = displacement
return MemoryOperand(offset=identifier)

View File

@@ -1,3 +1,4 @@
# Produced with gcc 14.2 with -O3 -march=sapphirerapids -fopenmp-simd -mprefer-vector-width=512, https://godbolt.org/z/drE47x1b4.
.LC3:
.string "%f\n"
main:
@@ -56,7 +57,6 @@ main:
mov eax, 1
mov rsi, QWORD PTR [rbp+0]
vmovsd xmm0, QWORD PTR [rdx]
# OSACA-BEGIN
.L7:
vaddsd xmm0, xmm0, QWORD PTR [rcx+rax*8]
vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]
@@ -66,7 +66,6 @@ main:
inc rax
cmp rax, 199
jne .L7
# OSACA-END
vmovsd xmm0, QWORD PTR [rdx+1592]
add rbp, 8
vmovsd QWORD PTR [rcx+8], xmm0

View File

@@ -1,15 +1,15 @@
; Translated from kernel_x86_memdep.s
L4:
vmovsd [rax+8], xmm0
add rax, 8
vmovsd [rax+rcx*8+8], xmm0
vaddsd xmm0, xmm0, [rax]
sub rax, -8
vaddsd xmm0, xmm0, [rax-8]
dec rcx
vaddsd xmm0, xmm0, [rax+rcx*8+8]
mov rdx, rcx
vaddsd xmm0, xmm0, [rax+rdx*8+8]
vmovsd [rax+8], xmm0 # line 3 <---------------------------------+
add rax, 8 # rax=rax_orig+8 |
vmovsd [rax+rcx*8+8], xmm0 # line 5 <------------------------------------------+
vaddsd xmm0, xmm0, [rax] # depends on line 3, rax+8;[rax] == [rax+8] --------+ |
sub rax, -8 # rax=rax_orig+16 | |
vaddsd xmm0, xmm0, [rax-8] # depends on line 3, rax+16;[rax-8] == [rax+8] -----+ |
dec rcx # rcx=rcx_orig-1 |
vaddsd xmm0, xmm0, [rax+rcx*8+8] # depends on line 5, [(rax+8)+(rcx-1)*8+8] == [rax+rcx*8+8] --+
mov rdx, rcx # |
vaddsd xmm0, xmm0, [rax+rdx*8+8] # depends on line 5, rcx == rdx -----------------------------+
vmulsd xmm0, xmm0, xmm1
add rax, 8
cmp rsi, rax

View File

@@ -102,6 +102,7 @@ class TestParserX86Intel(unittest.TestCase):
instr11 = "\tlea\trcx, OFFSET FLAT:??_R0N@8+8"
instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555"
instr13 = "\tjmp\t$LN18@operator"
instr14 = "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]"
parsed_1 = self.parser.parse_instruction(instr1)
parsed_2 = self.parser.parse_instruction(instr2)
@@ -116,6 +117,7 @@ class TestParserX86Intel(unittest.TestCase):
parsed_11 = self.parser.parse_instruction(instr11)
parsed_12 = self.parser.parse_instruction(instr12)
parsed_13 = self.parser.parse_instruction(instr13)
parsed_14 = self.parser.parse_instruction(instr14)
self.assertEqual(parsed_1.mnemonic, "sub")
self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP"))
@@ -206,6 +208,19 @@ class TestParserX86Intel(unittest.TestCase):
self.assertEqual(parsed_13.mnemonic, "jmp")
self.assertEqual(parsed_13.operands[0], IdentifierOperand(name="$LN18@operator"))
self.assertEqual(parsed_14.mnemonic, "vaddsd")
self.assertEqual(parsed_14.operands[0], RegisterOperand(name="XMM0"))
self.assertEqual(parsed_14.operands[1], RegisterOperand(name="XMM0"))
self.assertEqual(
parsed_14.operands[2],
MemoryOperand(
base=RegisterOperand(name="RDX"),
offset=ImmediateOperand(value=8),
index=RegisterOperand(name="RAX"),
scale=8,
),
)
def test_parse_line(self):
line_comment = "; -- Begin main"
line_instruction = "\tret\t0"
@@ -364,21 +379,19 @@ class TestParserX86Intel(unittest.TestCase):
offset=ImmediateOperand(value=8),
),
],
comment_id=None,
line=" vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]",
line_number=62,
),
)
self.assertEqual(
parsed[95],
parsed[101],
InstructionForm(
directive_id=DirectiveOperand(name=".long", parameters=["0"]),
line=" .long 0",
line_number=96,
directive_id=DirectiveOperand(name=".long", parameters=["1072939201"]),
line=" .long 1072939201",
line_number=102,
),
)
self.assertEqual(len(parsed), 103)
self.assertEqual(len(parsed), 102)
def test_normalize_imd(self):
imd_binary = ImmediateOperand(value="1001111B")