flake8 formatting

This commit is contained in:
JanLJL
2025-03-05 10:19:10 +01:00
parent 5cd6b2cf9d
commit 02716e7b41
8 changed files with 80 additions and 83 deletions

View File

@@ -11,7 +11,7 @@ from ruamel.yaml import YAML
from osaca.db_interface import import_benchmark_output, sanity_check from osaca.db_interface import import_benchmark_output, sanity_check
from osaca.frontend import Frontend from osaca.frontend import Frontend
from osaca.parser import BaseParser, ParserAArch64, ParserX86, ParserX86ATT, ParserX86Intel from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT, ParserX86Intel
from osaca.semantics import ( from osaca.semantics import (
INSTR_FLAGS, INSTR_FLAGS,
ArchSemantics, ArchSemantics,
@@ -355,7 +355,7 @@ def inspect(args, output_file=sys.stdout):
(arch, syntax) (arch, syntax)
for arch in archs_to_try for arch in archs_to_try
for syntax in syntaxes_to_try for syntax in syntaxes_to_try
if (syntax != None) == (MachineModel.get_isa_for_arch(arch) == "x86") if (syntax is not None) == (MachineModel.get_isa_for_arch(arch) == "x86")
] ]
# Parse file. # Parse file.

View File

@@ -1,8 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import string
import re
import pyparsing as pp import pyparsing as pp
from osaca.parser import ParserX86 from osaca.parser import ParserX86
@@ -34,11 +31,11 @@ class ParserX86ATT(ParserX86):
InstructionForm( InstructionForm(
mnemonic="mov", mnemonic="mov",
operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")] operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
), ),
InstructionForm( InstructionForm(
mnemonic="movl", mnemonic="movl",
operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")] operands=[ImmediateOperand(value=111), RegisterOperand(name="ebx")]
) )
], ],
InstructionForm( InstructionForm(
directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"]) directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])
@@ -51,11 +48,11 @@ class ParserX86ATT(ParserX86):
InstructionForm( InstructionForm(
mnemonic="mov", mnemonic="mov",
operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")] operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
), ),
InstructionForm( InstructionForm(
mnemonic="movl", mnemonic="movl",
operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")] operands=[ImmediateOperand(value=222), RegisterOperand(name="ebx")]
) )
], ],
InstructionForm( InstructionForm(
directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"]) directive_id=DirectiveOperand(name="byte", parameters=["100", "103", "144"])

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import pyparsing as pp import pyparsing as pp
import re
import string
import unicodedata import unicodedata
from osaca.parser import ParserX86 from osaca.parser import ParserX86
@@ -25,6 +23,7 @@ NON_ASCII_PRINTABLE_CHARACTERS = "".join(
if unicodedata.category(chr(cp)) not in ("Cc", "Zl", "Zp", "Cs", "Cn") if unicodedata.category(chr(cp)) not in ("Cc", "Zl", "Zp", "Cs", "Cn")
) )
# References: # References:
# ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf. # ASM386 Assembly Language Reference, document number 469165-003, https://mirror.math.princeton.edu/pub/oldlinux/Linux.old/Ref-docs/asm-ref.pdf.
# Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170. # Microsoft Macro Assembler BNF Grammar, https://learn.microsoft.com/en-us/cpp/assembler/masm/masm-bnf-grammar?view=msvc-170.
@@ -146,16 +145,18 @@ class ParserX86Intel(ParserX86):
# A hack to help with comparison instruction: if the instruction is in the model, and has # A hack to help with comparison instruction: if the instruction is in the model, and has
# exactly two sources, swap its operands. # exactly two sources, swap its operands.
if (model and if (
not has_destination and model
len(instruction_form.operands) == 2 and not has_destination
and len(instruction_form.operands) == 2
and not isa_model.get_instruction( and not isa_model.get_instruction(
mnemonic, mnemonic,
instruction_form.operands instruction_form.operands
) and not arch_model.get_instruction( ) and not arch_model.get_instruction(
mnemonic, mnemonic,
instruction_form.operands instruction_form.operands
)): )
):
instruction_form.operands.reverse() instruction_form.operands.reverse()
# If the instruction has a well-known data type, append a suffix. # If the instruction has a well-known data type, append a suffix.
@@ -175,7 +176,6 @@ class ParserX86Intel(ParserX86):
instruction_form.mnemonic = suffixed_mnemonic instruction_form.mnemonic = suffixed_mnemonic
break break
def construct_parser(self): def construct_parser(self):
"""Create parser for x86 Intel ISA.""" """Create parser for x86 Intel ISA."""
# Numeric literal. # Numeric literal.
@@ -353,12 +353,15 @@ class ParserX86Intel(ParserX86):
(pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign") (pp.Literal("+") ^ pp.Literal("-")).setResultsName("sign")
+ integer_number | identifier + integer_number | identifier
).setResultsName(self.immediate_id) ).setResultsName(self.immediate_id)
pre_displacement = pp.Group(integer_number + pp.Literal("+") pre_displacement = pp.Group(
integer_number + pp.Literal("+")
).setResultsName(self.immediate_id) ).setResultsName(self.immediate_id)
indexed = pp.Group( indexed = pp.Group(
index_register.setResultsName("index") index_register.setResultsName("index")
+ pp.Optional(pp.Literal("*") + pp.Optional(
+ scale.setResultsName("scale")) pp.Literal("*")
+ scale.setResultsName("scale")
)
).setResultsName("indexed") ).setResultsName("indexed")
register_expression = pp.Group( register_expression = pp.Group(
pp.Literal("[") pp.Literal("[")
@@ -370,7 +373,7 @@ class ParserX86Intel(ParserX86):
+ pp.Literal("+") + pp.Literal("+")
+ indexed).setResultsName("base_and_indexed") + indexed).setResultsName("base_and_indexed")
^ indexed ^ indexed
).setResultsName("non_displacement") ).setResultsName("non_displacement")
+ pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement")) + pp.Optional(pp.Group(post_displacement).setResultsName("post_displacement"))
+ pp.Literal("]") + pp.Literal("]")
).setResultsName("register_expression") ).setResultsName("register_expression")
@@ -472,7 +475,7 @@ class ParserX86Intel(ParserX86):
pp.CaselessKeyword("ALIAS") pp.CaselessKeyword("ALIAS")
| pp.CaselessKeyword("ALIGN") | pp.CaselessKeyword("ALIGN")
| pp.CaselessKeyword("ASSUME") | pp.CaselessKeyword("ASSUME")
#| pp.CaselessKeyword("BYTE") # | pp.CaselessKeyword("BYTE")
| pp.CaselessKeyword("CATSTR") | pp.CaselessKeyword("CATSTR")
| pp.CaselessKeyword("COMM") | pp.CaselessKeyword("COMM")
| pp.CaselessKeyword("COMMENT") | pp.CaselessKeyword("COMMENT")
@@ -482,7 +485,7 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("DQ") | pp.CaselessKeyword("DQ")
| pp.CaselessKeyword("DT") | pp.CaselessKeyword("DT")
| pp.CaselessKeyword("DW") | pp.CaselessKeyword("DW")
#| pp.CaselessKeyword("DWORD") # | pp.CaselessKeyword("DWORD")
| pp.CaselessKeyword("ECHO") | pp.CaselessKeyword("ECHO")
| pp.CaselessKeyword("END") | pp.CaselessKeyword("END")
| pp.CaselessKeyword("ENDP") | pp.CaselessKeyword("ENDP")
@@ -491,14 +494,14 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("EVEN") | pp.CaselessKeyword("EVEN")
| pp.CaselessKeyword("EXTRN") | pp.CaselessKeyword("EXTRN")
| pp.CaselessKeyword("EXTERNDEF") | pp.CaselessKeyword("EXTERNDEF")
#| pp.CaselessKeyword("FWORD") # | pp.CaselessKeyword("FWORD")
| pp.CaselessKeyword("GROUP") | pp.CaselessKeyword("GROUP")
| pp.CaselessKeyword("INCLUDE") | pp.CaselessKeyword("INCLUDE")
| pp.CaselessKeyword("INCLUDELIB") | pp.CaselessKeyword("INCLUDELIB")
| pp.CaselessKeyword("INSTR") | pp.CaselessKeyword("INSTR")
| pp.CaselessKeyword("INVOKE") | pp.CaselessKeyword("INVOKE")
| pp.CaselessKeyword("LABEL") | pp.CaselessKeyword("LABEL")
#| pp.CaselessKeyword("MMWORD") # | pp.CaselessKeyword("MMWORD")
| pp.CaselessKeyword("OPTION") | pp.CaselessKeyword("OPTION")
| pp.CaselessKeyword("ORG") | pp.CaselessKeyword("ORG")
| pp.CaselessKeyword("PAGE") | pp.CaselessKeyword("PAGE")
@@ -507,27 +510,27 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("PROTO") | pp.CaselessKeyword("PROTO")
| pp.CaselessKeyword("PUBLIC") | pp.CaselessKeyword("PUBLIC")
| pp.CaselessKeyword("PUSHCONTEXT") | pp.CaselessKeyword("PUSHCONTEXT")
#| pp.CaselessKeyword("QWORD") # | pp.CaselessKeyword("QWORD")
#| pp.CaselessKeyword("REAL10") # | pp.CaselessKeyword("REAL10")
#| pp.CaselessKeyword("REAL4") # | pp.CaselessKeyword("REAL4")
#| pp.CaselessKeyword("REAL8") # | pp.CaselessKeyword("REAL8")
| pp.CaselessKeyword("RECORD") | pp.CaselessKeyword("RECORD")
#| pp.CaselessKeyword("SBYTE") # | pp.CaselessKeyword("SBYTE")
#| pp.CaselessKeyword("SDWORD") # | pp.CaselessKeyword("SDWORD")
| pp.CaselessKeyword("SEGMENT") | pp.CaselessKeyword("SEGMENT")
| pp.CaselessKeyword("SIZESTR") | pp.CaselessKeyword("SIZESTR")
| pp.CaselessKeyword("STRUCT") | pp.CaselessKeyword("STRUCT")
| pp.CaselessKeyword("SUBSTR") | pp.CaselessKeyword("SUBSTR")
| pp.CaselessKeyword("SUBTITLE") | pp.CaselessKeyword("SUBTITLE")
#| pp.CaselessKeyword("SWORD") # | pp.CaselessKeyword("SWORD")
#| pp.CaselessKeyword("TBYTE") # | pp.CaselessKeyword("TBYTE")
| pp.CaselessKeyword("TEXTEQU") | pp.CaselessKeyword("TEXTEQU")
| pp.CaselessKeyword("TITLE") | pp.CaselessKeyword("TITLE")
| pp.CaselessKeyword("TYPEDEF") | pp.CaselessKeyword("TYPEDEF")
| pp.CaselessKeyword("UNION") | pp.CaselessKeyword("UNION")
#| pp.CaselessKeyword("WORD") # | pp.CaselessKeyword("WORD")
#| pp.CaselessKeyword("XMMWORD") # | pp.CaselessKeyword("XMMWORD")
#| pp.CaselessKeyword("YMMWORD") # | pp.CaselessKeyword("YMMWORD")
) )
self.directive = pp.Group( self.directive = pp.Group(
pp.Optional(~directive_keywords + directive_identifier) pp.Optional(~directive_keywords + directive_identifier)

View File

@@ -1,7 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Semantics opbject responsible for architecture specific semantic operations""" """Semantics opbject responsible for architecture specific semantic operations"""
from dis import Instruction
import sys import sys
import warnings import warnings
from itertools import chain from itertools import chain

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import copy import copy
import os
import signal
import time import time
from itertools import chain from itertools import chain
from multiprocessing import Manager, Process, cpu_count from multiprocessing import Manager, Process, cpu_count

View File

@@ -1,11 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from collections import OrderedDict from collections import OrderedDict
from enum import Enum from enum import Enum
from functools import partial
from osaca.parser import get_parser from osaca.parser import get_parser
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.directive import DirectiveOperand
from osaca.parser.identifier import IdentifierOperand from osaca.parser.identifier import IdentifierOperand
from osaca.parser.immediate import ImmediateOperand from osaca.parser.immediate import ImmediateOperand
from osaca.parser.memory import MemoryOperand from osaca.parser.memory import MemoryOperand
@@ -13,6 +10,7 @@ from osaca.parser.register import RegisterOperand
COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"} COMMENT_MARKER = {"start": "OSACA-BEGIN", "end": "OSACA-END"}
# State of marker matching. # State of marker matching.
# No: we have determined that the code doesn't match the marker. # No: we have determined that the code doesn't match the marker.
# Partial: so far the code matches the marker, but we have not reached the end of the marker yet. # Partial: so far the code matches the marker, but we have not reached the end of the marker yet.
@@ -173,6 +171,7 @@ def get_marker(isa, syntax="ATT", comment=""):
return start_marker, end_marker return start_marker, end_marker
def match_line(parser, line, marker_line): def match_line(parser, line, marker_line):
""" """
Returns whether `line` matches `marker_line`. Returns whether `line` matches `marker_line`.
@@ -198,6 +197,7 @@ def match_line(parser, line, marker_line):
else: else:
return Matching.No return Matching.No
def match_operands(line_operands, marker_line_operands): def match_operands(line_operands, marker_line_operands):
if len(line_operands) != len(marker_line_operands): if len(line_operands) != len(marker_line_operands):
return False return False
@@ -207,6 +207,7 @@ def match_operands(line_operands, marker_line_operands):
zip(line_operands, marker_line_operands) zip(line_operands, marker_line_operands)
) )
def match_operand(line_operand, marker_line_operand): def match_operand(line_operand, marker_line_operand):
if ( if (
isinstance(line_operand, ImmediateOperand) isinstance(line_operand, ImmediateOperand)
@@ -221,14 +222,15 @@ def match_operand(line_operand, marker_line_operand):
): ):
return True return True
if ( if (
isinstance(line_operand, MemoryOperand) isinstance(line_operand, MemoryOperand)
and isinstance(marker_line_operand, MemoryOperand) and isinstance(marker_line_operand, MemoryOperand)
and match_operand(line_operand.base, marker_line_operand.base) and match_operand(line_operand.base, marker_line_operand.base)
and match_operand(line_operand.offset, line_operand.offset) and match_operand(line_operand.offset, line_operand.offset)
): ):
return True return True
return False return False
def match_parameters(parser, line_parameters, marker_line_parameters): def match_parameters(parser, line_parameters, marker_line_parameters):
""" """
Returns whether `line_parameters` matches `marker_line_parameters`. Returns whether `line_parameters` matches `marker_line_parameters`.
@@ -238,13 +240,10 @@ def match_parameters(parser, line_parameters, marker_line_parameters):
:return: Matching. In case of partial match, `marker_line_parameters` is modified and should be :return: Matching. In case of partial match, `marker_line_parameters` is modified and should be
reused for matching the next line in the parsed assembly code. reused for matching the next line in the parsed assembly code.
""" """
line_parameter_count = len(line_parameters)
marker_line_parameter_count = len(marker_line_parameters)
# The elements of `marker_line_parameters` are consumed as they are matched. # The elements of `marker_line_parameters` are consumed as they are matched.
for line_parameter in line_parameters: for line_parameter in line_parameters:
if not marker_line_parameters: if not marker_line_parameters:
break; break
marker_line_parameter = marker_line_parameters[0] marker_line_parameter = marker_line_parameters[0]
if not match_parameter(parser, line_parameter, marker_line_parameter): if not match_parameter(parser, line_parameter, marker_line_parameter):
return Matching.No return Matching.No
@@ -254,6 +253,7 @@ def match_parameters(parser, line_parameters, marker_line_parameters):
else: else:
return Matching.Full return Matching.Full
def match_parameter(parser, line_parameter, marker_line_parameter): def match_parameter(parser, line_parameter, marker_line_parameter):
if line_parameter.lower() == marker_line_parameter.lower(): if line_parameter.lower() == marker_line_parameter.lower():
return True return True

View File

@@ -6,8 +6,6 @@ Unit tests for x86 Intel assembly parser
import os import os
import unittest import unittest
from pyparsing import ParseException
from osaca.parser import ParserX86Intel, InstructionForm from osaca.parser import ParserX86Intel, InstructionForm
from osaca.parser.directive import DirectiveOperand from osaca.parser.directive import DirectiveOperand
from osaca.parser.identifier import IdentifierOperand from osaca.parser.identifier import IdentifierOperand
@@ -134,13 +132,13 @@ class TestParserX86Intel(unittest.TestCase):
self.assertEqual(parsed_4.mnemonic, "mov") self.assertEqual(parsed_4.mnemonic, "mov")
self.assertEqual(parsed_4.operands[0], self.assertEqual(parsed_4.operands[0],
RegisterOperand(name="EAX")) RegisterOperand(name="EAX"))
self.assertEqual(parsed_4.operands[1], self.assertEqual(
MemoryOperand(offset=ImmediateOperand( parsed_4.operands[1],
identifier="cur_elements$", MemoryOperand(
value=104 offset=ImmediateOperand(identifier="cur_elements$", value=104),
), base=RegisterOperand(name="RBP")
base=RegisterOperand(name="RBP"))) )
)
self.assertEqual(parsed_5.mnemonic, "mov") self.assertEqual(parsed_5.mnemonic, "mov")
self.assertEqual(parsed_5.operands[0], self.assertEqual(parsed_5.operands[0],
MemoryOperand(offset=ImmediateOperand(value=24), MemoryOperand(offset=ImmediateOperand(value=24),
@@ -252,18 +250,21 @@ class TestParserX86Intel(unittest.TestCase):
self.assertEqual(parsed[0].line_number, 1) self.assertEqual(parsed[0].line_number, 1)
# Check specifically that the values of the symbols defined by "=" were correctly # Check specifically that the values of the symbols defined by "=" were correctly
# propagated. # propagated.
self.assertEqual(parsed[69], self.assertEqual(
InstructionForm(mnemonic="mov", parsed[69],
operands=[MemoryOperand( InstructionForm(
base=RegisterOperand("RBP"), mnemonic="mov",
offset=ImmediateOperand( operands=[
value=4, MemoryOperand(
identifier="r$1" base=RegisterOperand("RBP"),
) offset=ImmediateOperand(value=4, identifier="r$1")
), ),
ImmediateOperand(value=0)], ImmediateOperand(value=0)
line="\tmov\tDWORD PTR r$1[rbp], 0", ],
line_number=73)) line="\tmov\tDWORD PTR r$1[rbp], 0",
line_number=73
)
)
# Check a few lines to make sure that we produced something reasonable. # Check a few lines to make sure that we produced something reasonable.
self.assertEqual(parsed[60], self.assertEqual(parsed[60],
InstructionForm(mnemonic="mov", InstructionForm(mnemonic="mov",

View File

@@ -314,11 +314,11 @@ class TestSemanticTools(unittest.TestCase):
def test_src_dst_assignment_x86_intel(self): def test_src_dst_assignment_x86_intel(self):
for instruction_form in self.kernel_x86_intel: for instruction_form in self.kernel_x86_intel:
with self.subTest(instruction_form=instruction_form): with self.subTest(instruction_form=instruction_form):
if instruction_form.semantic_operands is not None: if instruction_form.semantic_operands is not None:
self.assertTrue("source" in instruction_form.semantic_operands) self.assertTrue("source" in instruction_form.semantic_operands)
self.assertTrue("destination" in instruction_form.semantic_operands) self.assertTrue("destination" in instruction_form.semantic_operands)
self.assertTrue("src_dst" in instruction_form.semantic_operands) self.assertTrue("src_dst" in instruction_form.semantic_operands)
def test_src_dst_assignment_AArch64(self): def test_src_dst_assignment_AArch64(self):
for instruction_form in self.kernel_AArch64: for instruction_form in self.kernel_AArch64: