Classes for OperandForm and Operand types

This commit is contained in:
stefan.desouza@outlook.com
2023-08-07 15:01:48 +02:00
parent 9a0474bcc1
commit 1eb692c86f
4 changed files with 231 additions and 55 deletions

View File

@@ -21,7 +21,8 @@ class InstructionForm:
OPERANDS_ID = "operands"
def __init__(self, INSTRUCTION_ID = None, OPERANDS_ID = [], DIRECTIVE_ID = None
, COMMENT_ID = None, LABEL_ID = None, LINE = None, LINE_NUMBER = None):
, COMMENT_ID = None, LABEL_ID = None, LINE = None, LINE_NUMBER = None
, SEMANTIC_OPERANDS = None):
self._INSTRUCTION_ID = INSTRUCTION_ID
self._OPERANDS_ID = OPERANDS_ID
self._DIRECTIVE_ID = DIRECTIVE_ID
@@ -29,6 +30,11 @@ class InstructionForm:
self._LABEL_ID = LABEL_ID
self._LINE = LINE
self._LINE_NUMBER = LINE_NUMBER
self._SEMANTIC_OPERANDS = SEMANTIC_OPERANDS
@property
def semantic_operands(self):
return self._SEMANTIC_OPERANDS
@property
def instruction(self):
@@ -58,6 +64,10 @@ class InstructionForm:
def operands(self):
return self._OPERANDS_ID
@semantic_operands.setter
def semantic_operands(self, semantic_operands):
self._SEMANTIC_OPERANDS = semantic_operands
@directive.setter
def directive(self, directive):
self._DIRECTIVE_ID = directive
@@ -86,6 +96,55 @@ class InstructionForm:
def comment(self, comment):
self._COMMENT_ID =comment
class OperandForm:
    """Container describing a single parsed operand.

    An operand carries up to five optional parts: memory, immediate,
    directive, label and comment.  Each part is stored on a private
    attribute and exposed through a read/write property of the same name.
    Parts that are not supplied stay ``None``.
    """

    def __init__(
        self,
        MEMORY_ID=None,
        IMMEDIATE_ID=None,
        DIRECTIVE_ID=None,
        LABEL_ID=None,
        COMMENT_ID=None,
    ):
        """Store the given operand parts; every part defaults to ``None``."""
        self._COMMENT_ID = COMMENT_ID
        self._LABEL_ID = LABEL_ID
        self._DIRECTIVE_ID = DIRECTIVE_ID
        self._IMMEDIATE_ID = IMMEDIATE_ID
        self._MEMORY_ID = MEMORY_ID

    # -- memory ---------------------------------------------------------
    @property
    def memory(self):
        """Memory part of the operand (``None`` when absent)."""
        return self._MEMORY_ID

    @memory.setter
    def memory(self, memory):
        self._MEMORY_ID = memory

    # -- immediate ------------------------------------------------------
    @property
    def immediate(self):
        """Immediate part of the operand (``None`` when absent)."""
        return self._IMMEDIATE_ID

    @immediate.setter
    def immediate(self, immediate):
        self._IMMEDIATE_ID = immediate

    # -- directive ------------------------------------------------------
    @property
    def directive(self):
        """Directive part of the operand (``None`` when absent)."""
        return self._DIRECTIVE_ID

    @directive.setter
    def directive(self, directive):
        self._DIRECTIVE_ID = directive

    # -- label ----------------------------------------------------------
    @property
    def label(self):
        """Label part of the operand (``None`` when absent)."""
        return self._LABEL_ID

    @label.setter
    def label(self, label):
        self._LABEL_ID = label

    # -- comment --------------------------------------------------------
    @property
    def comment(self):
        """Comment attached to the operand (``None`` when absent)."""
        return self._COMMENT_ID

    @comment.setter
    def comment(self, comment):
        self._COMMENT_ID = comment
class DirectiveForm:
def __init__(self, NAME_ID = None, PARAMETER_ID = None, COMMENT_ID = None):
self._NAME_ID = NAME_ID
@@ -124,6 +183,85 @@ class DirectiveForm:
def comment(self, comment):
self._COMMENT_ID = comment
class MemoryForm:
    """Container for the components of a memory-address operand.

    Stores offset, base, index, scale and an optional segment extension.
    Each component lives on a private attribute and is exposed through a
    read/write property; components that are not supplied stay ``None``.
    """

    def __init__(
        self,
        OFFSET_ID=None,
        BASE_ID=None,
        INDEX_ID=None,
        SCALE_ID=None,
        SEGMENT_EXT_ID=None,
    ):
        """Store the given address components; every one defaults to ``None``."""
        self._SEGMENT_EXT_ID = SEGMENT_EXT_ID
        self._SCALE_ID = SCALE_ID
        self._INDEX_ID = INDEX_ID
        self._BASE_ID = BASE_ID
        self._OFFSET_ID = OFFSET_ID

    # -- offset ---------------------------------------------------------
    @property
    def offset(self):
        """Offset component of the address."""
        return self._OFFSET_ID

    @offset.setter
    def offset(self, offset):
        self._OFFSET_ID = offset

    # -- base -----------------------------------------------------------
    @property
    def base(self):
        """Base component of the address."""
        return self._BASE_ID

    @base.setter
    def base(self, base):
        self._BASE_ID = base

    # -- index ----------------------------------------------------------
    @property
    def index(self):
        """Index component of the address."""
        return self._INDEX_ID

    @index.setter
    def index(self, index):
        self._INDEX_ID = index

    # -- scale ----------------------------------------------------------
    @property
    def scale(self):
        """Scale component of the address."""
        return self._SCALE_ID

    @scale.setter
    def scale(self, scale):
        self._SCALE_ID = scale

    # -- segment extension ----------------------------------------------
    @property
    def segment_ext_id(self):
        """Segment extension of the address (``None`` when absent)."""
        return self._SEGMENT_EXT_ID

    @segment_ext_id.setter
    def segment_ext_id(self, segment):
        self._SEGMENT_EXT_ID = segment
class LabelForm:
    """Container for a parsed label: its name and an optional comment.

    Iterating over a label yields the items of its comment.  Iteration works
    on a snapshot, so ``for`` loops, ``list(label)`` and membership tests
    (``x in label``, which Python implements through ``__iter__`` when no
    ``__contains__`` exists) never modify the stored comment.
    """

    def __init__(self, NAME_ID=None, COMMENT_ID=None):
        """Store the label name and comment; both default to ``None``."""
        self._NAME_ID = NAME_ID
        self._COMMENT_ID = COMMENT_ID

    @property
    def name(self):
        """Name of the label."""
        return self._NAME_ID

    @name.setter
    def name(self, name):
        self._NAME_ID = name

    @property
    def comment(self):
        """Comment attached to the label (``None`` when absent)."""
        return self._COMMENT_ID

    @comment.setter
    def comment(self, comment):
        self._COMMENT_ID = comment

    def __iter__(self):
        """Return an iterator over a copy of the comment items.

        Previously this returned ``self`` and relied on ``__next__`` popping
        items, which emptied the comment as a side effect of merely looking
        at it.  A missing or empty comment yields nothing.
        """
        # Copy first so that changes made while iterating cannot affect the
        # stored comment, and vice versa.
        return iter(list(self._COMMENT_ID or ()))

    def __next__(self):
        """Remove and return the first comment item (legacy behaviour).

        Kept for callers that invoke ``next(label)`` directly; unlike
        ``__iter__`` this deliberately consumes the comment.
        NOTE(review): assumes the comment is a list (needs ``pop``) -- confirm.

        Raises:
            StopIteration: if the comment is ``None`` or empty.
        """
        if not self._COMMENT_ID:
            raise StopIteration
        return self._COMMENT_ID.pop(0)
class ParserX86ATT(BaseParser):
_instance = None
@@ -329,10 +467,10 @@ class ParserX86ATT(BaseParser):
if result is None:
try:
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
instruction_form.label = result[self.LABEL_ID]["name"]
if self.COMMENT_ID in result[self.LABEL_ID]:
instruction_form.label = result.label.name
if self.COMMENT_ID in result.label:
instruction_form.comment = " ".join(
result[self.LABEL_ID][self.COMMENT_ID]
result.label.comment
)
except pp.ParseException:
pass
@@ -343,10 +481,10 @@ class ParserX86ATT(BaseParser):
result = self.process_operand(
self.directive.parseString(line, parseAll=True).asDict()
)
instruction_form.directive = {
"name": result.directive.name,
"parameters": result.directive.parameters,
}
instruction_form.directive = DirectiveForm(
NAME_ID = result.directive.name,
PARAMETER_ID = result.directive.parameters,
)
if self.COMMENT_ID in result.directive:
instruction_form.comment = " ".join(
@@ -421,7 +559,7 @@ class ParserX86ATT(BaseParser):
directive_new.parameters = directive["parameters"]
if "comment" in directive:
directive_new.comment = directive["comment"]
return InstructionForm(DIRECTIVE_ID = directive_new)
return OperandForm(DIRECTIVE_ID = directive_new)
def process_memory_address(self, memory_address):
"""Post-process memory address operand"""
@@ -437,17 +575,18 @@ class ParserX86ATT(BaseParser):
offset = {"value": offset}
elif offset is not None and "value" in offset:
offset["value"] = int(offset["value"], 0)
new_dict = AttrDict({"offset": offset, "base": base, "index": index, "scale": scale})
new_dict = MemoryForm(OFFSET_ID = offset, BASE_ID = base, INDEX_ID = index, SCALE_ID = scale)
# Add segmentation extension if existing
if self.SEGMENT_EXT_ID in memory_address:
new_dict[self.SEGMENT_EXT_ID] = memory_address[self.SEGMENT_EXT_ID]
return AttrDict({self.MEMORY_ID: new_dict})
new_dict.segment_ext_id = memory_address[self.SEGMENT_EXT_ID]
return OperandForm(MEMORY_ID = new_dict)
def process_label(self, label):
"""Post-process label asm line"""
# remove duplicated 'name' level due to identifier
label["name"] = label["name"][0]["name"]
return AttrDict({self.LABEL_ID: label})
new_label = LabelForm(NAME_ID = label["name"], COMMENT_ID = label["comment"] if "comment" in label else None)
return OperandForm(LABEL_ID = new_label)
def process_immediate(self, immediate):
"""Post-process immediate operand"""
@@ -456,7 +595,7 @@ class ParserX86ATT(BaseParser):
return immediate
# otherwise just make sure the immediate is a decimal
immediate["value"] = int(immediate["value"], 0)
return AttrDict({self.IMMEDIATE_ID: immediate})
return OperandForm(IMMEDIATE_ID = immediate)
def get_full_reg_name(self, register):
"""Return one register name string including all attributes"""

View File

@@ -6,6 +6,36 @@ from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from .hw_model import MachineModel
class SemanticForm:
    """Container grouping operands by semantic role.

    Holds three collections: source operands, destination operands, and
    operands that are both source and destination (``src_dst``).  Each is
    exposed through a read/write property.
    """

    def __init__(self, SOURCE_ID=None, DESTINATION_ID=None, SRC_DST=None):
        """Store the three operand collections.

        Any collection that is omitted (or passed as ``None``) becomes a new
        empty list owned by this instance.  The former ``[]`` defaults were
        evaluated once at definition time, so every instance created without
        arguments shared -- and could corrupt -- the same list objects.
        Collections passed in explicitly are stored as-is, not copied.
        """
        self._SOURCE_ID = [] if SOURCE_ID is None else SOURCE_ID
        self._DESTINATION_ID = [] if DESTINATION_ID is None else DESTINATION_ID
        self._SRC_DST = [] if SRC_DST is None else SRC_DST

    @property
    def source(self):
        """Operands that are only read."""
        return self._SOURCE_ID

    @source.setter
    def source(self, source):
        self._SOURCE_ID = source

    @property
    def destination(self):
        """Operands that are only written."""
        return self._DESTINATION_ID

    @destination.setter
    def destination(self, destination):
        self._DESTINATION_ID = destination

    @property
    def src_dst(self):
        """Operands that are both read and written."""
        return self._SRC_DST

    @src_dst.setter
    def src_dst(self, src_dst):
        self._SRC_DST = src_dst
class INSTR_FLAGS:
"""
@@ -45,32 +75,32 @@ class ISASemantics(object):
def assign_src_dst(self, instruction_form):
"""Update instruction form dictionary with source, destination and flag information."""
# if the instruction form doesn't have operands or is None, there's nothing to do
if instruction_form["operands"] is None or instruction_form["instruction"] is None:
instruction_form["semantic_operands"] = AttrDict(
{"source": [], "destination": [], "src_dst": []}
if instruction_form.operands is None or instruction_form.instruction is None:
instruction_form.semantic_operands = SemanticForm(
SOURCE_ID = [], DESTINATION_ID = [], SRC_DST = []
)
return
# check if instruction form is in ISA yaml, otherwise apply standard operand assignment
# (one dest, others source)
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"], instruction_form["operands"]
instruction_form.instruction, instruction_form.operands
)
if (
isa_data is None
and self._isa == "x86"
and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
and instruction_form.instruction[-1] in self.GAS_SUFFIXES
):
# Check for instruction without GAS suffix
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"][:-1], instruction_form["operands"]
instruction_form.instruction[:-1], instruction_form.operands
)
if isa_data is None and self._isa == "aarch64" and "." in instruction_form["instruction"]:
if isa_data is None and self._isa == "aarch64" and "." in instruction_form.instruction:
# Check for instruction without shape/cc suffix
suffix_start = instruction_form["instruction"].index(".")
suffix_start = instruction_form.instruction.index(".")
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"][:suffix_start], instruction_form["operands"]
instruction_form.instruction[:suffix_start], instruction_form.operands
)
operands = instruction_form["operands"]
operands = instruction_form.operands
op_dict = {}
assign_default = False
if isa_data:
@@ -81,28 +111,28 @@ class ISASemantics(object):
assign_default = True
# check for equivalent register-operands DB entry if LD/ST
if any(["memory" in op for op in operands]):
operands_reg = self.substitute_mem_address(instruction_form["operands"])
operands_reg = self.substitute_mem_address(instruction_form.operands)
isa_data_reg = self._isa_model.get_instruction(
instruction_form["instruction"], operands_reg
instruction_form.instruction, operands_reg
)
if (
isa_data_reg is None
and self._isa == "x86"
and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
and instruction_form.instruction[-1] in self.GAS_SUFFIXES
):
# Check for instruction without GAS suffix
isa_data_reg = self._isa_model.get_instruction(
instruction_form["instruction"][:-1], operands_reg
instruction_form.instruction[:-1], operands_reg
)
if (
isa_data_reg is None
and self._isa == "aarch64"
and "." in instruction_form["instruction"]
and "." in instruction_form.instruction
):
# Check for instruction without shape/cc suffix
suffix_start = instruction_form["instruction"].index(".")
suffix_start = instruction_form.instruction.index(".")
isa_data_reg = self._isa_model.get_instruction(
instruction_form["instruction"][:suffix_start], operands_reg
instruction_form.instruction[:suffix_start], operands_reg
)
if isa_data_reg:
assign_default = False
@@ -149,7 +179,7 @@ class ISASemantics(object):
)
)
# store operand list in dict and reassign operand key/value pair
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
instruction_form.semantic_operands = AttrDict.convert_dict(op_dict)
# assign LD/ST flags
instruction_form["flags"] = (
instruction_form["flags"] if "flags" in instruction_form else []
@@ -177,22 +207,22 @@ class ISASemantics(object):
if "register" in op
]
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"], instruction_form["operands"]
instruction_form.instruction, instruction_form.operands
)
if (
isa_data is None
and self._isa == "x86"
and instruction_form["instruction"][-1] in self.GAS_SUFFIXES
and instruction_form.instruction[-1] in self.GAS_SUFFIXES
):
# Check for instruction without GAS suffix
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"][:-1], instruction_form["operands"]
instruction_form.instruction[:-1], instruction_form.operands
)
if isa_data is None and self._isa == "aarch64" and "." in instruction_form["instruction"]:
if isa_data is None and self._isa == "aarch64" and "." in instruction_form.instruction:
# Check for instruction without shape/cc suffix
suffix_start = instruction_form["instruction"].index(".")
suffix_start = instruction_form.instruction.index(".")
isa_data = self._isa_model.get_instruction(
instruction_form["instruction"][:suffix_start], instruction_form["operands"]
instruction_form.instruction[:suffix_start], instruction_form.operands
)
if only_postindexed:
@@ -308,8 +338,8 @@ class ISASemantics(object):
def _has_load(self, instruction_form):
"""Check if instruction form performs a LOAD"""
for operand in chain(
instruction_form["semantic_operands"]["source"],
instruction_form["semantic_operands"]["src_dst"],
instruction_form.semantic_operands.source,
instruction_form.semantic_operands.src_dst,
):
if "memory" in operand:
return True
@@ -318,8 +348,8 @@ class ISASemantics(object):
def _has_store(self, instruction_form):
"""Check if instruction form performs a STORE"""
for operand in chain(
instruction_form["semantic_operands"]["destination"],
instruction_form["semantic_operands"]["src_dst"],
instruction_form.semantic_operands.destination,
instruction_form.semantic_operands.src_dst,
):
if "memory" in operand:
return True
@@ -328,27 +358,27 @@ class ISASemantics(object):
def _get_regular_source_operands(self, instruction_form):
"""Get source operand of given instruction form assuming regular src/dst behavior."""
# if there is only one operand, assume it is a source operand
if len(instruction_form["operands"]) == 1:
return [instruction_form["operands"][0]]
if len(instruction_form.operands) == 1:
return [instruction_form.operands[0]]
if self._isa == "x86":
# return all but last operand
return [op for op in instruction_form["operands"][0:-1]]
return [op for op in instruction_form.operands[0:-1]]
elif self._isa == "aarch64":
return [op for op in instruction_form["operands"][1:]]
return [op for op in instruction_form.operands[1:]]
else:
raise ValueError("Unsupported ISA {}.".format(self._isa))
def _get_regular_destination_operands(self, instruction_form):
"""Get destination operand of given instruction form assuming regular src/dst behavior."""
# if there is only one operand, assume no destination
if len(instruction_form["operands"]) == 1:
if len(instruction_form.operands) == 1:
return []
if self._isa == "x86":
# return last operand
return instruction_form["operands"][-1:]
return instruction_form.operands[-1:]
if self._isa == "aarch64":
# return first operand
return instruction_form["operands"][:1]
return instruction_form.operands[:1]
else:
raise ValueError("Unsupported ISA {}.".format(self._isa))

View File

@@ -0,0 +1,11 @@
# OSACA-BEGIN
.L10:
vmovapd (%r15,%rax), %ymm0
vmovapd (%r12,%rax), %ymm3
addl $1, %ecx
vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0
vmovapd %ymm0, (%r14,%rax)
addq $32, %rax
cmpl %ecx, %r10d
ja .L10
# OSACA-END

View File

@@ -311,14 +311,10 @@ class TestParserX86ATT(unittest.TestCase):
)
def _get_label(self, parser, label):
return AttrDict.convert_dict(
parser.process_operand(parser.label.parseString(label, parseAll=True).asDict())
).label
return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()).label
def _get_directive(self, parser, directive):
return AttrDict.convert_dict(
parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict())
).directive
return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()).directive
@staticmethod
def _find_file(name):