diff --git a/osaca/parser/parser_x86att.py b/osaca/parser/parser_x86att.py index 99cc7a6..b263fbd 100755 --- a/osaca/parser/parser_x86att.py +++ b/osaca/parser/parser_x86att.py @@ -21,7 +21,8 @@ class InstructionForm: OPERANDS_ID = "operands" def __init__(self, INSTRUCTION_ID = None, OPERANDS_ID = [], DIRECTIVE_ID = None - , COMMENT_ID = None, LABEL_ID = None, LINE = None, LINE_NUMBER = None): + , COMMENT_ID = None, LABEL_ID = None, LINE = None, LINE_NUMBER = None + , SEMANTIC_OPERANDS = None): self._INSTRUCTION_ID = INSTRUCTION_ID self._OPERANDS_ID = OPERANDS_ID self._DIRECTIVE_ID = DIRECTIVE_ID @@ -29,6 +30,11 @@ class InstructionForm: self._LABEL_ID = LABEL_ID self._LINE = LINE self._LINE_NUMBER = LINE_NUMBER + self._SEMANTIC_OPERANDS = SEMANTIC_OPERANDS + + @property + def semantic_operands(self): + return self._SEMANTIC_OPERANDS @property def instruction(self): @@ -58,6 +64,10 @@ class InstructionForm: def operands(self): return self._OPERANDS_ID + @semantic_operands.setter + def semantic_operands(self, semantic_operands): + self._SEMANTIC_OPERANDS = semantic_operands + @directive.setter def directive(self, directive): self._DIRECTIVE_ID = directive @@ -86,6 +96,55 @@ class InstructionForm: def comment(self, comment): self._COMMENT_ID =comment +class OperandForm: + def __init__(self, MEMORY_ID = None, IMMEDIATE_ID = None, DIRECTIVE_ID = None, LABEL_ID = None + , COMMENT_ID = None): + self._MEMORY_ID = MEMORY_ID + self._IMMEDIATE_ID = IMMEDIATE_ID + self._DIRECTIVE_ID = DIRECTIVE_ID + self._LABEL_ID = LABEL_ID + self._COMMENT_ID = COMMENT_ID + + @property + def memory(self): + return self._MEMORY_ID + + @property + def immediate(self): + return self._IMMEDIATE_ID + + @property + def directive(self): + return self._DIRECTIVE_ID + + @property + def label(self): + return self._LABEL_ID + + @property + def comment(self): + return self._COMMENT_ID + + @memory.setter + def memory(self, memory): + self._MEMORY_ID = memory + + @immediate.setter + def immediate(self, immediate): + self._IMMEDIATE_ID = immediate + + @directive.setter + def directive(self, directive): + self._DIRECTIVE_ID = directive + + @label.setter + def label(self, label): + self._LABEL_ID = label + + @comment.setter + def comment(self, comment): + self._COMMENT_ID = comment + class DirectiveForm: def __init__(self, NAME_ID = None, PARAMETER_ID = None, COMMENT_ID = None): self._NAME_ID = NAME_ID @@ -124,6 +183,85 @@ class DirectiveForm: def comment(self, comment): self._COMMENT_ID = comment +class MemoryForm: + def __init__(self, OFFSET_ID = None, BASE_ID = None, INDEX_ID = None + , SCALE_ID = None, SEGMENT_EXT_ID = None): + self._OFFSET_ID = OFFSET_ID + self._BASE_ID = BASE_ID + self._INDEX_ID = INDEX_ID + self._SCALE_ID = SCALE_ID + self._SEGMENT_EXT_ID = SEGMENT_EXT_ID + + @property + def offset(self): + return self._OFFSET_ID + + @property + def base(self): + return self._BASE_ID + + @property + def index(self): + return self._INDEX_ID + + @property + def scale(self): + return self._SCALE_ID + + @property + def segment_ext_id(self): + return self._SEGMENT_EXT_ID + + @segment_ext_id.setter + def segment_ext_id(self, segment): + self._SEGMENT_EXT_ID= segment + + @offset.setter + def offset(self, offset): + self._OFFSET_ID = offset + + @base.setter + def base(self, base): + self._BASE_ID = base + + @index.setter + def index(self, index): + self._INDEX_ID = index + + @scale.setter + def scale(self, scale): + self._SCALE_ID = scale + +class LabelForm: + def __init__(self, NAME_ID = None, COMMENT_ID = None): + self._NAME_ID = NAME_ID + self._COMMENT_ID = COMMENT_ID + + @property + def name(self): + return self._NAME_ID + + @name.setter + def name(self, name): + self._NAME_ID = name + + @property + def comment(self): + return self._COMMENT_ID + + @comment.setter + def comment(self, comment): + self._COMMENT_ID = comment + + def __iter__(self): + return self + + def __next__(self): + if not self._COMMENT_ID: + raise StopIteration + return self._COMMENT_ID.pop(0) + + class ParserX86ATT(BaseParser): _instance = None @@ -329,10 +467,10 @@ class ParserX86ATT(BaseParser): if result is None: try: result = self.process_operand(self.label.parseString(line, parseAll=True).asDict()) - instruction_form.label = result[self.LABEL_ID]["name"] - if self.COMMENT_ID in result[self.LABEL_ID]: + instruction_form.label = result.label.name + if self.COMMENT_ID in result.label: instruction_form.comment = " ".join( - result[self.LABEL_ID][self.COMMENT_ID] + result.label.comment ) except pp.ParseException: pass @@ -343,10 +481,10 @@ class ParserX86ATT(BaseParser): result = self.process_operand( self.directive.parseString(line, parseAll=True).asDict() ) - instruction_form.directive = { - "name": result.directive.name, - "parameters": result.directive.parameters, - } + instruction_form.directive = DirectiveForm( + NAME_ID = result.directive.name, + PARAMETER_ID = result.directive.parameters, + ) if self.COMMENT_ID in result.directive: instruction_form.comment = " ".join( @@ -421,7 +559,7 @@ class ParserX86ATT(BaseParser): directive_new.parameters = directive["parameters"] if "comment" in directive: directive_new.comment = directive["comment"] - return InstructionForm(DIRECTIVE_ID = directive_new) + return OperandForm(DIRECTIVE_ID = directive_new) def process_memory_address(self, memory_address): """Post-process memory address operand""" @@ -437,17 +575,18 @@ class ParserX86ATT(BaseParser): offset = {"value": offset} elif offset is not None and "value" in offset: offset["value"] = int(offset["value"], 0) - new_dict = AttrDict({"offset": offset, "base": base, "index": index, "scale": scale}) + new_dict = MemoryForm(OFFSET_ID = offset, BASE_ID = base, INDEX_ID = index, SCALE_ID = scale) # Add segmentation extension if existing if self.SEGMENT_EXT_ID in memory_address: - new_dict[self.SEGMENT_EXT_ID] = memory_address[self.SEGMENT_EXT_ID] - return AttrDict({self.MEMORY_ID: new_dict}) + new_dict.segment_ext_id = memory_address[self.SEGMENT_EXT_ID] + return OperandForm(MEMORY_ID = new_dict) def process_label(self, label): """Post-process label asm line""" # remove duplicated 'name' level due to identifier label["name"] = label["name"][0]["name"] - return AttrDict({self.LABEL_ID: label}) + new_label = LabelForm(NAME_ID = label["name"], COMMENT_ID = label["comment"] if "comment" in label else None) + return OperandForm(LABEL_ID = new_label) def process_immediate(self, immediate): """Post-process immediate operand""" @@ -456,7 +595,7 @@ class ParserX86ATT(BaseParser): return immediate # otherwise just make sure the immediate is a decimal immediate["value"] = int(immediate["value"], 0) - return AttrDict({self.IMMEDIATE_ID: immediate}) + return OperandForm(IMMEDIATE_ID = immediate) def get_full_reg_name(self, register): """Return one register name string including all attributes""" diff --git a/osaca/semantics/isa_semantics.py b/osaca/semantics/isa_semantics.py index 1c26818..ae2fa6c 100755 --- a/osaca/semantics/isa_semantics.py +++ b/osaca/semantics/isa_semantics.py @@ -6,6 +6,36 @@ from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT from .hw_model import MachineModel +class SemanticForm: + def __init__(self, SOURCE_ID = [], DESTINATION_ID = [], SRC_DST = []): + self._SOURCE_ID = SOURCE_ID + self._DESTINATION_ID = DESTINATION_ID + self._SRC_DST = SRC_DST + + @property + def source(self): + return self._SOURCE_ID + + @source.setter + def source(self, source): + self._SOURCE_ID = source + + @property + def destination(self): + return self._DESTINATION_ID + + @destination.setter + def destination(self, destination): + self._DESTINATION_ID = destination + + @property + def src_dst(self): + return self._SRC_DST + + @src_dst.setter + def src_dst(self, src_dst): + self._SRC_DST = src_dst + class INSTR_FLAGS: """ @@ -45,32 +75,32 @@ class ISASemantics(object): def assign_src_dst(self, instruction_form): """Update instruction form dictionary with source, destination and flag information.""" # if the instruction form doesn't have operands or is None, there's nothing to do - if instruction_form["operands"] is None or instruction_form["instruction"] is None: - instruction_form["semantic_operands"] = AttrDict( - {"source": [], "destination": [], "src_dst": []} + if instruction_form.operands is None or instruction_form.instruction is None: + instruction_form.semantic_operands = SemanticForm( + SOURCE_ID = [], DESTINATION_ID = [], SRC_DST = [] ) return # check if instruction form is in ISA yaml, otherwise apply standard operand assignment # (one dest, others source) isa_data = self._isa_model.get_instruction( - instruction_form["instruction"], instruction_form["operands"] + instruction_form.instruction, instruction_form.operands ) if ( isa_data is None and self._isa == "x86" - and instruction_form["instruction"][-1] in self.GAS_SUFFIXES + and instruction_form.instruction[-1] in self.GAS_SUFFIXES ): # Check for instruction without GAS suffix isa_data = self._isa_model.get_instruction( - instruction_form["instruction"][:-1], instruction_form["operands"] + instruction_form.instruction[:-1], instruction_form.operands ) - if isa_data is None and self._isa == "aarch64" and "." in instruction_form["instruction"]: + if isa_data is None and self._isa == "aarch64" and "." in instruction_form.instruction: # Check for instruction without shape/cc suffix - suffix_start = instruction_form["instruction"].index(".") + suffix_start = instruction_form.instruction.index(".") isa_data = self._isa_model.get_instruction( - instruction_form["instruction"][:suffix_start], instruction_form["operands"] + instruction_form.instruction[:suffix_start], instruction_form.operands ) - operands = instruction_form["operands"] + operands = instruction_form.operands op_dict = {} assign_default = False if isa_data: @@ -81,28 +111,28 @@ class ISASemantics(object): assign_default = True # check for equivalent register-operands DB entry if LD/ST if any(["memory" in op for op in operands]): - operands_reg = self.substitute_mem_address(instruction_form["operands"]) + operands_reg = self.substitute_mem_address(instruction_form.operands) isa_data_reg = self._isa_model.get_instruction( - instruction_form["instruction"], operands_reg + instruction_form.instruction, operands_reg ) if ( isa_data_reg is None and self._isa == "x86" - and instruction_form["instruction"][-1] in self.GAS_SUFFIXES + and instruction_form.instruction[-1] in self.GAS_SUFFIXES ): # Check for instruction without GAS suffix isa_data_reg = self._isa_model.get_instruction( - instruction_form["instruction"][:-1], operands_reg + instruction_form.instruction[:-1], operands_reg ) if ( isa_data_reg is None and self._isa == "aarch64" - and "." in instruction_form["instruction"] + and "." in instruction_form.instruction ): # Check for instruction without shape/cc suffix - suffix_start = instruction_form["instruction"].index(".") + suffix_start = instruction_form.instruction.index(".") isa_data_reg = self._isa_model.get_instruction( - instruction_form["instruction"][:suffix_start], operands_reg + instruction_form.instruction[:suffix_start], operands_reg ) if isa_data_reg: assign_default = False @@ -149,7 +179,7 @@ class ISASemantics(object): ) ) # store operand list in dict and reassign operand key/value pair - instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict) + instruction_form.semantic_operands = AttrDict.convert_dict(op_dict) # assign LD/ST flags instruction_form["flags"] = ( instruction_form["flags"] if "flags" in instruction_form else [] @@ -177,22 +207,22 @@ class ISASemantics(object): if "register" in op ] isa_data = self._isa_model.get_instruction( - instruction_form["instruction"], instruction_form["operands"] + instruction_form.instruction, instruction_form.operands ) if ( isa_data is None and self._isa == "x86" - and instruction_form["instruction"][-1] in self.GAS_SUFFIXES + and instruction_form.instruction[-1] in self.GAS_SUFFIXES ): # Check for instruction without GAS suffix isa_data = self._isa_model.get_instruction( - instruction_form["instruction"][:-1], instruction_form["operands"] + instruction_form.instruction[:-1], instruction_form.operands ) - if isa_data is None and self._isa == "aarch64" and "." in instruction_form["instruction"]: + if isa_data is None and self._isa == "aarch64" and "." in instruction_form.instruction: # Check for instruction without shape/cc suffix - suffix_start = instruction_form["instruction"].index(".") + suffix_start = instruction_form.instruction.index(".") isa_data = self._isa_model.get_instruction( - instruction_form["instruction"][:suffix_start], instruction_form["operands"] + instruction_form.instruction[:suffix_start], instruction_form.operands ) if only_postindexed: @@ -308,8 +338,8 @@ class ISASemantics(object): def _has_load(self, instruction_form): """Check if instruction form performs a LOAD""" for operand in chain( - instruction_form["semantic_operands"]["source"], - instruction_form["semantic_operands"]["src_dst"], + instruction_form.semantic_operands.source, + instruction_form.semantic_operands.src_dst, ): if "memory" in operand: return True @@ -318,8 +348,8 @@ class ISASemantics(object): def _has_store(self, instruction_form): """Check if instruction form perfroms a STORE""" for operand in chain( - instruction_form["semantic_operands"]["destination"], - instruction_form["semantic_operands"]["src_dst"], + instruction_form.semantic_operands.destination, + instruction_form.semantic_operands.src_dst, ): if "memory" in operand: return True @@ -328,27 +358,27 @@ class ISASemantics(object): def _get_regular_source_operands(self, instruction_form): """Get source operand of given instruction form assuming regular src/dst behavior.""" # if there is only one operand, assume it is a source operand - if len(instruction_form["operands"]) == 1: - return [instruction_form["operands"][0]] + if len(instruction_form.operands) == 1: + return [instruction_form.operands[0]] if self._isa == "x86": # return all but last operand - return [op for op in instruction_form["operands"][0:-1]] + return [op for op in instruction_form.operands[0:-1]] elif self._isa == "aarch64": - return [op for op in instruction_form["operands"][1:]] + return [op for op in instruction_form.operands[1:]] else: raise ValueError("Unsupported ISA {}.".format(self._isa)) def _get_regular_destination_operands(self, instruction_form): """Get destination operand of given instruction form assuming regular src/dst behavior.""" # if there is only one operand, assume no destination - if len(instruction_form["operands"]) == 1: + if len(instruction_form.operands) == 1: return [] if self._isa == "x86": # return last operand - return instruction_form["operands"][-1:] + return instruction_form.operands[-1:] if self._isa == "aarch64": # return first operand - return instruction_form["operands"][:1] + return instruction_form.operands[:1] else: raise ValueError("Unsupported ISA {}.".format(self._isa)) diff --git a/tests/test_files/kernel_x86.s.copy.s b/tests/test_files/kernel_x86.s.copy.s new file mode 100644 index 0000000..ca7af30 --- /dev/null +++ b/tests/test_files/kernel_x86.s.copy.s @@ -0,0 +1,11 @@ +# OSACA-BEGIN +.L10: + vmovapd (%r15,%rax), %ymm0 + vmovapd (%r12,%rax), %ymm3 + addl $1, %ecx + vfmadd132pd 0(%r13,%rax), %ymm3, %ymm0 + vmovapd %ymm0, (%r14,%rax) + addq $32, %rax + cmpl %ecx, %r10d + ja .L10 +# OSACA-END diff --git a/tests/test_parser_x86att.py b/tests/test_parser_x86att.py index 5e56aa1..6735a02 100755 --- a/tests/test_parser_x86att.py +++ b/tests/test_parser_x86att.py @@ -311,14 +311,10 @@ class TestParserX86ATT(unittest.TestCase): ) def _get_label(self, parser, label): - return AttrDict.convert_dict( - parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()) - ).label + return parser.process_operand(parser.label.parseString(label, parseAll=True).asDict()).label def _get_directive(self, parser, directive): - return AttrDict.convert_dict( - parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()) - ).directive + return parser.process_operand(parser.directive.parseString(directive, parseAll=True).asDict()).directive @staticmethod def _find_file(name):