Files
OSACA/osaca/semantics/isa_semantics.py
2023-09-11 18:23:57 +02:00

352 lines
15 KiB
Python

#!/usr/bin/env python3
from itertools import chain
from osaca import utils
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
from osaca.parser.memory import MemoryOperand
from osaca.parser.register import RegisterOperand
from .hw_model import MachineModel
class INSTR_FLAGS:
    """
    Flags used for unknown or special instructions.

    Every attribute is a plain string marker. Within this module only ``HAS_LD`` and
    ``HAS_ST`` are used: ``ISASemantics.assign_src_dst`` appends them to the ``flags`` list
    of an instruction form.
    """

    # NOTE(review): the meaning of the markers below is taken from their names/values only;
    # they are presumably set by other analysis stages -- confirm against their users.
    LD = "is_load_instruction"
    TP_UNKWN = "tp_unknown"
    LT_UNKWN = "lt_unknown"
    NOT_BOUND = "not_bound"
    HIDDEN_LD = "hidden_load"
    # Set when an instruction form reads from / writes to a memory operand.
    HAS_LD = "performs_load"
    HAS_ST = "performs_store"
class ISASemantics(object):
    """
    Assign source/destination semantics to the operands of parsed instruction forms.

    The ISA description is loaded through ``MachineModel`` from the data file
    ``isa/<isa>.yml`` (or from ``path_to_yaml`` if one is given). Instruction forms that
    are not found in that description fall back to a regular, ISA-specific operand order.
    """

    # Single-character mnemonic suffixes that are stripped when an x86 instruction cannot
    # be found in the ISA DB under its full name (GAS suffix fallback).
    GAS_SUFFIXES = "bswlqt"

    def __init__(self, isa, path_to_yaml=None):
        """
        Load the ISA model and select the matching parser.

        :param str isa: ISA name; it is lower-cased before use
        :param str path_to_yaml: optional path to an ISA yaml file overriding the default
        """
        self._isa = isa.lower()
        path = path_to_yaml or utils.find_datafile("isa/" + self._isa + ".yml")
        self._isa_model = MachineModel(path_to_yaml=path)
        # NOTE: for any other ISA string no ``_parser`` attribute is created.
        if self._isa == "x86":
            self._parser = ParserX86ATT()
        elif self._isa == "aarch64":
            self._parser = ParserAArch64()

    def process(self, instruction_forms):
        """Process a list of instruction forms."""
        for instruction_form in instruction_forms:
            self.assign_src_dst(instruction_form)

    @staticmethod
    def _reg_name(register):
        """Return the full register name, i.e., its optional prefix followed by its name."""
        # The parentheses-free form ``prefix if ... else "" + name`` would drop the name
        # whenever a prefix exists, so the prefix is resolved separately first.
        prefix = register.prefix if register.prefix is not None else ""
        return prefix + register.name

    def _lookup_isa_data(self, instruction, operands):
        """
        Look up an instruction in the ISA DB, retrying without a mnemonic suffix.

        If the full mnemonic is unknown, x86 mnemonics ending in one of ``GAS_SUFFIXES``
        are retried without their last character and aarch64 mnemonics containing a ``.``
        are retried with everything from the first ``.`` on removed.

        :param str instruction: instruction mnemonic
        :param list operands: operands used for matching the DB entry
        :returns: the ISA DB entry or `None` if no entry was found
        """
        isa_data = self._isa_model.get_instruction(instruction, operands)
        if isa_data is not None:
            return isa_data
        if self._isa == "x86" and instruction and instruction[-1] in self.GAS_SUFFIXES:
            # Check for instruction without GAS suffix
            return self._isa_model.get_instruction(instruction[:-1], operands)
        if self._isa == "aarch64" and "." in instruction:
            # Check for instruction without shape/cc suffix
            suffix_start = instruction.index(".")
            return self._isa_model.get_instruction(instruction[:suffix_start], operands)
        return None

    # get parser result and assign operands to
    #   - source
    #   - destination
    #   - source/destination
    def assign_src_dst(self, instruction_form):
        """
        Update instruction form with source, destination and flag information.

        Sets ``instruction_form.semantic_operands`` to a dict with the keys ``source``,
        ``destination`` and ``src_dst`` and appends the load/store flags to
        ``instruction_form.flags``.

        :param instruction_form: parsed instruction form to annotate in place
        """
        # if the instruction form doesn't have operands or is None, there's nothing to do
        if instruction_form.operands is None or instruction_form.instruction is None:
            instruction_form.semantic_operands = {"source": [], "destination": [], "src_dst": []}
            return
        instruction = instruction_form.instruction
        operands = instruction_form.operands
        # check if instruction form is in ISA yaml, otherwise apply standard operand
        # assignment (one dest, others source)
        isa_data = self._lookup_isa_data(instruction, operands)
        op_dict = {}
        assign_default = False
        if isa_data:
            # load src/dst structure from isa_data
            op_dict = self._apply_found_ISA_data(isa_data, operands)
        else:
            # Couldn't find instruction form in ISA DB
            assign_default = True
            # check for equivalent register-operands DB entry if LD/ST
            if any(isinstance(op, MemoryOperand) for op in operands):
                operands_reg = self.substitute_mem_address(operands)
                isa_data_reg = self._lookup_isa_data(instruction, operands_reg)
                if isa_data_reg:
                    assign_default = False
                    op_dict = self._apply_found_ISA_data(isa_data_reg, operands)
        if assign_default:
            # no irregular operand structure, apply default
            op_dict["source"] = self._get_regular_source_operands(instruction_form)
            op_dict["destination"] = self._get_regular_destination_operands(instruction_form)
            op_dict["src_dst"] = []
        # post-process pre- and post-indexing for aarch64 memory operands: the base
        # register of such an operand is recorded as an additional src_dst entry
        if self._isa == "aarch64":
            for operand in chain(op_dict["source"], op_dict["destination"]):
                if not isinstance(operand, MemoryOperand):
                    continue
                post_indexed = operand.post_indexed
                pre_indexed = operand.pre_indexed
                if post_indexed or pre_indexed:
                    op_dict["src_dst"].append(
                        {
                            "register": operand.base,
                            "pre_indexed": pre_indexed,
                            "post_indexed": post_indexed,
                        }
                    )
        # store operand list in dict and reassign operand key/value pair
        instruction_form.semantic_operands = op_dict
        # assign LD/ST flags
        # NOTE(review): this relies on ``instruction_form.flags`` already being a list;
        # presumably it is initialized by the parser -- confirm.
        if self._has_load(instruction_form):
            instruction_form.flags += [INSTR_FLAGS.HAS_LD]
        if self._has_store(instruction_form):
            instruction_form.flags += [INSTR_FLAGS.HAS_ST]

    def get_reg_changes(self, instruction_form, only_postindexed=False):
        """
        Return register changes of an instruction form based on the operation defined in ISA.

        The result maps the name of every destination (and src_dst) register operand to
        its state dict (``{"name": ..., "value": ...}``) or to `None` if the change of that
        register is unknown. An empty dict is returned if the instruction is `None` or no
        register is written.

        :param instruction_form: instruction form with ``semantic_operands`` already assigned
        :param bool only_postindexed: currently ignored, the post-index-only evaluation is
            disabled in this implementation
        :returns: `dict` -- register name to state dict (or `None`)
        :raises ValueError: if a pre-indexed memory operand is combined with an ISA entry
            that defines an operation
        """
        if instruction_form.instruction is None:
            return {}
        dest_reg_names = [
            self._reg_name(op)
            for op in chain(
                instruction_form.semantic_operands["destination"],
                instruction_form.semantic_operands["src_dst"],
            )
            if isinstance(op, RegisterOperand)
        ]
        isa_data = self._lookup_isa_data(
            instruction_form.instruction, instruction_form.operands
        )
        # TODO: a shortcut for ``only_postindexed`` (returning only the base register
        # update of a post-indexed memory operand) used to live here but is disabled.
        reg_operand_names = {}  # e.g., {'rax': 'op1'}
        operand_state = {}  # e.g., {'op1': {'name': 'rax', 'value': 0}}  0 means unchanged
        for o in instruction_form.operands:
            if isinstance(o, MemoryOperand) and o.pre_indexed:
                # Assuming no isa_data.operation
                if isa_data is not None and isa_data.get("operation", None) is not None:
                    raise ValueError(
                        "ISA information for pre-indexed instruction {!r} has operation set. "
                        "This is currently not supported.".format(instruction_form.line)
                    )
                base_name = self._reg_name(o.base)
                reg_operand_names = {base_name: "op1"}
                operand_state = {"op1": {"name": base_name, "value": o.offset["value"]}}
        if isa_data is not None and "operation" in isa_data:
            for i, o in enumerate(instruction_form.operands):
                operand_name = "op{}".format(i + 1)
                if isinstance(o, RegisterOperand):
                    o_reg_name = self._reg_name(o)
                    reg_operand_names[o_reg_name] = operand_name
                    operand_state[operand_name] = {"name": o_reg_name, "value": 0}
                # NOTE(review): the two checks below use dict-style access on the operand;
                # verify that non-register operand types support ``in`` and subscripting.
                elif "immediate" in o:
                    operand_state[operand_name] = {"value": o["immediate"]["value"]}
                elif "memory" in o:
                    # TODO lea needs some thinking about
                    pass
            # SECURITY: the operation string from the ISA DB is executed as Python code with
            # the operand states as local namespace; only load ISA files from trusted sources.
            exec(isa_data["operation"], {}, operand_state)
        return {
            reg_name: operand_state.get(reg_operand_names.get(reg_name))
            for reg_name in dest_reg_names
        }

    def _apply_found_ISA_data(self, isa_data, operands):
        """
        Create operand dictionary containing src/dst operands out of the ISA data entry and
        the operands of an instruction form.

        If breaks_dependency_on_equal_operands is True (configured per instruction in ISA db)
        and all operands are equal, place operands into destination only.

        :param dict isa_data: ISA DB entry
        :param list operands: operands of the instruction form
        :returns: `dict` -- operands dictionary with src/dst assignment
        """
        op_dict = {"source": [], "destination": [], "src_dst": []}
        # handle dependency breaking instructions; comparing the list shifted by one
        # against itself is true exactly if all operands are equal
        if (
            isa_data.get("breaks_dependency_on_equal_operands")
            and operands[1:] == operands[:-1]
        ):
            op_dict["destination"] += operands
            if "hidden_operands" in isa_data:
                op_dict["destination"] += [
                    AttrDict.convert_dict(
                        {
                            hop["class"]: {
                                k: hop[k] for k in ["name", "class", "source", "destination"]
                            }
                        }
                    )
                    for hop in isa_data["hidden_operands"]
                ]
            return op_dict
        # regular case: the i-th ISA DB operand description classifies the i-th operand
        for i, op in enumerate(isa_data["operands"]):
            if op["source"] and op["destination"]:
                op_dict["src_dst"].append(operands[i])
            elif op["source"]:
                op_dict["source"].append(operands[i])
            elif op["destination"]:
                op_dict["destination"].append(operands[i])
        # check for hidden operands like flags or registers
        if "hidden_operands" in isa_data:
            key_filter = ["class", "source", "destination"]
            # add operand(s) to semantic_operands of instruction form
            for op in isa_data["hidden_operands"]:
                if op["source"] and op["destination"]:
                    dict_key = "src_dst"
                elif op["source"]:
                    dict_key = "source"
                else:
                    dict_key = "destination"
                # keep all properties except the classification keys themselves
                hidden_op = {
                    op["class"]: {key: op[key] for key in op.keys() if key not in key_filter}
                }
                op_dict[dict_key].append(AttrDict.convert_dict(hidden_op))
        return op_dict

    def _has_load(self, instruction_form):
        """Check if instruction form performs a LOAD, i.e., reads a memory operand."""
        return any(
            isinstance(operand, MemoryOperand)
            for operand in chain(
                instruction_form.semantic_operands["source"],
                instruction_form.semantic_operands["src_dst"],
            )
        )

    def _has_store(self, instruction_form):
        """Check if instruction form performs a STORE, i.e., writes a memory operand."""
        return any(
            isinstance(operand, MemoryOperand)
            for operand in chain(
                instruction_form.semantic_operands["destination"],
                instruction_form.semantic_operands["src_dst"],
            )
        )

    def _get_regular_source_operands(self, instruction_form):
        """
        Get source operands of given instruction form assuming regular src/dst behavior.

        :raises ValueError: if the ISA is neither x86 nor aarch64 (and there is more or
            less than one operand)
        """
        # if there is only one operand, assume it is a source operand
        if len(instruction_form.operands) == 1:
            return [instruction_form.operands[0]]
        if self._isa == "x86":
            # return all but last operand
            return list(instruction_form.operands[0:-1])
        if self._isa == "aarch64":
            # return all but first operand
            return list(instruction_form.operands[1:])
        raise ValueError("Unsupported ISA {}.".format(self._isa))

    def _get_regular_destination_operands(self, instruction_form):
        """
        Get destination operands of given instruction form assuming regular src/dst behavior.

        :raises ValueError: if the ISA is neither x86 nor aarch64 (and there is more or
            less than one operand)
        """
        # if there is only one operand, assume no destination
        if len(instruction_form.operands) == 1:
            return []
        if self._isa == "x86":
            # return last operand
            return instruction_form.operands[-1:]
        if self._isa == "aarch64":
            # return first operand
            return instruction_form.operands[:1]
        raise ValueError("Unsupported ISA {}.".format(self._isa))

    def substitute_mem_address(self, operands):
        """Return a copy of the operand list with a wildcard in place of each memory operand."""
        return [
            self._create_reg_wildcard() if isinstance(op, MemoryOperand) else op for op in operands
        ]

    def _create_reg_wildcard(self):
        """Wildcard constructor"""
        return {"*": "*"}