Files
OSACA/osaca/semantics/marker_utils.py
2019-12-13 17:38:35 +01:00

227 lines
8.4 KiB
Python
Executable File

#!/usr/bin/env python3
from collections import OrderedDict
from osaca.parser import ParserAArch64v81, ParserX86ATT, get_parser
def reduce_to_section(kernel, isa):
    """
    Cut a parsed kernel down to the lines enclosed by the OSACA markers.

    :param kernel: sliceable sequence of parsed assembly lines
    :param isa: ISA name; compared case-insensitively against 'x86' and 'aarch64'
    :return: ``kernel[start:end]`` where ``start``/``end`` are the indices reported by the
             ISA-specific marker search
    :raises ValueError: if ``isa`` is neither 'x86' nor 'aarch64'
    :raises LookupError: if the start or the end marker could not be located
    """
    isa_name = isa.lower()
    if isa_name not in ('x86', 'aarch64'):
        raise ValueError('ISA not supported.')

    # Pick the ISA-specific search routine; only the selected name is evaluated.
    locate = find_marked_kernel_x86ATT if isa_name == 'x86' else find_marked_kernel_AArch64
    first, last = locate(kernel)

    # The search routines signal "not found" with -1; the start marker is reported first.
    if first == -1:
        raise LookupError('Could not find START MARKER. Make sure it is inserted!')
    if last == -1:
        raise LookupError('Could not find END MARKER. Make sure it is inserted!')
    return kernel[first:last]
def find_marked_kernel_AArch64(lines):
    """
    Search parsed AArch64 lines for the marked kernel section.

    The marker searched for is a ``mov`` into ``x1`` of the immediate 111 (start) or
    222 (end), followed by byte directives 213, 3, 32, 31.

    :param lines: parsed assembly lines
    :return: tuple ``(start_index, end_index)`` as returned by ``find_marked_section``
    """
    return find_marked_section(
        lines,
        ParserAArch64v81(),
        mov_instr=['mov'],
        mov_reg='x1',
        mov_vals=[111, 222],
        nop_bytes=['213', '3', '32', '31'],
        # AArch64 syntax lists the destination operand first
        reverse=True,
    )
def find_marked_kernel_x86ATT(lines):
    """
    Search parsed x86 (AT&T syntax) lines for the marked kernel section.

    The marker searched for is a ``mov``/``movl`` into ``ebx`` of the immediate 111 (start)
    or 222 (end), followed by byte directives 100, 103, 144.

    :param lines: parsed assembly lines
    :return: tuple ``(start_index, end_index)`` as returned by ``find_marked_section``
    """
    return find_marked_section(
        lines,
        ParserX86ATT(),
        mov_instr=['mov', 'movl'],
        mov_reg='ebx',
        mov_vals=[111, 222],
        nop_bytes=['100', '103', '144'],
    )
def get_marker(isa, comment=""):
    """
    Return tuple of start and end marker lines.

    The marker assembly is built as text and then run through the parser obtained from
    ``get_parser(isa)``, so the returned values are whatever ``parse_file`` yields for them.

    :param isa: ISA name, 'x86' or 'AArch64' (matched case-insensitively here; it is
                handed to ``get_parser`` unchanged)
    :param comment: optional text appended as an extra comment line after the start marker
    :return: tuple ``(start_marker, end_marker)`` of parsed marker lines
    :raises ValueError: if ``isa`` is not one of the supported ISAs
    """
    isa_key = isa.lower()
    if isa_key == 'x86':
        start_marker_raw = (
            'movl $111, %ebx # OSACA START MARKER\n'
            '.byte 100 # OSACA START MARKER\n'
            '.byte 103 # OSACA START MARKER\n'
            '.byte 144 # OSACA START MARKER\n'
        )
        if comment:
            # The comment must be terminated by a newline like every other marker line.
            start_marker_raw += "# {}\n".format(comment)
        end_marker_raw = (
            'movl $222, %ebx # OSACA END MARKER\n'
            '.byte 100 # OSACA END MARKER\n'
            '.byte 103 # OSACA END MARKER\n'
            '.byte 144 # OSACA END MARKER\n'
        )
    elif isa_key == 'aarch64':
        start_marker_raw = (
            'mov x1, #111 // OSACA START MARKER\n'
            '.byte 213,3,32,31 // OSACA START MARKER\n'
        )
        if comment:
            start_marker_raw += "// {}\n".format(comment)
        # After loop
        end_marker_raw = (
            'mov x1, #222 // OSACA END MARKER\n'
            '.byte 213,3,32,31 // OSACA END MARKER\n'
        )
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError('ISA not supported.')
    parser = get_parser(isa)
    start_marker = parser.parse_file(start_marker_raw)
    end_marker = parser.parse_file(end_marker_raw)
    return start_marker, end_marker
def find_marked_section(lines, parser, mov_instr, mov_reg, mov_vals, nop_bytes, reverse=False):
    """
    Locate the section enclosed by a start and an end marker in parsed assembly lines.

    A marker is a move instruction writing an immediate into ``mov_reg`` that is directly
    followed by ``byte`` directive(s) whose parameters begin with ``nop_bytes``.

    :param lines: parsed lines exposing ``instruction``, ``operands`` and ``directive``
    :param parser: object providing ``normalize_imd`` and ``get_full_reg_name``
    :param mov_instr: collection of mnemonics accepted as the marker's move instruction
    :param mov_reg: full name of the register the marker writes to
    :param mov_vals: sequence whose element 0 is the start marker's immediate and
                     element 1 the end marker's immediate
    :param nop_bytes: list of byte directive parameters expected after the move
    :param reverse: if True the destination is operand 0 and the source operand 1,
                    otherwise the source is operand 0 and the destination operand 1
    :return: tuple ``(index_start, index_end)``: index of the first line after the start
             marker and index of the end marker's move instruction; -1 for a marker that
             was not found
    """
    # TODO match to instructions returned by get_marker
    index_start = -1
    index_end = -1
    src_idx, dst_idx = (1, 0) if reverse else (0, 1)
    for i, line in enumerate(lines):
        try:
            # A marker needs a following directive line; the bounds check keeps a move on
            # the very last line from raising IndexError.
            if (
                line.instruction in mov_instr
                and i + 1 < len(lines)
                and lines[i + 1].directive is not None
            ):
                source = line.operands[src_idx]
                destination = line.operands[dst_idx]
                # instruction pair matches, check for operands
                if 'immediate' in source:
                    value = parser.normalize_imd(source.immediate)
                    is_start = value == mov_vals[0]
                    if (
                        (is_start or value == mov_vals[1])
                        and 'register' in destination
                        and parser.get_full_reg_name(destination.register) == mov_reg
                    ):
                        # operands of the move match a marker, check the byte directives
                        match, line_count = match_bytes(lines, i + 1, nop_bytes)
                        if match and is_start:
                            # return first line after the marker
                            index_start = i + 1 + line_count
                        elif match:
                            # return line of the marker
                            index_end = i
        except TypeError:
            # Best effort: report the line that could not be inspected and keep searching.
            print(i, line)
        if index_start != -1 and index_end != -1:
            break
    return index_start, index_end
def match_bytes(lines, index, byte_list):
    """
    Check whether the ``byte`` directives starting at ``index`` begin with ``byte_list``.

    :param lines: parsed lines exposing a ``directive`` attribute (``None`` or an object
                  with ``name`` and ``parameters``)
    :param index: index of the first line to inspect
    :param byte_list: list of expected directive parameters
    :return: ``(True, n)`` with ``n`` the number of consecutive ``byte`` directive lines
             consumed, or ``(False, -1)`` if the collected parameters do not start with
             ``byte_list``
    """
    # either all bytes are in one line or in separate ones
    collected = []
    cursor = index
    while cursor < len(lines):
        directive = lines[cursor].directive
        if directive is None or directive.name != 'byte':
            break
        collected.extend(directive.parameters)
        cursor += 1

    if collected[:len(byte_list)] != byte_list:
        return False, -1
    return True, cursor - index
def find_jump_labels(lines):
    """
    Find and return all labels which are followed by instructions until the next label

    :param lines: parsed lines, each indexable by the keys 'label' and 'instruction'
    :return: OrderedDict of mapping from label name to associated line index
    """
    # 1. Record for every label the span (label line, next label line or end of input)
    spans = OrderedDict()
    open_label = None
    for idx, entry in enumerate(lines):
        name = entry['label']
        if name is None:
            continue
        # A new label opens its own span ...
        spans[name] = (idx,)
        # ... and closes the span of the label seen before it
        if open_label is not None:
            spans[open_label] = (spans[open_label][0], idx)
        open_label = name
    # The last label has no successor; its span runs to the end of the input
    if open_label is not None and len(spans[open_label]) == 1:
        spans[open_label] = (spans[open_label][0], len(lines))

    # 2. Drop labels whose span holds only dot-instructions (e.g., .text) or none at all
    for name in list(spans):
        first, stop = spans[name]
        if all(
            entry['instruction'].startswith('.')
            for entry in lines[first:stop]
            if entry['instruction'] is not None
        ):
            del spans[name]

    return OrderedDict((name, span[0]) for name, span in spans.items())
def find_basic_blocks(lines):
    """
    Find and return basic blocks (asm sections which can only be executed as complete block).

    Blocks always start at a label and end at the next jump/break possibility.

    :param lines: parsed lines, each indexable by 'label', 'instruction' and 'operands'
    :return: OrderedDict with labels as keys and list of lines as value
    """
    jump_targets = find_jump_labels(lines)

    blocks = OrderedDict()
    for name, label_idx in jump_targets.items():
        body = []
        blocks[name] = body
        # Collect lines after the label; the terminating line itself is part of the block
        for entry in lines[label_idx + 1:]:
            body.append(entry)
            if entry['instruction'] and entry['operands']:
                # An instruction referencing a valid jump label ends the block
                if any(
                    operand['identifier']['name'] in jump_targets
                    for operand in entry['operands']
                    if 'identifier' in operand
                ):
                    break
            elif entry['label'] is not None:
                # So does running into the next label
                break
    return blocks
def find_basic_loop_bodies(lines):
    """
    Find and return basic loop bodies (asm section which loop back on itself with no other egress).

    :param lines: parsed lines, each indexable by 'label', 'instruction' and 'operands'
    :return: OrderedDict with labels as keys and list of lines as value
    """
    jump_targets = find_jump_labels(lines)

    loop_bodies = OrderedDict()
    for name, label_idx in jump_targets.items():
        body = []
        for entry in lines[label_idx + 1:]:
            body.append(entry)
            if not (entry['instruction'] and entry['operands']):
                continue
            # First operand of this line that names a valid jump label, if any
            # (label names are never None, so None safely means "no reference")
            referenced = next(
                (
                    operand['identifier']['name']
                    for operand in entry['operands']
                    if 'identifier' in operand
                    and operand['identifier']['name'] in jump_targets
                ),
                None,
            )
            if referenced is None:
                continue
            # Any reference to a jump label ends the scan; it is a loop body only if the
            # reference points back to the label the scan started from
            if referenced == name:
                loop_bodies[name] = body
            break
    return loop_bodies