From cd3b2ad965f1e9ff3d04e10cab85e4516f724f8f Mon Sep 17 00:00:00 2001 From: Julian Hammer Date: Fri, 21 Dec 2018 16:38:13 +0100 Subject: [PATCH] separated disassembling (error prone), marker detection and kernel extraction --- osaca/osaca.py | 724 ++++++++++++++++++-------------------- tests/test_osaca.py | 36 +- tests/test_osaca_iaca.out | 28 +- 3 files changed, 383 insertions(+), 405 deletions(-) diff --git a/osaca/osaca.py b/osaca/osaca.py index e030e6c..07b56ab 100755 --- a/osaca/osaca.py +++ b/osaca/osaca.py @@ -7,6 +7,7 @@ import io import re import subprocess from datetime import datetime +from pprint import pprint import pandas as pd import numpy as np @@ -17,6 +18,19 @@ from osaca.testcase import Testcase DATA_DIR = os.path.expanduser('~') + '/.osaca/' +# Matches every variation of the IACA start marker +IACA_START_MARKER = re.compile(r'\s*movl?[ \t]+\$(?:111|0x6f)[ \t]*,[ \t]*%ebx.*\n\s*' + r'(?:\.byte[ \t]+100.*((,[ \t]*103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144)))|' + r'(\n\s*\.byte[ \t]+103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144))))|(?:fs addr32 )?nop)') +# Matches every variation of the IACA end marker +IACA_END_MARKER = re.compile(r'\s*movl?[ \t]+\$(?:222|0x1f3)[ \t]*,[ \t]*%ebx.*\n\s*' + r'(?:\.byte[ \t]+100.*((,[ \t]*103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144)))|' + r'(\n\s*\.byte[ \t]+103.*((,[ \t]*144)|' + r'(\n\s*\.byte[ \t]+144))))|(?:fs addr32 )?nop)') + def flatten(l): """ @@ -39,14 +53,264 @@ def flatten(l): return l[:1] + flatten(l[1:]) -def get_assembly_from_binary(file_path): +def get_assembly_from_binary(bin_path): """ - Load binary file compiled with '-g' in class attribute srcCode and - separate by line. + Disassemble binary with llvm-objdump and transform into a canonical form. + + Replace jump and call target offsets with labels. 
+ + :param bin_path: path to binary file to disassemble + + :return assembly string """ - return subprocess.run(['objdump', '--source', file_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n') + asm_lines = subprocess.run( + ['objdump', '-d', '--no-show-raw-insn', bin_path], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n') + + asm = [] + + # Separate label, offsets and instructions + # Store offset with each label (thus iterate in reverse) + label_offsets = {} + for l in reversed(asm_lines): + m = re.match(r'^(?:(?P