mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 00:50:06 +01:00
refactored to class module
This commit is contained in:
@@ -1,228 +1,249 @@
|
||||
#!/apps/python/3.5-anaconda/bin/python
|
||||
import sys
|
||||
import re
|
||||
import sys, os, re
|
||||
import argparse
|
||||
from testcase import Testcase
|
||||
from param import Register, MemAddr, Parameter
|
||||
|
||||
marker = r'//STARTLOOP'
|
||||
asm_line = re.compile(r'\s[0-9a-f]+[:]')
|
||||
numSeps = 0
|
||||
sem = 0
|
||||
db = {}
|
||||
sorted_db = []
|
||||
lncnt = 1
|
||||
fname = ""
|
||||
cntChar = ''
|
||||
first = True
|
||||
|
||||
def extract_instr(asmFile):
|
||||
global lncnt
|
||||
global fname
|
||||
fname = asmFile
|
||||
# Check if parameter is in the correct file format
|
||||
if(asmFile[-4:] != ".log"):
|
||||
print("Invalid argument")
|
||||
sys.exit()
|
||||
# Open file
|
||||
try:
|
||||
f=open(asmFile, "r")
|
||||
except IOError:
|
||||
print("IOError: File not found")
|
||||
# Analyse code line by line and check the instructions
|
||||
class Instr_extractor(object):
|
||||
|
||||
filepaths = []
|
||||
# Variables for checking lines
|
||||
numSeps = 0
|
||||
sem = 0
|
||||
db = {}
|
||||
sorted_db = []
|
||||
lncnt = 1
|
||||
for line in f:
|
||||
check_line(line)
|
||||
lncnt += 1
|
||||
f.close()
|
||||
fname = ''
|
||||
cntChar = ''
|
||||
first = True
|
||||
# Constant variables
|
||||
MARKER = r'//STARTLOOP'
|
||||
ASM_LINE = re.compile(r'\s[0-9a-f]+[:]')
|
||||
|
||||
def __init__(self, filepath):
    """Create an extractor for a list of objdump files.

    Args:
        filepath: list of file paths; check_all() processes them in order.
    """
    self.filepaths = filepath
    # db and sorted_db are declared as mutable class attributes. Without
    # rebinding them here every instance would count into the very same
    # dict, so results of one extractor would leak into all others.
    self.db = {}
    self.sorted_db = []
|
||||
|
||||
def check_all(self):
    """Run extract_instr() on every path in self.filepaths, in order."""
    for path in self.filepaths:
        self.extract_instr(path)
|
||||
|
||||
def is_elffile(self, filepath):
    """Tell whether filepath is a text listing that mentions 'format elf64'.

    Args:
        filepath: path of the file to inspect.

    Returns:
        True if filepath is an existing regular file and one of its lines
        contains the substring 'format elf64', False otherwise.
    """
    if not os.path.isfile(filepath):
        return False
    # errors='replace' keeps a non-text file (e.g. a raw binary passed by
    # mistake) from raising UnicodeDecodeError; it is simply reported as
    # "not an ELF listing". Scanning line by line stops at the first hit
    # instead of loading the whole dump into memory.
    with open(filepath, errors='replace') as f:
        return any('format elf64' in line for line in f)
|
||||
|
||||
|
||||
def extract_instr(self, asmFile):
    """Feed every line of one objdump file to check_line().

    Prints 'Invalid argument' and returns if is_elffile() rejects the
    file, and prints 'IOError: File not found' and returns if it cannot be
    opened. While reading, self.lncnt holds the 1-based number of the line
    currently being checked.

    Args:
        asmFile: path of the objdump listing to analyse.
    """
    # Record the file on the instance; a plain local would be discarded.
    self.fname = asmFile
    # Check if parameter is in the correct file format
    if not self.is_elffile(asmFile):
        print('Invalid argument')
        return
    # Open file
    try:
        f = open(asmFile, 'r')
    except IOError:
        print('IOError: File not found')
        # Nothing to read; going on would hit the unbound handle.
        return
    # Analyse code line by line and check the instructions; the with-block
    # closes the file even if check_line() raises.
    with f:
        self.lncnt = 1
        for line in f:
            self.check_line(line)
            self.lncnt += 1
|
||||
|
||||
|
||||
def check_line(line):
    """Track the marked code region and hand its ASM lines to check_instr().

    A line containing the marker records the indentation in front of it
    (numSeps) and arms the semaphore (sem = 2). While the semaphore is
    positive, lines matching asm_line are forwarded to check_instr() unless
    they contain '//'; other lines indented no deeper than the marker
    decrement the semaphore.
    """
    global numSeps
    global sem
    global first
    if marker in line:
        # The first marker decides whether indentation is counted in
        # whitespaces or tabs.
        if first:
            set_counter_char(line)
            first = False
        before_marker = re.split(marker, line)[0]
        numSeps = before_marker.count(cntChar)
        sem = 2
        return
    if sem <= 0:
        # Outside of any marked code snippet.
        return
    if re.search(asm_line, line):
        # ASM line: skip it when it carries a comment, otherwise analyse the
        # text after the last tab.
        if r'//' not in line:
            check_instr(line.split('\t')[-1])
        return
    indent = re.match(r'\s*', line).group()
    if indent.count(cntChar) <= numSeps:
        # Not in the loop anymore - or yet - so decrement the semaphore.
        sem -= 1
|
||||
def check_line(self, line):
    """Track the marked code region and hand its ASM lines to check_instr().

    A line containing MARKER records the indentation in front of it
    (self.numSeps) and arms the semaphore (self.sem = 2). While the
    semaphore is positive, lines matching ASM_LINE are forwarded to
    check_instr() unless they contain '//'; other lines indented no deeper
    than the marker decrement the semaphore.

    Args:
        line: one raw line of the objdump listing.
    """
    if self.MARKER in line:
        # The first marker decides whether indentation is counted in
        # whitespaces or tabs.
        if self.first:
            self.set_counter_char(line)
            self.first = False
        before_marker = re.split(self.MARKER, line)[0]
        self.numSeps = before_marker.count(self.cntChar)
        self.sem = 2
        return
    if self.sem <= 0:
        # Outside of any marked code snippet.
        return
    if re.search(self.ASM_LINE, line):
        # ASM line: skip it when it carries a comment, otherwise analyse the
        # text after the last tab.
        if r'//' not in line:
            self.check_instr(line.split('\t')[-1])
        return
    indent = re.match(r'\s*', line).group()
    if indent.count(self.cntChar) <= self.numSeps:
        # Not in the loop anymore - or yet - so decrement the semaphore.
        self.sem -= 1
|
||||
|
||||
# Check if separator is either tabulator or whitespace
|
||||
def set_counter_char(line):
    """Set the global cntChar to the character used to indent the marker.

    Looks only at the text in front of the marker: ' ' if it contains
    whitespaces but no tabs, '\\t' if it contains tabs but no whitespaces.

    Raises:
        NotImplementedError: if the prefix mixes both or contains neither.
    """
    global cntChar
    prefix = re.split(marker, line)[0]
    has_spaces = prefix.count(" ") != 0
    has_tabs = prefix.count("\t") != 0
    if has_spaces and not has_tabs:
        cntChar = ' '
    elif has_tabs and not has_spaces:
        cntChar = '\t'
    else:
        raise NotImplementedError(
            'Indentation of code is only supported for whitespaces and tabs.')
|
||||
|
||||
|
||||
def check_instr(instr):
|
||||
global db
|
||||
global lncnt
|
||||
global cnt
|
||||
global fname
|
||||
# Check for strange clang padding bytes
|
||||
while(instr.startswith("data32")):
|
||||
instr = instr[7:]
|
||||
# Seperate mnemonic and operands
|
||||
mnemonic = instr.split()[0]
|
||||
params = "".join(instr.split()[1:])
|
||||
# Check if line is not only a byte
|
||||
empty_byte = re.compile(r'[0-9a-f]{2}')
|
||||
if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2):
|
||||
return
|
||||
# Check if there's one or more operand and store all in a list
|
||||
param_list = flatten(separate_params(params))
|
||||
opList = list(param_list)
|
||||
# Check operands and seperate them by IMMEDIATE (IMD), REGISTER (REG), MEMORY (MEM) or
|
||||
# LABEL (LBL)
|
||||
for i in range(len(param_list)):
|
||||
op = param_list[i]
|
||||
if(len(op) <= 0):
|
||||
op = Parameter("NONE")
|
||||
elif(op[0] == '$'):
|
||||
op = Parameter("IMD")
|
||||
elif(op[0] == '%' and '(' not in op):
|
||||
j = len(op)
|
||||
opmask = False
|
||||
if('{' in op):
|
||||
j = op.index('{')
|
||||
opmask = True
|
||||
op = Register(op[1:j], opmask)
|
||||
elif('<' in op):
|
||||
op = Parameter("LBL")
|
||||
# Check if separator is either tabulator or whitespace
|
||||
def set_counter_char(self, line):
|
||||
numSpaces = (re.split(self.MARKER,line)[0]).count(' ')
|
||||
numTabs = (re.split(self.MARKER,line)[0]).count('\t')
|
||||
if(numSpaces != 0 and numTabs == 0):
|
||||
self.cntChar = ' '
|
||||
elif(numSpaces == 0 and numTabs != 0):
|
||||
self.cntChar = '\t'
|
||||
else:
|
||||
op = MemAddr(op)
|
||||
param_list[i] = str(op)
|
||||
opList[i] = op
|
||||
# Join mnemonic and operand(s) to an instruction form
|
||||
if(len(mnemonic) > 7):
|
||||
tabs = "\t"
|
||||
else:
|
||||
tabs = "\t\t"
|
||||
instr_form = mnemonic+tabs+(" ".join(param_list))
|
||||
# Check in database for instruction form and increment the counter
|
||||
if(instr_form in db):
|
||||
db[instr_form] = db[instr_form]+1
|
||||
else:
|
||||
db[instr_form] = 1
|
||||
# Create testcase for instruction form, since it is the first appearance of it
|
||||
# Only create benchmark if no label (LBL) is part of the operands
|
||||
do_bench = True
|
||||
for par in opList:
|
||||
if(str(par) == 'LBL' or str(par) == ''):
|
||||
do_bench = False
|
||||
if(do_bench):
|
||||
# Create testcase with reversed param list, due to the fact its intel syntax!
|
||||
tc = Testcase(mnemonic, list(reversed(opList)), '64')
|
||||
tc.write_testcase()
|
||||
err_msg = 'Indentation of code is only supported for whitespaces and tabs.'
|
||||
raise NotImplementedError(err_msg)
|
||||
|
||||
|
||||
def separate_params(params):
|
||||
param_list = [params]
|
||||
if(',' in params):
|
||||
if(')' in params):
|
||||
if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','):
|
||||
i = params.index(')')+1
|
||||
elif(params.index('(') < params.index(',')):
|
||||
return param_list
|
||||
def check_instr(self, instr):
|
||||
# Check for strange clang padding bytes
|
||||
while(instr.startswith('data32')):
|
||||
instr = instr[7:]
|
||||
# Seperate mnemonic and operands
|
||||
mnemonic = instr.split()[0]
|
||||
params = ''.join(instr.split()[1:])
|
||||
# Check if line is not only a byte
|
||||
empty_byte = re.compile(r'[0-9a-f]{2}')
|
||||
if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2):
|
||||
return
|
||||
# Check if there's one or more operand and store all in a list
|
||||
param_list = self.flatten(self.separate_params(params))
|
||||
opList = list(param_list)
|
||||
# Check operands and seperate them by IMMEDIATE (IMD), REGISTER (REG), MEMORY (MEM) or
|
||||
# LABEL (LBL)
|
||||
for i in range(len(param_list)):
|
||||
op = param_list[i]
|
||||
if(len(op) <= 0):
|
||||
op = Parameter('NONE')
|
||||
elif(op[0] == '$'):
|
||||
op = Parameter('IMD')
|
||||
elif(op[0] == '%' and '(' not in op):
|
||||
j = len(op)
|
||||
opmask = False
|
||||
if('{' in op):
|
||||
j = op.index('{')
|
||||
opmask = True
|
||||
op = Register(op[1:j], opmask)
|
||||
elif('<' in op):
|
||||
op = Parameter('LBL')
|
||||
else:
|
||||
i = params.index(',')
|
||||
else:
|
||||
i = params.index(',')
|
||||
param_list = [params[:i],separate_params(params[i+1:])]
|
||||
elif('#' in params):
|
||||
i = params.index('#')
|
||||
param_list = [params[:i]]
|
||||
return param_list
|
||||
|
||||
|
||||
def sort_db():
    """Store the (instruction form, count) pairs of db, highest count first,
    in the global sorted_db."""
    global sorted_db
    ranked = list(db.items())
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    sorted_db = ranked
|
||||
|
||||
|
||||
def print_sorted_db():
    """Print all instruction forms with their counts, most frequent first,
    followed by the sum of all counts."""
    sort_db()
    print("Number of\tmnemonic")
    print("calls\n")
    total = 0
    for entry in sorted_db:
        count = entry[1]
        print(str(count)+"\t\t"+entry[0])
        total += count
    print("\nCumulated number of instructions: "+str(total))
|
||||
|
||||
|
||||
def save_db():
    """Write db to '.cnt_asm_ops.db' in the current directory.

    Each entry becomes one line: instruction form, a tab, the count.
    """
    # The with-block closes the file even if a write fails, so the handle
    # cannot leak and buffered data is flushed deterministically.
    with open(".cnt_asm_ops.db", "w") as db_file:
        for instr_form, count in db.items():
            db_file.write(instr_form+"\t"+str(count)+"\n")
|
||||
|
||||
|
||||
def load_db():
|
||||
global db
|
||||
try:
|
||||
file = open(".cnt_asm_ops.db", "r")
|
||||
except FileNotFoundError:
|
||||
print("no database found in current directory")
|
||||
return
|
||||
for line in file:
|
||||
mnemonic = line.split('\t')[0]
|
||||
op = MemAddr(op)
|
||||
param_list[i] = str(op) if (type(op) is not Register) else str(op)+str(op.size)
|
||||
opList[i] = op
|
||||
# Join mnemonic and operand(s) to an instruction form
|
||||
if(len(mnemonic) > 7):
|
||||
tabs = "\t"
|
||||
params = line.split('\t')[1]
|
||||
numCalls = line.split("\t")[2][:-1]
|
||||
tabs = '\t'
|
||||
else:
|
||||
tabs = "\t\t"
|
||||
params = line.split('\t')[2]
|
||||
numCalls = line.split("\t")[3][:-1]
|
||||
instr_form = mnemonic+tabs+params
|
||||
db[instr_form] = int(numCalls)
|
||||
file.close()
|
||||
tabs = '\t\t'
|
||||
instr_form = mnemonic+tabs+(' '.join(param_list))
|
||||
# Check in database for instruction form and increment the counter
|
||||
if(instr_form in self.db):
|
||||
self.db[instr_form] = self.db[instr_form]+1
|
||||
else:
|
||||
self. db[instr_form] = 1
|
||||
# Create testcase for instruction form, since it is the first appearance of it
|
||||
# Only create benchmark if no label (LBL) is part of the operands
|
||||
do_bench = True
|
||||
for par in opList:
|
||||
if(str(par) == 'LBL' or str(par) == ''):
|
||||
do_bench = False
|
||||
if(do_bench):
|
||||
# Create testcase with reversed param list, due to the fact its intel syntax!
|
||||
tc = Testcase(mnemonic, list(reversed(opList)), '64')
|
||||
tc.write_testcase()
|
||||
|
||||
|
||||
def flatten(l):
    """Return the elements of an arbitrarily nested list as one flat list.

    Only list elements are expanded; an empty list is returned as is.
    """
    if l == []:
        return l
    head = l[0]
    # Expand a nested list in place, keep any other element as it is.
    front = flatten(head) if isinstance(head, list) else l[:1]
    return front + flatten(l[1:])
|
||||
def separate_params(self, params):
    """Split an operand string into a nested list of single operands.

    The result has the shape [first, [second, [third, ...]]]; commas that
    belong to a parenthesised memory operand do not split. Without any
    comma, everything from a '#' on is cut off.

    Args:
        params: operand string without whitespace.

    Returns:
        A (possibly nested) list of operand strings.
    """
    if ',' not in params:
        if '#' in params:
            return [params[:params.index('#')]]
        return [params]
    comma = params.index(',')
    if ')' in params:
        close = params.index(')')
        if close < len(params) - 1 and params[close + 1] == ',':
            # Memory operand first: split at the comma right behind ')'.
            cut = close + 1
        elif params.index('(') < comma:
            # All commas sit inside the parentheses: a single operand.
            return [params]
        else:
            cut = comma
    else:
        cut = comma
    return [params[:cut], self.separate_params(params[cut + 1:])]
|
||||
|
||||
|
||||
def sort_db(self):
    """Store the (instruction form, count) pairs of self.db, highest count
    first, in self.sorted_db."""
    ranked = list(self.db.items())
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    self.sorted_db = ranked
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# load_db()
|
||||
# r0 = Register("ymm0")
|
||||
# r1 = Register("xmm0")
|
||||
# r64 = Register("rax")
|
||||
# r32 = Register("eax")
|
||||
# mem0 = MemAddr('(%rax, %esi, 4)')
|
||||
# tc = Testcase("XOR", [r32, r32], '64')
|
||||
# tc.write_testcase()
|
||||
# create_testcase("VADDPD", [r0, r0, r0])
|
||||
if(len(sys.argv) > 1):
|
||||
for i in range(1,len(sys.argv)):
|
||||
extract_instr(sys.argv[i])
|
||||
print_sorted_db()
|
||||
# save_db()
|
||||
def print_sorted_db(self):
    """Print all instruction forms with their counts, most frequent first,
    followed by the sum of all counts."""
    self.sort_db()
    print('Number of\tmnemonic')
    print('calls\n')
    total = 0
    for entry in self.sorted_db:
        count = entry[1]
        print(str(count)+'\t\t'+entry[0])
        total += count
    print('\nCumulated number of instructions: '+str(total))
|
||||
|
||||
|
||||
def save_db(self):
    """Write self.db to '.cnt_asm_ops.db' in the current directory.

    Each entry becomes one line: instruction form, a tab, the count.
    """
    # The with-block closes the file even if a write fails, so the handle
    # cannot leak and buffered data is flushed deterministically.
    with open('.cnt_asm_ops.db', 'w') as db_file:
        for instr_form, count in self.db.items():
            db_file.write(instr_form+'\t'+str(count)+'\n')
|
||||
|
||||
|
||||
def load_db(self):
    """Merge the counts stored in '.cnt_asm_ops.db' into self.db.

    Expects the line layout written by save_db(): instruction form, a tab,
    the count. The instruction form itself is the mnemonic, one tab (for
    mnemonics longer than 7 characters) or two tabs, and the operands.
    Prints a notice and returns if the file does not exist.

    Raises:
        IndexError, ValueError: if a non-blank line does not follow that
            layout.
    """
    try:
        db_file = open('.cnt_asm_ops.db', 'r')
    except FileNotFoundError:
        print('no database found in current directory')
        return
    # The with-block closes the file even if a malformed line raises.
    with db_file:
        for line in db_file:
            if not line.strip():
                # Tolerate blank lines instead of failing on them.
                continue
            # Strip only the line break: slicing off the last character
            # would eat a digit when the final line has no newline.
            fields = line.rstrip('\n').split('\t')
            mnemonic = fields[0]
            # Join mnemonic and operand(s) to an instruction form
            if len(mnemonic) > 7:
                tabs = '\t'
                params, numCalls = fields[1], fields[2]
            else:
                # Two tabs leave one empty field behind the mnemonic.
                tabs = '\t\t'
                params, numCalls = fields[2], fields[3]
            self.db[mnemonic+tabs+params] = int(numCalls)
|
||||
|
||||
|
||||
def flatten(self, l):
    """Return the elements of an arbitrarily nested list as one flat list.

    Only list elements are expanded; an empty list is returned as is.
    """
    if l == []:
        return l
    head = l[0]
    # Expand a nested list in place, keep any other element as it is.
    front = self.flatten(head) if isinstance(head, list) else l[:1]
    return front + self.flatten(l[1:])
|
||||
|
||||
|
||||
def main():
    """Command line entry point.

    Parses the arguments, optionally loads the stored database, checks all
    given objdump files, prints the instruction forms sorted by number of
    occurrences and optionally stores the updated database.
    """
    # Parse args
    parser = argparse.ArgumentParser(
        description='Returns a list of all instruction forms in the given files '
                    'sorted by their number of occurrences.')
    parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.2')
    parser.add_argument('filepath', nargs='+', help='path to objdump(s)')
    parser.add_argument('-l', '--load', dest='load', action='store_true',
                        help='load database before checking new files')
    # The database is written once all files have been checked (see below),
    # so the help text must say "after".
    parser.add_argument('-s', '--store', dest='store', action='store_true',
                        help='store database after checking new files')

    # Create object and store arguments as attribute
    inp = parser.parse_args()
    ie = Instr_extractor(inp.filepath)

    # Do work
    if inp.load:
        ie.load_db()
    ie.check_all()
    ie.print_sorted_db()
    if inp.store:
        ie.save_db()
|
||||
|
||||
## ---------main method----------
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user