Merge pull request #14 from RRZE-HPC/pep8

PEP8 and Coding Style Changes
This commit is contained in:
Julian
2018-11-30 13:55:39 +01:00
committed by GitHub
7 changed files with 385 additions and 411 deletions

View File

@@ -11,19 +11,17 @@ from osaca.param import Register, MemAddr
class Scheduler(object):
arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8, 'ZEN': 10}
dv_port_dict = {'SKL': 0, 'ZEN': 3}
ports = None # type: int
instrList = None # type: list<list<str,Param[,Param][,Param],str>>,
ports = None # type: int
instrList = None # type: list<list<str,Param[,Param][,Param],str>>,
# content of most inner list in instrList: instr, operand(s), instr form
df = None # type: DataFrame
df = None # type: DataFrame
# for parallel ld/st in archs with 1 st/cy and >1 ld/cy, able to do 1 st and 1 ld in 1cy
ld_ports = None # type: list<int>
ld_ports = None # type: list<int>
# enable flag for parallel ld/st
en_par_ldst = False # type: boolean
dv_port = -1 # type: int
en_par_ldst = False # type: boolean
dv_port = -1 # type: int
def __init__(self, arch, instruction_list):
arch = arch.upper()
@@ -33,7 +31,7 @@ class Scheduler(object):
print('Architecture not supportet for EU scheduling.', file=sys.stderr)
sys.exit(1)
# check for parallel ld/st in a cycle
if(arch == 'ZEN'):
if arch == 'ZEN':
self.en_par_ldst = True
self.ld_ports = [9, 10]
# check for DV port
@@ -44,13 +42,14 @@ class Scheduler(object):
# do nothing
pass
self.instrList = instruction_list
#curr_dir = os.path.realpath(__file__)[:-11]
# curr_dir = os.path.realpath(__file__)[:-11]
osaca_dir = os.path.expanduser('~/.osaca/')
self.df = pd.read_csv(osaca_dir + 'data/' + arch.lower() + '_data.csv', quotechar='"',
converters={'ports': ast.literal_eval})
def new_schedule(self):
"""
Schedules Instruction Form list and calculates port bindings.
Schedule Instruction Form list and calculate port bindings.
Returns
-------
@@ -62,18 +61,18 @@ class Scheduler(object):
# Initialize ports
# Add DV port, if it is existing
tmp_port = 0
if(self.dv_port != -1):
if self.dv_port != -1:
tmp_port = 1
occ_ports = [[0] * (self.ports + tmp_port) for x in range(len(self.instrList))]
port_bndgs = [0] * (self.ports + tmp_port)
# Store instruction counter for parallel ld/st
par_ldst = 0
# Count the number of store instr if we schedule for an architecture with par ld/st
if(self.en_par_ldst):
if self.en_par_ldst:
for i, instrForm in enumerate(self.instrList):
if(isinstance(instrForm[1], MemAddr) and len(instrForm) > 3
and not instrForm[0].startswith('cmp')):
#print('({}, {}) is st --> par_ldst = {}'.format(i, instrForm[0], par_ldst + 1))
if (isinstance(instrForm[1], MemAddr) and len(instrForm) > 3
and not instrForm[0].startswith('cmp')):
# print('({}, {}) is st --> par_ldst = {}'.format(i, instrForm[0], par_ldst + 1))
par_ldst += 1
# Check if there's a port occupation stored in the CSV, otherwise leave the
# occ_port list item empty
@@ -82,11 +81,11 @@ class Scheduler(object):
search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm)
entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr]
tup = entry.ports.values[0]
if(len(tup) == 1 and tup[0] == -1):
if len(tup) == 1 and tup[0] == -1:
raise IndexError()
except IndexError:
# Instruction form not in CSV
if(instrForm[0][:3] == 'nop'):
if instrForm[0][:3] == 'nop':
sched += self.get_line(occ_ports[i], '* ' + instrForm[-1])
else:
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
@@ -94,18 +93,17 @@ class Scheduler(object):
occ_ports[i] = list(tup)
# Check if it's a ld including instr
p_flg = ''
if(self.en_par_ldst):
if self.en_par_ldst:
# Check for ld
if(isinstance(instrForm[-2], MemAddr) or
(len(instrForm) > 4 and isinstance(instrForm[2], MemAddr))):
if(par_ldst > 0):
if (isinstance(instrForm[-2], MemAddr) or
(len(instrForm) > 4 and isinstance(instrForm[2], MemAddr))):
if par_ldst > 0:
par_ldst -= 1
p_flg = 'P '
for port in self.ld_ports:
tmp_port_add = 1 if(self.dv_port != -1 and self.dv_port < port) else 0
occ_ports[i][port] = '(' + str(occ_ports[i][port]) + ')'
# Write schedule line
if(len(p_flg) > 0):
if len(p_flg) > 0:
sched += self.get_line(occ_ports[i], p_flg + instrForm[-1])
for port in self.ld_ports:
occ_ports[i][port] = 0
@@ -113,11 +111,11 @@ class Scheduler(object):
sched += self.get_line(occ_ports[i], instrForm[-1])
# Add throughput to total port binding
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
return (sched, port_bndgs)
return sched, port_bndgs
def schedule(self):
"""
Schedules Instruction Form list and calculates port bindings.
Schedule Instruction Form list and calculate port bindings.
Returns
-------
@@ -128,7 +126,6 @@ class Scheduler(object):
wTP = False
sched = self.get_head()
# Initialize ports
occ_ports = [[0] * self.ports for x in range(len(self.instrList))]
port_bndgs = [0] * self.ports
# Check if there's a port occupation stored in the CSV, otherwise leave the
# occ_port list item empty
@@ -137,16 +134,16 @@ class Scheduler(object):
search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm)
entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr]
tup = entry.ports.values[0]
if(len(tup) == 1 and tup[0][0] == -1):
if len(tup) == 1 and tup[0][0] == -1:
raise IndexError()
except IndexError:
# Instruction form not in CSV
if(instrForm[0][:3] == 'nop'):
if instrForm[0][:3] == 'nop':
sched += self.get_line(occ_ports[i], '* ' + instrForm[-1])
else:
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
continue
if(wTP):
if wTP:
# Get the occurance of each port from the occupation list
port_occurances = self.get_port_occurances(tup)
# Get 'occurance groups'
@@ -155,34 +152,34 @@ class Scheduler(object):
tp_ges = entry.TP.values[0] * len(occurance_groups[0])
for occGroup in occurance_groups:
for port in occGroup:
occ_ports[i][port] = tp_ges/len(occGroup)
occ_ports[i][port] = tp_ges / len(occGroup)
else:
variations = len(tup)
t_all = self.flatten(tup)
if(entry.TP.values[0] == 0):
if entry.TP.values[0] == 0:
t_all = ()
if(variations == 1):
if variations == 1:
for j in tup[0]:
occ_ports[i][j] = entry.TP.values[0]
else:
for j in range(0, self.ports):
occ_ports[i][j] = t_all.count(j) / variations
occ_ports[i][j] = t_all.count(j) / variations
# Write schedule line
sched += self.get_line(occ_ports[i], instrForm[-1])
# Add throughput to total port binding
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
return (sched, port_bndgs)
return sched, port_bndgs
def flatten(self, l):
    """
    Flatten an arbitrarily nested sequence into a flat sequence of the same type.

    Elements that are instances of the outer sequence's type are expanded in
    place; every other element is kept as is. The traversal is iterative, so
    long or deeply nested inputs cannot exhaust the recursion limit.

    Parameters
    ----------
    l : tuple or list
        (Possibly nested) sequence, e.g. the port occupation tuples of an
        instruction form

    Returns
    -------
    tuple or list
        Flat sequence of the same type as `l`; an empty input is returned
        unchanged
    """
    if len(l) == 0:
        return l
    container = type(l)
    flat = []
    # Stack of partially consumed iterators: the innermost sequence is on top
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, container):
                # Descend; the outer iterator keeps its position for later
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Innermost sequence exhausted, resume with its parent
            pending.pop()
    return container(flat)
def schedule_fcfs(self):
"""
Schedules Instruction Form list for a single run with latencies.
Schedule Instruction Form list for a single run with latencies.
Returns
-------
@@ -198,18 +195,18 @@ class Scheduler(object):
search_string = instrForm[0] + '-' + self.get_operand_suffix(instrForm)
entry = self.df.loc[lambda df, sStr=search_string: df.instr == sStr]
tup = entry.ports.values[0]
if(len(tup) == 1 and tup[0][0] == -1):
if len(tup) == 1 and tup[0][0] == -1:
raise IndexError()
except IndexError:
# Instruction form not in CSV
sched += self.get_line([0] * self.ports, '* ' + instrForm[-1])
continue
found = False
while(not found):
while not found:
for portOcc in tup:
# Test if chosen instruction form port occupation suits the current CPU port
# occupation
if(self.test_ports_fcfs(occ_ports, portOcc)):
if self.test_ports_fcfs(occ_ports, portOcc):
# Current port occupation fits for chosen port occupation of instruction!
found = True
good = [entry.LT.values[0] if (j in portOcc) else 0 for j in
@@ -219,16 +216,15 @@ class Scheduler(object):
occ_ports = [occ_ports[j] + good[j] for j in range(0, self.ports)]
break
# Step
occ_ports = [j-1 if (j > 0) else 0 for j in occ_ports]
if(entry.LT.values[0] != 0):
occ_ports = [j - 1 if (j > 0) else 0 for j in occ_ports]
if entry.LT.values[0] != 0:
total += 1
total += max(occ_ports)
return (sched, total)
return sched, total
def get_occurance_groups(self, port_occurances):
"""
Groups ports in groups by the number of their occurance and sorts
groups by cardinality
Group ports in groups by the number of their occurrence and sorts groups by cardinality.
Parameters
----------
@@ -254,8 +250,7 @@ class Scheduler(object):
def get_port_occurances(self, tups):
"""
Returns the number of each port occurance for the possible port
occupations
Return the number of each port occurrence for the possible port occupations.
Parameters
----------
@@ -276,7 +271,7 @@ class Scheduler(object):
def test_ports_fcfs(self, occ_ports, needed_ports):
"""
Test if current configuration of ports is possible and returns boolean
Test if current configuration of ports is possible and returns boolean.
Parameters
----------
@@ -292,13 +287,13 @@ class Scheduler(object):
False if not
"""
for port in needed_ports:
if(occ_ports[port] != 0):
if occ_ports[port] != 0:
return False
return True
def get_report_info(self):
"""
Creates Report information including all needed annotations.
Create Report information including all needed annotations.
Returns
-------
@@ -306,15 +301,15 @@ class Scheduler(object):
String containing the report information
"""
analysis = 'Throughput Analysis Report\n' + ('-' * 26) + '\n'
annotations = ('P - Load operation can be hidden behind a past or future store instruction\n'
'X - No information for this instruction in data file\n'
'* - Instruction micro-ops not bound to a port\n'
'\n')
annotations = (
'P - Load operation can be hidden behind a past or future store instruction\n'
'X - No information for this instruction in data file\n'
'* - Instruction micro-ops not bound to a port\n\n')
return analysis + annotations
def get_head(self):
"""
Creates right heading for CPU architecture.
Create right heading for CPU architecture.
Returns
-------
@@ -322,15 +317,15 @@ class Scheduler(object):
String containing the header
"""
horiz_line = '-' * 7 * self.ports
if(self.dv_port != -1):
if self.dv_port != -1:
horiz_line += '-' * 6
horiz_line += '-\n'
port_anno = (' ' * (math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles'
+ ' ' * (math.ceil((len(horiz_line) - 24) / 2)) + '\n')
port_anno = (' ' * int(math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles'
+ ' ' * int(math.ceil((len(horiz_line) - 24) / 2)) + '\n')
port_line = ''
for i in range(0, self.ports):
port_line += '| {} '.format(i)
if(i == self.dv_port):
if i == self.dv_port:
port_line = port_line + '- DV '
port_line += '|\n'
head = port_anno + port_line + horiz_line
@@ -353,19 +348,18 @@ class Scheduler(object):
String for output containing port scheduling for instr_name
"""
line = ''
r_space = ' '
for p_num, i in enumerate(occ_ports):
pipe = '|'
if(isinstance(i, str)):
if isinstance(i, str):
cycles = i
i = float(i[1:-1])
r_space = ''
else:
cycles = ' ' if (i == 0) else '%.2f' % float(i)
r_space = ' '
if(p_num == self.dv_port + 1 and p_num != 0):
if p_num == self.dv_port + 1 and p_num != 0:
pipe = ' '
if(i >= 10):
if i >= 10:
line += pipe + cycles + r_space
else:
line += pipe + ' ' + cycles + r_space
@@ -374,7 +368,7 @@ class Scheduler(object):
def get_port_binding(self, port_bndg):
"""
Creates port binding out of scheduling result.
Create port binding out of scheduling result.
Parameters
----------
@@ -392,12 +386,13 @@ class Scheduler(object):
port_line = '| Port |'
after_dv = 0
for i in range(0, self.ports):
if(i == self.dv_port):
port_line += ' ' * sp_left[i] + str(i) + ' ' * sp_right[i] + '-'
port_line += ' ' * (sp_left[i+1] - 1) + 'DV' + ' ' * sp_right[i+1] + '|'
if i == self.dv_port:
port_line += ' ' * int(sp_left[i]) + str(i) + ' ' * int(sp_right[i]) + '-'
port_line += ' ' * int(sp_left[i + 1] - 1) + 'DV' + ' ' * int(sp_right[i + 1]) + '|'
after_dv = 1
else:
port_line += ' ' * sp_left[i + after_dv] + str(i) + ' ' * sp_right[i + after_dv]
port_line += (' ' * int(sp_left[i + after_dv]) + str(i)
+ ' ' * int(sp_right[i + after_dv]))
port_line += '|'
port_line += '\n'
cyc_line = '| Cycles |'
@@ -410,15 +405,15 @@ class Scheduler(object):
return binding
def get_spaces(self, port_bndg):
    """
    Calculate the padding around each port label of the port binding table.

    The width of a column is the length of the value rounded to two decimals
    plus one. The split into left and right padding uses integer arithmetic
    only, so the results are always `int` and can be used directly as string
    repetition counts.

    Parameters
    ----------
    port_bndg : list of numbers
        Port binding values, one per column

    Returns
    -------
    (list of int, list of int, int)
        Left padding per column (half the width, rounded up), right padding
        per column (half the width, rounded down) and the sum of all column
        widths plus two per column
    """
    widths = [len(str(round(x, 2))) + 1 for x in port_bndg]
    total = sum(width + 2 for width in widths)
    sp_left = [(width + 1) // 2 for width in widths]
    sp_right = [width // 2 for width in widths]
    return sp_left, sp_right, total
def get_operand_suffix(self, instr_form):
"""
Creates operand suffix out of list of Parameters.
Create operand suffix out of list of Parameters.
Parameters
----------
@@ -431,11 +426,10 @@ class Scheduler(object):
Operand suffix for searching in data file
"""
op_ext = []
for i in range(1, len(instr_form)-1):
optmp = ''
if(isinstance(instr_form[i], Register) and instr_form[i].reg_type == 'GPR'):
for i in range(1, len(instr_form) - 1):
if isinstance(instr_form[i], Register) and instr_form[i].reg_type == 'GPR':
optmp = 'r' + str(instr_form[i].size)
elif(isinstance(instr_form[i], MemAddr)):
elif isinstance(instr_form[i], MemAddr):
optmp = 'mem'
else:
optmp = str(instr_form[i]).lower()

View File

@@ -29,23 +29,20 @@ class InstrExtractor(object):
self.extract_instr(self.filepaths[i])
def is_elffile(self, filepath):
    """
    Check if a file exists and contains the text 'format elf64'.

    The file is read as text and scanned line by line, stopping at the first
    hit instead of loading the whole file into memory.

    Parameters
    ----------
    filepath : str
        Path to the file to check

    Returns
    -------
    bool
        True if the file exists and contains 'format elf64',
        False if it does not exist, does not contain the text or cannot be
        decoded as text
    """
    if not os.path.isfile(filepath):
        return False
    try:
        with open(filepath) as f:
            return any('format elf64' in line for line in f)
    except UnicodeDecodeError:
        # Not a text file, so it cannot carry the expected header line
        return False
def extract_instr(self, asm_file):
# Check if parameter is in the correct file format
if(not self.is_elffile(asm_file)):
if not self.is_elffile(asm_file):
print('Invalid argument')
return
# Open file
try:
f = open(asm_file, 'r')
except IOError:
print('IOError: File not found')
f = open(asm_file, 'r')
# Analyse code line by line and check the instructions
self.lncnt = 1
for line in f:
@@ -55,34 +52,34 @@ class InstrExtractor(object):
def check_line(self, line):
    """
    Process one line of the input and track whether it belongs to the marked code.

    A line containing the marker (re)starts the tracking: the indentation in
    front of the marker is counted and the semaphore is set to 2. While the
    semaphore is positive, assembly lines are handed to `check_instr` and
    every non-assembly line that is indented no deeper than the marker
    decrements the semaphore.

    Parameters
    ----------
    line : str
        Line of the input file
    """
    if self.MARKER in line:
        # On the first marker, find out if the high level code is indented
        # with whitespaces or tabs
        if self.first:
            self.set_counter_char(line)
            self.first = False
        # The marker is tested literally above, so it is split literally here
        # as well (a regex split would disagree for markers containing regex
        # metacharacters)
        prefix = line.partition(self.MARKER)[0]
        self.numSeps = prefix.count(self.cntChar)
        self.sem = 2
        return
    if self.sem <= 0:
        # Outside of the marked code snippet
        return
    # We're in the marked code snippet
    # Check if the line is ASM code and - if not - check if we're still in the loop
    if re.search(self.ASM_LINE, line):
        # Lines containing comments are skipped
        if r'//' in line:
            return
        # The instruction is the last tab separated field of the line
        self.check_instr(line.rsplit('\t', 1)[-1])
        return
    leading_whitespace = re.match(r'\s*', line).group(0)
    if leading_whitespace.count(self.cntChar) <= self.numSeps:
        # Not in the loop anymore - or yet - so we decrement the semaphore
        self.sem -= 1
# Check if separator is either tabulator or whitespace
def set_counter_char(self, line):
num_spaces = (re.split(self.MARKER, line)[0]).count(' ')
num_tabs = (re.split(self.MARKER, line)[0]).count('\t')
if(num_spaces != 0 and num_tabs == 0):
if num_spaces != 0 and num_tabs == 0:
self.cntChar = ' '
elif(num_spaces == 0 and num_tabs != 0):
elif num_spaces == 0 and num_tabs != 0:
self.cntChar = '\t'
else:
err_msg = 'Indentation of code is only supported for whitespaces and tabs.'
@@ -90,14 +87,14 @@ class InstrExtractor(object):
def check_instr(self, instr):
# Check for strange clang padding bytes
while(instr.startswith('data32')):
while instr.startswith('data32'):
instr = instr[7:]
# Seperate mnemonic and operands
mnemonic = instr.split()[0]
params = ''.join(instr.split()[1:])
# Check if line is not only a byte
empty_byte = re.compile(r'[0-9a-f]{2}')
if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2):
if re.match(empty_byte, mnemonic) and len(mnemonic) == 2:
return
# Check if there's one or more operand and store all in a list
param_list = self.flatten(self.separate_params(params))
@@ -106,59 +103,59 @@ class InstrExtractor(object):
# LABEL (LBL)
for i in range(len(param_list)):
op = param_list[i]
if(len(op) <= 0):
if len(op) <= 0:
op = Parameter('NONE')
elif(op[0] == '$'):
elif op[0] == '$':
op = Parameter('IMD')
elif(op[0] == '%' and '(' not in op):
elif op[0] == '%' and '(' not in op:
j = len(op)
opmask = False
if('{' in op):
if '{' in op:
j = op.index('{')
opmask = True
op = Register(op[1:j], opmask)
elif('<' in op):
elif '<' in op:
op = Parameter('LBL')
else:
op = MemAddr(op)
param_list[i] = str(op) if (type(op) is not Register) else str(op)+str(op.size)
param_list[i] = str(op) if (type(op) is not Register) else str(op) + str(op.size)
op_list[i] = op
# Join mnemonic and operand(s) to an instruction form
if(len(mnemonic) > 7):
if len(mnemonic) > 7:
tabs = '\t'
else:
tabs = '\t\t'
instr_form = mnemonic+tabs+(' '.join(param_list))
instr_form = mnemonic + tabs + (' '.join(param_list))
# Check in data file for instruction form and increment the counter
if(instr_form in self.db):
self.db[instr_form] = self.db[instr_form]+1
if instr_form in self.db:
self.db[instr_form] = self.db[instr_form] + 1
else:
self. db[instr_form] = 1
self.db[instr_form] = 1
# Create testcase for instruction form, since it is the first appearance of it
# Only create benchmark if no label (LBL) is part of the operands
do_bench = True
for par in op_list:
if(str(par) == 'LBL' or str(par) == ''):
if str(par) == 'LBL' or str(par) == '':
do_bench = False
if(do_bench):
if do_bench:
# Create testcase with reversed param list, due to the fact its intel syntax!
tc = Testcase(mnemonic, list(reversed(op_list)), '64')
tc.write_testcase()
def separate_params(self, params):
    """
    Split the operand string of an instruction into its operands.

    Operands are separated at every comma that is not enclosed in
    parentheses, so the commas inside a memory address such as
    '(%rax,%rcx,8)' never split an operand, regardless of the position of the
    memory operand. Everything from the first '#' on is dropped beforehand.

    Parameters
    ----------
    params : str
        Operands of an instruction without any whitespace

    Returns
    -------
    list
        Right-nested list of operands, i.e. [first, [second, [third]]], or a
        list with one element if there is only one operand
    """
    # Cut off a trailing '#' annotation before looking for separators
    comment_start = params.find('#')
    if comment_start != -1:
        params = params[:comment_start]
    operands = []
    depth = 0
    start = 0
    for pos, char in enumerate(params):
        if char == '(':
            depth += 1
        elif char == ')':
            # Tolerate a stray ')' instead of going negative
            depth = max(depth - 1, 0)
        elif char == ',' and depth == 0:
            operands.append(params[start:pos])
            start = pos + 1
    operands.append(params[start:])
    # Rebuild the nested shape the callers flatten afterwards
    nested = [operands[-1]]
    for operand in reversed(operands[:-1]):
        nested = [operand, nested]
    return nested
@@ -172,14 +169,14 @@ class InstrExtractor(object):
print('Number of\tmnemonic')
print('calls\n')
for i in range(len(self.sorted_db)):
print(str(self.sorted_db[i][1])+'\t\t'+self.sorted_db[i][0])
print(str(self.sorted_db[i][1]) + '\t\t' + self.sorted_db[i][0])
total += self.sorted_db[i][1]
print('\nCumulated number of instructions: '+str(total))
print('\nCumulated number of instructions: ' + str(total))
def save_db(self):
    """
    Write the instruction form counters to '.cnt_asm_ops.db' in the current directory.

    Every entry of `self.db` becomes one line consisting of the instruction
    form, a tab and the number of calls. An existing file is overwritten. The
    file is closed even if writing fails.
    """
    with open('.cnt_asm_ops.db', 'w') as db_file:
        db_file.writelines(
            instr_form + '\t' + str(num_calls) + '\n'
            for instr_form, num_calls in self.db.items())
def load_db(self):
@@ -191,7 +188,7 @@ class InstrExtractor(object):
for line in file:
mnemonic = line.split('\t')[0]
# Join mnemonic and operand(s) to an instruction form
if(len(mnemonic) > 7):
if len(mnemonic) > 7:
tabs = '\t'
params = line.split('\t')[1]
num_calls = line.split('\t')[2][:-1]
@@ -199,41 +196,43 @@ class InstrExtractor(object):
tabs = '\t\t'
params = line.split('\t')[2]
num_calls = line.split('\t')[3][:-1]
instr_form = mnemonic+tabs+params
instr_form = mnemonic + tabs + params
self.db[instr_form] = int(num_calls)
file.close()
def flatten(self, l):
    """
    Flatten an arbitrarily nested list into a flat list.

    Nested lists are expanded in place, every other element is kept as is.
    The traversal is iterative, so long or deeply nested inputs cannot
    exhaust the recursion limit.

    Parameters
    ----------
    l : list
        (Possibly nested) list, e.g. the result of `separate_params`

    Returns
    -------
    list
        Flat list of all non-list elements in their original order; an empty
        input is returned unchanged
    """
    if not l:
        return l
    flat = []
    # Stack of partially consumed iterators: the innermost list is on top
    pending = [iter(l)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, list):
                # Descend; the outer iterator keeps its position for later
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Innermost list exhausted, resume with its parent
            pending.pop()
    return flat
def main():
    """
    Command line entry point: count the instruction forms of the given files.

    Optionally loads a previously stored data file first (-l/--load), then
    checks all given files, prints the sorted result and optionally stores
    the data file afterwards (-s/--store).
    """
    # Parse args
    description = ('Returns a list of all instruction forms in the '
                   'given files sorted by their number of '
                   'occurrences.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.2')
    parser.add_argument('filepath', nargs='+', help='path to objdump(s)')
    parser.add_argument('-l', '--load', dest='load', action='store_true',
                        help='load data file before checking new files')
    # The data file is written after the files were checked (see below), so
    # the help text says "after"
    parser.add_argument('-s', '--store', dest='store', action='store_true',
                        help='store data file after checking new files')

    # Create object and store arguments as attribute
    inp = parser.parse_args()
    ie = InstrExtractor(inp.filepath)

    # Do work
    if inp.load:
        ie.load_db()
    ie.check_all()
    ie.print_sorted_db()
    if inp.store:
        ie.save_db()
# ---------main method----------
if __name__ == '__main__':
main()

View File

@@ -16,9 +16,8 @@ from osaca.eu_sched import Scheduler
from osaca.testcase import Testcase
class Osaca(object):
class OSACA(object):
arch = None
filepath = None
srcCode = None
df = None
instr_forms = None
@@ -37,23 +36,23 @@ class Osaca(object):
ASM_LINE = re.compile(r'\s[0-9a-f]+[:]')
# Matches every variation of the IACA start marker
IACA_SM = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx.*\n\s*\.byte[ \t]+100.*'
+ r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte'
+ r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte'
r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
# Matches every variation of the IACA end marker
IACA_EM = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx.*\n\s*\.byte[ \t]+100.*'
+ r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte'
+ r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
r'((,[ \t]*103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte'
r'[ \t]+103.*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
def __init__(self, _arch, _filepath, output=sys.stdout):
def __init__(self, _arch, file_path, output=sys.stdout):
self.arch = _arch
self.filepath = _filepath
self.file_path = file_path
self.instr_forms = []
self.file_output = output
# Check if data files are already in usr dir, otherwise create them
if(not os.path.isdir(self.osaca_dir + 'data')):
if not os.path.isdir(self.osaca_dir + 'data'):
print('Copying files in user directory...', file=self.file_output, end='')
subprocess.call(['mkdir', '-p', self.osaca_dir])
subprocess.call(['cp', '-r',
subprocess.call(['cp', '-r',
'/'.join(os.path.realpath(__file__).split('/')[:-1]) + '/data',
self.osaca_dir])
print('Done!', file=self.file_output)
@@ -61,82 +60,80 @@ class Osaca(object):
# -----------------main functions depending on arguments--------------------
def include_ibench(self):
"""
Reads ibench output and includes it in the architecture specific csv
file.
Read ibench output and include it in the architecture specific csv file.
"""
# Check args and exit program if something's wrong
if(not self.check_arch()):
if not self.check_arch():
print('Invalid microarchitecture.', file=sys.stderr)
sys.exit(1)
if(not self.check_file()):
if not self.check_file():
print('Invalid file path or file format.', file=sys.stderr)
sys.exit(1)
# Check for database for the chosen architecture
self.df = self.read_csv()
# Create sequence of numbers and their reciprokals for validate the measurements
# Create sequence of numbers and their reciprocals for validate the measurements
cyc_list, reci_list = self.create_sequences()
print('Everything seems fine! Let\'s start!', file=self.file_output)
new_data = []
added_vals = 0
for line in self.srcCode:
if('Using frequency' in line or len(line) == 0):
if 'Using frequency' in line or len(line) == 0:
continue
clmn = 'LT'
column = 'LT'
instr = line.split()[0][:-1]
if('TP' in line):
if 'TP' in line:
# We found a command with a throughput value. Get instruction and the number of
# clock cycles and remove the '-TP' suffix.
clmn = 'TP'
column = 'TP'
instr = instr[:-3]
# Otherwise it is a latency value. Nothing to do.
clk_cyc = line.split()[1]
clk_cyc = float(line.split()[1])
clk_cyc_tmp = clk_cyc
clk_cyc = self.validate_val(clk_cyc, instr, True if (clmn == 'TP') else False,
clk_cyc = self.validate_val(clk_cyc, instr, True if (column == 'TP') else False,
cyc_list, reci_list)
txt_output = True if (clk_cyc_tmp == clk_cyc) else False
txt_output = (clk_cyc_tmp == clk_cyc)
val = -2
new = False
try:
entry = self.df.loc[lambda df, inst=instr: df.instr == inst, clmn]
entry = self.df.loc[lambda df, inst=instr: df.instr == inst, column]
val = entry.values[0]
# If val is -1 (= not filled with a valid value) add it immediately
if val == -1:
self.df.set_value(entry.index[0], column, clk_cyc)
added_vals += 1
continue
except IndexError:
# Instruction not in database yet --> add it
new = True
# First check if LT or TP value has already been added before
for i, item in enumerate(new_data):
if(instr in item):
if(clmn == 'TP'):
if instr in item:
if column == 'TP':
new_data[i][1] = clk_cyc
elif(clmn == 'LT'):
elif column == 'LT':
new_data[i][2] = clk_cyc
new = False
break
if(new and clmn == 'TP'):
if new and column == 'TP':
new_data.append([instr, clk_cyc, '-1', (-1,)])
elif(new and clmn == 'LT'):
elif new and column == 'LT':
new_data.append([instr, '-1', clk_cyc, (-1,)])
new = True
added_vals += 1
# If val is -1 (= not filled with a valid value) add it immediately
if(val == -1):
self.df.set_value(entry.index[0], clmn, clk_cyc)
added_vals += 1
continue
if(not new and abs((val/np.float64(clk_cyc))-1) > 0.05):
print('Different measurement for {} ({}): {}(old) vs. '.format(instr, clmn, val)
if not new and abs((val / np.float64(clk_cyc)) - 1) > 0.05:
print('Different measurement for {} ({}): {}(old) vs. '.format(instr, column, val)
+ '{}(new)\nPlease check for correctness '.format(clk_cyc)
+ '(no changes were made).', file=self.file_output)
txt_output = True
if(txt_output):
if txt_output:
print('', file=self.file_output)
txt_output = False
# Now merge the DataFrames and write new csv file
self.df = self.df.append(pd.DataFrame(new_data, columns=['instr', 'TP', 'LT', 'ports']),
ignore_index=True)
csv = self.df.to_csv(index=False)
self.write_csv(csv)
print('ibench output {} '.format(self.filepath.split('/')[-1])
+ 'successfully in data file included.', file=self.file_output)
print('ibench output {}'.format(self.file_path.split('/')[-1]),
'successfully in data file included.', file=self.file_output)
print('{} values were added.'.format(added_vals), file=self.file_output)
def inspect_binary(self):
@@ -144,10 +141,10 @@ class Osaca(object):
Main function of OSACA. Inspect binary file and create analysis.
"""
# Check args and exit program if something's wrong
if(not self.check_arch()):
if not self.check_arch():
print('Invalid microarchitecture.', file=sys.stderr)
sys.exit(1)
if(not self.check_elffile()):
if not self.check_elffile():
print('Invalid file path or file format. Not an ELF file.', file=sys.stderr)
sys.exit(1)
# Finally check for database for the chosen architecture
@@ -155,7 +152,7 @@ class Osaca(object):
print('Everything seems fine! Let\'s start checking!', file=self.file_output)
for i, line in enumerate(self.srcCode):
if(i == 0):
if i == 0:
self.check_line(line, True)
else:
self.check_line(line)
@@ -168,21 +165,21 @@ class Osaca(object):
Inspect binary file and create analysis.
"""
# Check args and exit program if something's wrong
if(not self.check_arch()):
if not self.check_arch():
print('Invalid microarchitecture.', file=sys.stderr)
sys.exit()
# Check if input file is a binary or assembly file
binary_file = True
if(not self.check_elffile()):
if not self.check_elffile():
binary_file = False
if(not self.check_file(True)):
if not self.check_file(True):
print('Invalid file path or file format.', file=sys.stderr)
sys.exit(1)
# Finally check for database for the chosen architecture
self.df = self.read_csv()
print('Everything seems fine! Let\'s start checking!', file=self.file_output)
if(binary_file):
if binary_file:
self.iaca_bin()
else:
self.iaca_asm()
@@ -203,7 +200,7 @@ class Osaca(object):
"""
arch_list = ['SNB', 'IVB', 'HSW', 'BDW', 'SKL', 'ZEN']
if(self.arch in arch_list):
if self.arch in arch_list:
return True
else:
return False
@@ -220,12 +217,12 @@ class Osaca(object):
False if file does not exist or is not an elf64 file
"""
if(os.path.isfile(self.filepath)):
if os.path.isfile(self.file_path):
self.store_src_code_elf()
try:
if('file format elf64' in self.srcCode[1].lower()):
if 'file format elf64' in self.srcCode[1].lower():
return True
except(IndexError):
except IndexError:
return False
return False
@@ -247,7 +244,7 @@ class Osaca(object):
False if file does not exist
"""
if(os.path.isfile(self.filepath)):
if os.path.isfile(self.file_path):
self.store_src_code(iaca_flag)
return True
return False
@@ -257,8 +254,8 @@ class Osaca(object):
Load binary file compiled with '-g' in class attribute srcCode and
separate by line.
"""
self.srcCode = (subprocess.run(['objdump', '--source', self.filepath],
stdout=subprocess.PIPE,
self.srcCode = (subprocess.run(['objdump', '--source', self.file_path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE).stdout.decode('utf-8').split('\n'))
def store_src_code(self, iaca_flag=False):
@@ -271,52 +268,45 @@ class Osaca(object):
store file data as a string in attribute srcCode if True,
store it as a list of strings (lines) if False (default False)
"""
try:
f = open(self.filepath, 'r')
except IOError:
print('IOError: file \'{}\' not found'.format(self.filepath), file=self.file_output)
f = open(self.file_path, 'r')
self.srcCode = ''
for line in f:
self.srcCode += line
f.close()
if(iaca_flag):
if iaca_flag:
return
self.srcCode = self.srcCode.split('\n')
def read_csv(self):
    """
    Read architecture dependent CSV from data directory.

    The file is '<osaca_dir>/data/<arch in lower case>_data.csv'. The path is
    joined with `os.path.join`, so `osaca_dir` may be given with or without a
    trailing separator.

    Returns
    -------
    DataFrame
        CSV as DataFrame object
    """
    csv_path = os.path.join(self.osaca_dir, 'data', self.arch.lower() + '_data.csv')
    return pd.read_csv(csv_path)
def write_csv(self, csv):
    """
    Write architecture dependent CSV into data directory.

    The file is '<osaca_dir>/data/<arch in lower case>_data.csv' and is
    overwritten if it exists. The path is joined with `os.path.join`, so
    `osaca_dir` may be given with or without a trailing separator, and the
    file is closed even if writing fails.

    Parameters
    ----------
    csv : str
        CSV data as string
    """
    csv_path = os.path.join(self.osaca_dir, 'data', self.arch.lower() + '_data.csv')
    with open(csv_path, 'w') as f:
        f.write(csv)
def create_sequences(self, end=101):
"""
Creates list of integers from 1 to end and list of their reciprocals.
Create list of integers from 1 to end and list of their reciprocals.
Parameters
----------
@@ -334,7 +324,7 @@ class Osaca(object):
reci_list = []
for i in range(1, end):
cyc_list.append(i)
reci_list.append(1/i)
reci_list.append(1 / i)
return cyc_list, reci_list
def validate_val(self, clk_cyc, instr, is_tp, cyc_list, reci_list):
@@ -365,21 +355,20 @@ class Osaca(object):
Clock cycle, either rounded to an integer or its reciprocal or the
given clk_cyc parameter
"""
clmn = 'LT'
if(is_tp):
clmn = 'TP'
column = 'LT'
if is_tp:
column = 'TP'
for i in range(0, len(cyc_list)):
if(cyc_list[i]*1.05 > float(clk_cyc) and cyc_list[i]*0.95 < float(clk_cyc)):
if cyc_list[i] * 1.05 > float(clk_cyc) > cyc_list[i] * 0.95:
# Value is probably correct, so round it to the estimated value
return cyc_list[i]
# Check reciprocal only if it is a throughput value
elif(is_tp and reci_list[i]*1.05 > float(clk_cyc)
and reci_list[i]*0.95 < float(clk_cyc)):
elif is_tp and reci_list[i] * 1.05 > float(clk_cyc) > reci_list[i] * 0.95:
# Value is probably correct, so round it to the estimated value
return reci_list[i]
# No value close to an integer or its reciprocal found, we assume the
# measurement is incorrect
print('Your measurement for {} ({}) is probably wrong. '.format(instr, clmn)
print('Your measurement for {} ({}) is probably wrong. '.format(instr, column)
+ 'Please inspect your benchmark!', file=self.file_output)
print('The program will continue with the given value', file=self.file_output)
return clk_cyc
@@ -396,26 +385,26 @@ class Osaca(object):
Necessary for setting indenting character (default False)
"""
# Check if marker is in line
if(self.marker in line):
if self.marker in line:
# First, check if high level code is indented with whitespaces or tabs
if(first_appearance):
if first_appearance:
self.indentChar = self.get_indent_chars(line)
# Now count the number of whitespaces
self.numSeps = (re.split(self.marker, line)[0]).count(self.indentChar)
self.sem = 3
elif(self.sem > 0):
elif self.sem > 0:
# We're in the marked code snippet
# Check if the line is ASM code and - if not - check if we're still in the loop
match = re.search(self.ASM_LINE, line)
if(match):
if match:
# Further analysis of instructions
# Check if there are comments in line
if(r'//' in line):
if r'//' in line:
return
self.check_instr(''.join(re.split(r'\t', line)[-1:]))
elif((re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps):
elif (re.split(r'\S', line)[0]).count(self.indentChar) <= self.numSeps:
# Not in the loop anymore - or yet. We decrement the semaphore
self.sem = self.sem-1
self.sem = self.sem - 1
def get_indent_chars(self, line):
"""
@@ -433,9 +422,9 @@ class Osaca(object):
"""
num_spaces = (re.split(self.marker, line)[0]).count(' ')
num_tabs = (re.split(self.marker, line)[0]).count('\t')
if(num_spaces != 0 and num_tabs == 0):
if num_spaces != 0 and num_tabs == 0:
return ' '
elif(num_spaces == 0 and num_tabs != 0):
elif num_spaces == 0 and num_tabs != 0:
return '\t'
else:
err_msg = 'Indentation of code is only supported for whitespaces and tabs.'
@@ -448,31 +437,29 @@ class Osaca(object):
self.marker = r'fs addr32 nop'
part1 = re.compile(r'64\s+fs')
part2 = re.compile(r'67 90\s+addr32 nop')
is_2_lines = False
for line in self.srcCode:
# Check if marker is in line
if(self.marker in line):
if self.marker in line:
self.sem += 1
elif(re.search(part1, line) or re.search(part2, line)):
elif re.search(part1, line) or re.search(part2, line):
self.sem += 0.5
is_2_lines = True
elif(self.sem == 1):
elif self.sem == 1:
# We're in the marked code snippet
# Check if the line is ASM code
match = re.search(self.ASM_LINE, line)
if(match):
if match:
# Further analysis of instructions
# Check if there are comments in line
if(r'//' in line):
if r'//' in line:
continue
# Do the same instruction check as for the OSACA marker line check
self.check_instr(''.join(re.split(r'\t', line)[-1:]))
elif(self.sem == 2):
elif self.sem == 2:
# Not in the loop anymore. Due to the fact it's the IACA marker we can stop here
# After removing the last line which belongs to the IACA marker
del self.instr_forms[-1:]
#if(is_2_lines):
# The marker is split into two lines, therefore delete another line
# if(is_2_lines):
# The marker is split into two lines, therefore delete another line
# del self.instr_forms[-1:]
return
@@ -484,15 +471,15 @@ class Osaca(object):
code = self.srcCode
# Search for the start marker
match = re.match(self.IACA_SM, code)
while(not match):
while not match:
code = code.split('\n', 1)[1]
match = re.match(self.IACA_SM, code)
# Search for the end marker
code = (code.split('144', 1)[1]).split('\n', 1)[1]
res = ''
match = re.match(self.IACA_EM, code)
while(not match):
res += code.split('\n', 1)[0]+'\n'
while not match:
res += code.split('\n', 1)[0] + '\n'
code = code.split('\n', 1)[1]
match = re.match(self.IACA_EM, code)
# Split the result by line and go on like with OSACA markers
@@ -500,7 +487,7 @@ class Osaca(object):
for line in res:
line = line.split('#')[0]
line = line.lstrip()
if(len(line) == 0 or '//' in line or line.startswith('..')):
if len(line) == 0 or '//' in line or line.startswith('..'):
continue
self.check_instr(line)
@@ -515,14 +502,14 @@ class Osaca(object):
Instruction as string
"""
# Check for strange clang padding bytes
while(instr.startswith('data32')):
while instr.startswith('data32'):
instr = instr[7:]
# Separate mnemonic and operands
mnemonic = instr.split()[0]
params = ''.join(instr.split()[1:])
# Check if line is not only a byte
empty_byte = re.compile(r'[0-9a-f]{2}')
if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2):
if re.match(empty_byte, mnemonic) and len(mnemonic) == 2:
return
# Check if there's one or more operands and store all in a list
param_list = self.flatten(self.separate_params(params))
@@ -531,39 +518,39 @@ class Osaca(object):
# MEMORY (MEM) or LABEL(LBL)
for i in range(len(param_list)):
op = param_list[i]
if(len(op) <= 0):
if len(op) <= 0:
op = Parameter('NONE')
elif(op[0] == '$'):
elif op[0] == '$':
op = Parameter('IMD')
elif(op[0] == '%' and '(' not in op):
elif op[0] == '%' and '(' not in op:
j = len(op)
opmask = False
if('{' in op):
if '{' in op:
j = op.index('{')
opmask = True
op = Register(op[1:j], opmask)
elif('<' in op or op.startswith('.')):
elif '<' in op or op.startswith('.'):
op = Parameter('LBL')
else:
op = MemAddr(op)
op = MemAddr(op, )
param_list[i] = str(op)
param_list_types[i] = op
# Add to list
instr = instr.rstrip()
if(len(instr) > self.longestInstr):
if len(instr) > self.longestInstr:
self.longestInstr = len(instr)
instr_form = [mnemonic]+list(reversed(param_list_types))+[instr]
instr_form = [mnemonic] + list(reversed(param_list_types)) + [instr]
self.instr_forms.append(instr_form)
# If flag is set, create testcase for instruction form
# Do this in reversed param list order, due to the fact it's Intel syntax
# Only create benchmark if no label (LBL) is part of the operands
if('LBL' in param_list or '' in param_list):
if 'LBL' in param_list or '' in param_list:
return
tc = Testcase(mnemonic, list(reversed(param_list_types)), '32')
# Only write a testcase if it does not already exist and is not already in the data file
writeTP, writeLT = tc.is_in_dir()
inDB = len(self.df.loc[lambda df: df.instr == tc.get_entryname()])
if(inDB == 0):
if inDB == 0:
tc.write_testcase(not writeTP, not writeLT)
def separate_params(self, params):
@@ -582,18 +569,18 @@ class Osaca(object):
number of parameters given.
"""
param_list = [params]
if(',' in params):
if(')' in params):
if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','):
i = params.index(')')+1
elif(params.index('(') < params.index(',')):
if ',' in params:
if ')' in params:
if params.index(')') < len(params) - 1 and params[params.index(')') + 1] == ',':
i = params.index(')') + 1
elif params.index('(') < params.index(','):
return param_list
else:
i = params.index(',')
else:
i = params.index(',')
param_list = [params[:i], self.separate_params(params[i+1:])]
elif('#' in params):
param_list = [params[:i], self.separate_params(params[i + 1:])]
elif '#' in params:
i = params.index('#')
param_list = [params[:i]]
return param_list
@@ -612,9 +599,9 @@ class Osaca(object):
[str]
List of strings
"""
if l == []:
if not l:
return l
if(isinstance(l[0], list)):
if isinstance(l[0], list):
return self.flatten(l[0]) + self.flatten(l[1:])
return l[:1] + self.flatten(l[1:])
@@ -637,22 +624,22 @@ class Osaca(object):
OSACA output
"""
# Check the output alignment depending on the longest instruction
if(self.longestInstr > 70):
if self.longestInstr > 70:
self.longestInstr = 70
horiz_line = self.create_horiz_sep()
# Write general information about the benchmark
output = ('--' + horiz_line + '\n'
+ '| Analyzing of file:\t' + os.path.abspath(self.filepath) + '\n'
+ '| Architecture:\t\t' + self.arch + '\n'
+ '| Timestamp:\t\t' + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '\n')
if(tp_list):
output = '--{}\n| Analyzing of file:\t{}| Architecture:\t\t{}\n| Timestamp:\t\t{}\n'.format(
horiz_line, os.path.abspath(self.file_path), self.arch,
datetime.now().strftime('%Y-%m-%d %H:%M:%S')
)
if tp_list:
output += self.create_tp_list(horiz_line)
if(pr_sched):
if pr_sched:
output += '\n\n'
sched = Scheduler(self.arch, self.instr_forms)
sched_output, port_binding = sched.new_schedule()
binding = sched.get_port_binding(port_binding)
output += sched.get_report_info() + '\n' + binding + '\n\n' + sched_output
schedule = Scheduler(self.arch, self.instr_forms)
schedule_output, port_binding = schedule.new_schedule()
binding = schedule.get_port_binding(port_binding)
output += schedule.get_report_info() + '\n' + binding + '\n\n' + schedule_output
block_tp = round(max(port_binding), 2)
output += 'Total number of estimated throughput: ' + str(block_tp)
return output
@@ -666,7 +653,7 @@ class Osaca(object):
str
Horizontal separator line
"""
return '-'*(self.longestInstr+8)
return '-' * (self.longestInstr + 8)
def create_tp_list(self, horiz_line):
"""
@@ -683,18 +670,16 @@ class Osaca(object):
Throughput list output for printing
"""
warning = False
ws = ' '*(len(horiz_line)-23)
ws = ' ' * (len(horiz_line) - 23)
output = ('\n| INSTRUCTION' + ws + 'CLOCK CYCLES\n'
+ '| ' + horiz_line + '\n|\n')
output = '\n| INSTRUCTION{}CLOCK CYCLES\n| {}\n|\n'.format(ws, horiz_line)
# Check for the throughput data in CSV
for elem in self.instr_forms:
op_ext = []
for i in range(1, len(elem)-1):
optmp = ''
if(isinstance(elem[i], Register) and elem[i].reg_type == 'GPR'):
optmp = 'r'+str(elem[i].size)
elif(isinstance(elem[i], MemAddr)):
for i in range(1, len(elem) - 1):
if isinstance(elem[i], Register) and elem[i].reg_type == 'GPR':
optmp = 'r' + str(elem[i].size)
elif isinstance(elem[i], MemAddr):
optmp = 'mem'
else:
optmp = str(elem[i]).lower()
@@ -705,7 +690,7 @@ class Osaca(object):
import warnings
warnings.filterwarnings("ignore", 'This pattern has match groups')
series = self.df['instr'].str.contains(elem[0] + '-' + operands)
if(True in series.values):
if True in series.values:
# It's a match!
not_found = False
try:
@@ -726,44 +711,43 @@ class Osaca(object):
op_ext_regs.append(True)
except KeyError:
op_ext_regs.append(False)
if(True not in op_ext_regs):
if True not in op_ext_regs:
# No register in whole instr form. How can I find out what regsize we need?
print('Feature not included yet: ', end='', file=self.file_output)
print(elem[0]+' for '+operands, file=self.file_output)
print(elem[0] + ' for ' + operands, file=self.file_output)
tp = 0
not_found = True
warning = True
num_whitespaces = self.longestInstr-len(elem[-1])
num_whitespaces = self.longestInstr - len(elem[-1])
ws = ' ' * num_whitespaces + '| '
n_f = ' ' * (5 - len(str(tp))) + '*'
data = '| ' + elem[-1] + ws + str(tp) + n_f + '\n'
output += data
continue
if(op_ext_regs[0] is False):
if op_ext_regs[0] is False:
# Instruction stores result in memory. Check for storing in register instead.
if(len(op_ext) > 1):
if(op_ext_regs[1] is True):
if len(op_ext) > 1:
if op_ext_regs[1] is True:
op_ext[0] = op_ext[1]
elif(len(op_ext > 2)):
if(op_ext_regs[2] is True):
elif len(op_ext) > 2:
if op_ext_regs[2] is True:
op_ext[0] = op_ext[2]
if(len(op_ext_regs) == 2 and op_ext_regs[1] is False):
if len(op_ext_regs) == 2 and op_ext_regs[1] is False:
# Instruction loads value from memory and has only two operands. Check for
# loading from register instead
if(op_ext_regs[0] is True):
if op_ext_regs[0] is True:
op_ext[1] = op_ext[0]
if(len(op_ext_regs) == 3 and op_ext_regs[2] is False):
if len(op_ext_regs) == 3 and op_ext_regs[2] is False:
# Instruction loads value from memory and has three operands. Check for loading
# from register instead
op_ext[2] = op_ext[0]
operands = '_'.join(op_ext)
# Check for register equivalent instruction
series = self.df['instr'].str.contains(elem[0]+'-'+operands)
if(True in series.values):
series = self.df['instr'].str.contains(elem[0] + '-' + operands)
if True in series.values:
# It's a match!
not_found = False
try:
tp = self.df[self.df.instr == elem[0]+'-'+operands].TP.values[0]
tp = self.df[self.df.instr == elem[0] + '-' + operands].TP.values[0]
except IndexError:
# Something went wrong
print('Error while fetching data from data file', file=self.file_output)
@@ -778,19 +762,16 @@ class Osaca(object):
num_whitespaces = self.longestInstr - len(elem[-1])
ws = ' ' * num_whitespaces + '| '
n_f = ''
if(not_found):
if not_found:
n_f = ' ' * (5 - len(str(tp))) + '*'
data = '| ' + elem[-1] + ws + '{:3.2f}'.format(tp) + n_f + '\n'
output += data
# Finally end the list of throughput values
num_whitespaces = self.longestInstr - 27
ws = ' ' + ' ' * num_whitespaces
output += '| ' + horiz_line + '\n'
if(warning):
output += ('\n\n* There was no throughput value found '
'for the specific instruction form.'
'\n Please create a testcase via the create_testcase-method '
'or add a value manually.')
if warning:
output += ('\n\n* There was no throughput value found for the specific instruction '
'form.\n Please create a testcase via the create_testcase-method or add a '
'value manually.')
return output
@@ -798,8 +779,8 @@ class Osaca(object):
# Stolen from pip
def __read(*names, **kwargs):
with io.open(
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")
os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")
) as fp:
return fp.read()
@@ -817,10 +798,10 @@ def __find_version(*file_paths):
def main():
# Parse args
parser = argparse.ArgumentParser(description='Analyzes a marked innermost loop snippet'
+ 'for a given architecture type and prints out the estimated'
+ 'average throughput.')
parser.add_argument('-V', '--version', action='version', version='%(prog)s '
+ __find_version('__init__.py'))
'for a given architecture type and prints out the '
'estimated average throughput.')
parser.add_argument('-V', '--version', action='version',
version='%(prog)s ' + __find_version('__init__.py'))
parser.add_argument('--arch', dest='arch', type=str, help='define architecture '
+ '(SNB, IVB, HSW, BDW, SKL, ZEN)')
parser.add_argument('--tp-list', dest='tp_list', action='store_true',
@@ -833,49 +814,47 @@ def main():
help='search for IACA markers instead the OSACA marker')
group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true',
help='try to find blocks probably corresponding to loops in assembly and'
+ 'insert IACA marker')
+ 'insert IACA marker')
parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)')
# Store args in global variables
inp = parser.parse_args()
if(inp.arch is None and inp.insert_marker is None):
if inp.arch is None and inp.insert_marker is None:
raise ValueError('Please specify an architecture.', file=sys.stderr)
if(inp.arch is not None):
arch = inp.arch.upper()
arch = inp.arch.upper()
filepath = inp.filepath
incl_ibench = inp.incl
iaca_flag = inp.iaca
insert_m = inp.insert_marker
# Create Osaca object
if(inp.arch is not None):
osaca = Osaca(arch, filepath)
if(inp.tp_list):
osaca = OSACA(arch, filepath)
if inp.tp_list:
osaca.tp_list = True
if(incl_ibench):
if incl_ibench:
try:
osaca.include_ibench()
except UnboundLocalError:
print('Please specify an architecture.', file=sys.stderr)
elif(iaca_flag):
elif iaca_flag:
try:
osaca.inspect_with_iaca()
except UnboundLocalError:
print('Please specify an architecture.', file=sys.stderr)
elif(insert_m):
elif insert_m:
try:
from kerncraft import iaca
except ImportError:
print('ImportError: Module kerncraft not installed. Use '
+ '\'pip install --user kerncraft\' for installation.\nFor more information see '
+ 'https://github.com/RRZE-HPC/kerncraft', file=sys.stderr)
print("ImportError: Module kerncraft not installed. Use 'pip install --user "
"kerncraft' for installation.\nFor more information see "
"https://github.com/RRZE-HPC/kerncraft", file=sys.stderr)
sys.exit(1)
# Change due to newer kerncraft version (hopefully temporary)
#iaca.iaca_instrumentation(input_file=filepath, output_file=filepath,
# iaca.iaca_instrumentation(input_file=filepath, output_file=filepath,
# block_selection='manual', pointer_increment=1)
with open(filepath, 'r') as f_in, open(filepath[:-2] + '-iaca.s', 'w') as f_out:
iaca.iaca_instrumentation(input_file=f_in, output_file=f_out,
iaca.iaca_instrumentation(input_file=f_in, output_file=f_out,
block_selection='manual', pointer_increment=1)
else:
osaca.inspect_binary()

View File

@@ -4,12 +4,12 @@ class Parameter(object):
def __init__(self, ptype):
self.ptype = ptype.upper()
if(self.ptype not in self.type_list):
if self.ptype not in self.type_list:
raise NameError('Type not supported: '+ptype)
def __str__(self):
"""returns string representation"""
if(self.ptype == 'NONE'):
"""Return string representation."""
if self.ptype == 'NONE':
return ''
else:
return self.ptype
@@ -20,27 +20,28 @@ class MemAddr(Parameter):
scales = [1, 2, 4, 8]
def __init__(self, name):
super().__init__("MEM")
self.sreg = False
self.offset = False
self.base = False
self.index = False
self.scale = False
if(':' in name):
if(name[1:name.index(':')].upper() not in self.segment_regs):
if ':' in name:
if name[1:name.index(':')].upper() not in self.segment_regs:
raise NameError('Type not supported: '+name)
self.sreg = True
self.offset = True
if('(' not in name or ('(' in name and name.index('(') != 0)):
if '(' not in name or ('(' in name and name.index('(') != 0):
self.offset = True
if('(' in name):
if '(' in name:
self.parentheses = name[name.index('(')+1:-1]
self.commacnt = self.parentheses.count(',')
if(self.commacnt == 0):
if self.commacnt == 0:
self.base = True
elif(self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1):
elif self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1:
self.base = True
self.index = True
elif(self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales):
elif self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales:
self.base = True
self.index = True
self.scale = True
@@ -50,13 +51,13 @@ class MemAddr(Parameter):
def __str__(self):
"""returns string representation"""
mem_format = 'MEM('
if(self.sreg):
if self.sreg:
mem_format += 'sreg:'
if(self.offset):
if self.offset:
mem_format += 'offset'
if(self.base and not self.index):
if self.base and not self.index:
mem_format += '(base)'
elif(self.base and self.index and self.scale):
elif self.base and self.index and self.scale:
mem_format += '(base, index, scale)'
mem_format += ')'
return mem_format
@@ -131,17 +132,18 @@ class Register(Parameter):
}
def __init__(self, name, mask=False):
super().__init__("REG")
self.name = name.upper()
self.mask = mask
if[name in self.sizes]:
if self.name in self.sizes:
self.size = self.sizes[self.name][0]
self.reg_type = self.sizes[self.name][1]
else:
raise NameError('Register name not in dictionary: '+self.name)
raise NameError('Register name not in dictionary: {}'.format(self.name))
def __str__(self):
"""returns string representation"""
"""Return string representation."""
opmask = ''
if(self.mask):
if self.mask:
opmask = '{opmask}'
return(self.reg_type+opmask)
return self.reg_type + opmask

View File

@@ -67,7 +67,8 @@ class Testcase(object):
# num_instr must be an even number
self.num_instr = str(ceil(int(_num_instr)/2)*2)
# Check for the number of operands and initialise the GPRs if necessary
self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = self.__define_operands()
self.op_a, self.op_b, self.op_c, self.gprPush, self.gprPop, self.zeroGPR, self.copy = \
self.__define_operands()
self.num_operands = len(self.param_list)
# Create asm header
@@ -97,7 +98,7 @@ class Testcase(object):
(default True)
"""
osaca_dir = os.path.expanduser('~') + '/.osaca/'
if(lt):
if lt:
# Write latency file
call(['mkdir', '-p', osaca_dir + 'benchmarks'])
f = open(osaca_dir + 'benchmarks/'+self.instr+self.extension+'.S', 'w')
@@ -105,7 +106,7 @@ class Testcase(object):
+ self.zeroGPR + self.copy + self.loop_lat + self.gprPop + self.done)
f.write(data)
f.close()
if(tp):
if tp:
# Write throughput file
call(['mkdir', '-p', osaca_dir + 'benchmarks'])
f = open(osaca_dir + 'benchmarks/' + self.instr + self.extension
@@ -126,53 +127,53 @@ class Testcase(object):
String tuple containing types of operands and if needed push/pop operations, the
initialisation of general purpose regs and the copy if registers.
"""
oprnds = self.param_list
operands = self.param_list
op_a, op_b, op_c = ('', '', '')
gpr_push, gpr_pop, zero_gpr = ('', '', '')
if(isinstance(oprnds[0], Register)):
op_a = oprnds[0].reg_type.lower()
elif(isinstance(oprnds[0], MemAddr)):
if isinstance(operands[0], Register):
op_a = operands[0].reg_type.lower()
elif isinstance(operands[0], MemAddr):
op_a = 'mem'
elif(isinstance(oprnds[0], Parameter) and str(oprnds[0]) == 'IMD'):
elif isinstance(operands[0], Parameter) and str(operands[0]) == 'IMD':
op_a = 'imd'
if(op_a == 'gpr'):
if op_a == 'gpr':
gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs()
op_a += str(oprnds[0].size)
if(len(oprnds) > 1):
if(isinstance(oprnds[1], Register)):
op_b = oprnds[1].reg_type.lower()
elif(isinstance(oprnds[1], MemAddr)):
op_a += str(operands[0].size)
if len(operands) > 1:
if isinstance(operands[1], Register):
op_b = operands[1].reg_type.lower()
elif isinstance(operands[1], MemAddr):
op_b = 'mem'
elif(isinstance(oprnds[1], Parameter) and str(oprnds[1]) == 'IMD'):
elif isinstance(operands[1], Parameter) and str(operands[1]) == 'IMD':
op_b = 'imd'
if(op_b == 'gpr'):
op_b += str(oprnds[1].size)
if('gpr' not in op_a):
if op_b == 'gpr':
op_b += str(operands[1].size)
if 'gpr' not in op_a:
gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs()
if(len(oprnds) == 3):
if(isinstance(oprnds[2], Register)):
op_c = oprnds[2].reg_type.lower()
elif(isinstance(oprnds[2], MemAddr)):
if len(operands) == 3:
if isinstance(operands[2], Register):
op_c = operands[2].reg_type.lower()
elif isinstance(operands[2], MemAddr):
op_c = 'mem'
elif(isinstance(oprnds[2], Parameter) and str(oprnds[2]) == 'IMD'):
elif isinstance(operands[2], Parameter) and str(operands[2]) == 'IMD':
op_c = 'imd'
if(op_c == 'gpr'):
op_c += str(oprnds[2].size)
if(('gpr' not in op_a) and ('gpr'not in op_b)):
if op_c == 'gpr':
op_c += str(operands[2].size)
if ('gpr' not in op_a) and ('gpr' not in op_b):
gpr_push, gpr_pop, zero_gpr = self.__initialise_gprs()
if(len(oprnds) == 1 and isinstance(oprnds[0], Register)):
copy = self.__copy_regs(oprnds[0])
elif(len(oprnds) > 1 and isinstance(oprnds[1], Register)):
copy = self.__copy_regs(oprnds[1])
elif(len(oprnds) > 2 and isinstance(oprnds[2], Register)):
copy = self.__copy_regs(oprnds[1])
if len(operands) == 1 and isinstance(operands[0], Register):
copy = self.__copy_regs(operands[0])
elif len(operands) > 1 and isinstance(operands[1], Register):
copy = self.__copy_regs(operands[1])
elif len(operands) > 2 and isinstance(operands[2], Register):
copy = self.__copy_regs(operands[1])
else:
copy = ''
return (op_a, op_b, op_c, gpr_push, gpr_pop, zero_gpr, copy)
return op_a, op_b, op_c, gpr_push, gpr_pop, zero_gpr, copy
def __initialise_gprs(self):
"""
Initialise eleven general purpose registers and set them to zero.
Initialize eleven general purpose registers and set them to zero.
Returns
-------
@@ -189,10 +190,10 @@ class Testcase(object):
gpr_pop += '\t\tpop {}\n'.format(reg)
for reg in self.gprs64:
zero_gpr += '\t\txor {}, {}\n'.format(reg, reg)
return (gpr_push, gpr_pop, zero_gpr)
return gpr_push, gpr_pop, zero_gpr
# Copy created values in specific register
# Copy created values in specific register
def __copy_regs(self, reg):
"""
Copy created values in specific register.
@@ -208,8 +209,8 @@ class Testcase(object):
String containing the copy instructions
"""
copy = '\t\t# copy DP 1.0\n'
# Different handling for GPR, MMX and SSE/AVX registers
if(reg.reg_type == 'GPR'):
# Different handling for GPR, MMX and SSE/AVX registers
if reg.reg_type == 'GPR':
copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0])
copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][1])
copy += '\t\t# Create DP 2.0\n'
@@ -218,7 +219,7 @@ class Testcase(object):
copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0])
copy += '\t\tmovq {}, {}\n'.format(self.ops['gpr64'][2], self.ops['gpr64'][0])
copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0])
elif(reg.reg_type == 'MMX'):
elif reg.reg_type == 'MMX':
copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][0])
copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['mmx'][1])
copy += '\t\tvmovq {}, xmm0\n'.format(self.ops['gpr64'][0])
@@ -227,7 +228,7 @@ class Testcase(object):
copy += '\t\t# Create DP 0.5\n'
copy += '\t\tdiv {}\n'.format(self.ops['gpr64'][0])
copy += '\t\tmovq {}, {}\n'.format(self.ops['mmx'][2], self.ops['gpr64'][0])
elif(reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM'):
elif reg.reg_type == 'XMM' or reg.reg_type == 'YMM' or reg.reg_type == 'ZMM':
key = reg.reg_type.lower()
copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][0], self.ops[key][0])
copy += '\t\tvmovaps {}, {}\n'.format(self.ops[key][1], self.ops[key][0])
@@ -278,15 +279,15 @@ class Testcase(object):
'\t\tjle done\n')
# Expand to AVX(512) if necessary
expand = ''
if(self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm'):
if self.op_a == 'ymm' or self.op_b == 'ymm' or self.op_c == 'ymm':
expand = ('\t\t# expand from SSE to AVX\n'
'\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n')
if(self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm'):
if self.op_a == 'zmm' or self.op_b == 'zmm' or self.op_c == 'zmm':
expand = ('\t\t# expand from SSE to AVX\n'
'\t\tvinsertf128 ymm0, ymm0, xmm0, 0x1\n'
'\t\t# expand from AVX to AVX512\n'
'\t\tvinsert64x4 zmm0, zmm0, ymm0, 0x1\n')
return (def_instr, ninstr, init, expand)
return def_instr, ninstr, init, expand
def __define_loop_lat(self):
"""
@@ -299,22 +300,22 @@ class Testcase(object):
"""
loop_lat = ('loop:\n'
'\t\tinc i\n')
if(self.num_operands == 1):
if self.num_operands == 1:
for i in range(0, int(self.num_instr)):
loop_lat += '\t\tINSTR {}\n'.format(self.ops[self.op_a][0])
elif(self.num_operands == 2 and self.op_a == self.op_b):
elif self.num_operands == 2 and self.op_a == self.op_b:
for i in range(0, int(self.num_instr), 2):
loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0],
self.ops[self.op_b][1])
loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_b][1],
self.ops[self.op_b][0])
elif(self.num_operands == 2 and self.op_a != self.op_b):
elif self.num_operands == 2 and self.op_a != self.op_b:
for i in range(0, int(self.num_instr), 2):
loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0],
self.ops[self.op_b][0])
loop_lat += '\t\tINSTR {}, {}\n'.format(self.ops[self.op_a][0],
self.ops[self.op_b][0])
elif(self.num_operands == 3 and self.op_a == self.op_b):
elif self.num_operands == 3 and self.op_a == self.op_b:
for i in range(0, int(self.num_instr), 2):
loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0],
self.ops[self.op_b][1],
@@ -322,7 +323,7 @@ class Testcase(object):
loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][1],
self.ops[self.op_b][0],
self.ops[self.op_c][0])
elif(self.num_operands == 3 and self.op_a == self.op_c):
elif self.num_operands == 3 and self.op_a == self.op_c:
for i in range(0, int(self.num_instr), 2):
loop_lat += '\t\tINSTR {}, {}, {}\n'.format(self.ops[self.op_a][0],
self.ops[self.op_b][0],
@@ -348,15 +349,15 @@ class Testcase(object):
ext = ''
ext1 = False
ext2 = False
if(self.num_operands == 2):
if self.num_operands == 2:
ext1 = True
if(self.num_operands == 3):
if self.num_operands == 3:
ext1 = True
ext2 = True
for i in range(0, int(self.num_instr)):
if(ext1):
if ext1:
ext = ', {}'.format(self.ops[self.op_b][i % 3])
if(ext2):
if ext2:
ext += ', {}'.format(self.ops[self.op_c][i % 3])
reg_num = (i % (len(self.ops[self.op_a]) - 3)) + 3
loop_thrpt += '\t\tINSTR {}{}\n'.format(self.ops[self.op_a][reg_num], ext)
@@ -381,15 +382,15 @@ class Testcase(object):
lt = False
name = self.instr+self.extension
for root, dirs, files in os.walk(os.path.dirname(__file__)+'/benchmarks'):
if((name+'-tp.S') in files):
if (name + '-tp.S') in files:
tp = True
if name+'.S' in files:
lt = True
return (tp, lt)
return tp, lt
def get_entryname(self):
"""
Returns the name of the entry the instruction form would have in the data file
Return the name of the entry the instruction form would have in the data file
Returns
-------

View File

@@ -10,12 +10,14 @@ import re
here = os.path.abspath(os.path.dirname(__file__))
# Stolen from pip
def read(*names, **kwargs):
with io.open(os.path.join(os.path.dirname(__file__), *names),
encoding=kwargs.get("encoding", "utf8")) as fp:
return fp.read()
# Stolen from pip
def find_version(*file_paths):
version_file = read(*file_paths)
@@ -24,6 +26,7 @@ def find_version(*file_paths):
return version_match.group(1)
raise RuntimeError("Unable to find version string.")
# Get the long description from the README file
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
long_description = f.read()
@@ -43,11 +46,11 @@ setup(
# The project's main homepage
url='https://github.com/RRZE-HPC/OSACA',
#Author details
# Author details
author='Jan Laukemann',
author_email='jan.laukemann@fau.de',
#Choose your license
# Choose your license
license='AGPLv3',
# See https://pypi.python.org/pypi?%3Aaction=list_classifiers
@@ -77,7 +80,6 @@ setup(
# What does your project relate to?
keywords='hpc performance benchmark analysis architecture',
# You can just specify the packages manually here if your project is
# simple. Or you can use find_packages().
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
@@ -97,10 +99,10 @@ setup(
# dependencies). You can install these using the following syntax,
# for example:
# $ pip install -e .[dev,test]
#extras_require={
# extras_require={
# 'dev': ['check-manifest'],
# 'test': ['coverage'],
#},
# },
# If there are data files included in your packages that need to be
# installed, specify them here. If using Python 2.6 or less, then these
@@ -111,7 +113,7 @@ setup(
# need to place data files outside of your packages. See:
# http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
# In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
#data_files=[('my_data', ['data/data_file'])],
# data_files=[('my_data', ['data/data_file'])],
# To provide executable scripts, use entry points in preference to the
# "scripts" keyword. Entry points provide cross-platform support and allow
@@ -122,7 +124,3 @@ setup(
],
},
)

View File

@@ -7,29 +7,30 @@ import os
import unittest
sys.path.insert(0, '..')
from osaca.osaca import Osaca
from osaca.osaca import OSACA
class TestOsaca(unittest.TestCase):
def testIACABinary(self):
out = StringIO()
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
osa = Osaca('IVB', curr_dir+'/testfiles/taxCalc-ivb-iaca', out)
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca', out)
osa.inspect_with_iaca()
result = out.getvalue()
result = '\n'.join(result.split('\n')[-27:])
with open(curr_dir+'/test_osaca_iaca.out', encoding='utf-8') as f:
with open(curr_dir + '/test_osaca_iaca.out', encoding='utf-8') as f:
assertion = f.read()
self.assertEqual(assertion, result)
# Test ASM file with IACA marker in two lines
def testIACAasm1(self):
out = StringIO()
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
osa = Osaca('IVB', curr_dir+'/testfiles/taxCalc-ivb-iaca.S', out)
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca.S', out)
osa.inspect_with_iaca()
result = out.getvalue()
result = '\n'.join(result.split('\n')[-27:])
with open(curr_dir+'/test_osaca_iaca_asm.out', encoding='utf-8') as f:
with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
assertion = f.read()
self.assertEqual(assertion, result)
@@ -37,10 +38,10 @@ class TestOsaca(unittest.TestCase):
def testIACAasm2(self):
out = StringIO()
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
osa = Osaca('IVB', curr_dir+'/testfiles/taxCalc-ivb-iaca2.S', out)
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca2.S', out)
osa.inspect_with_iaca()
result = out.getvalue()
result = '\n'.join(result.split('\n')[-27:])
with open(curr_dir+'/test_osaca_iaca_asm.out', encoding='utf-8') as f:
with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
assertion = f.read()
self.assertEqual(assertion, result)