mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-08 04:00:05 +01:00
598 lines
21 KiB
Python
Executable File
598 lines
21 KiB
Python
Executable File
#!/apps/python/3.5-anaconda/bin/python
|
|
|
|
import argparse
|
|
import sys
|
|
import subprocess
|
|
import os
|
|
import re
|
|
from Params import *
|
|
from EUsched import *
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
import numpy as np
|
|
|
|
#----------Global variables--------------
|
|
arch = ''
|
|
archList = ['SNB','IVB','HSW', 'BDW', 'SKL']
|
|
filepath = ''
|
|
srcCode = ''
|
|
marker = r'//STARTLOOP'
|
|
asm_line = re.compile(r'\s[0-9a-f]+[:]')
|
|
numSeps = 0
|
|
sem = 0
|
|
firstAppearance = True
|
|
instrForms = list()
|
|
df = ''
|
|
horizontalSeparator = ''
|
|
longestInstr = 30
|
|
cycList = []
|
|
reciList = []
|
|
# Matches every variation of the IACA start marker
|
|
iaca_sm = re.compile(r'\s*movl[ \t]+\$111[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
|
|
# Matches every variation of the IACA end marker
|
|
iaca_em = re.compile(r'\s*movl[ \t]+\$222[ \t]*,[ \t]*%ebx[ \t]*\n\s*\.byte[ \t]+100[ \t]*((,[ \t]*103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144)))|(\n\s*\.byte[ \t]+103[ \t]*((,[ \t]*144)|(\n\s*\.byte[ \t]+144))))')
|
|
#---------------------------------------
|
|
|
|
# Check if the architecture arg is valid
|
|
def check_arch():
|
|
if(arch in archList):
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
# Check if the given filepath exists and if the format is the needed elf64
|
|
def check_elffile():
|
|
if(os.path.isfile(filepath)):
|
|
create_elffile()
|
|
if('file format elf64' in srcCode[1]):
|
|
return True
|
|
return False
|
|
|
|
# Check if the given filepath exists
|
|
def check_file(iacaFlag=False):
|
|
if(os.path.isfile(filepath)):
|
|
get_file(iacaFlag)
|
|
return True
|
|
return False
|
|
|
|
# Load binary file in variable srcCode and separate by line
|
|
def create_elffile():
|
|
global srcCode
|
|
srcCode = subprocess.run(['objdump', '--source', filepath], stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')
|
|
|
|
# Load arbitrary file in variable srcCode and separate by line
|
|
def get_file(iacaFlag):
|
|
global srcCode
|
|
srcCode = ''
|
|
try:
|
|
f = open(filepath, 'r')
|
|
except IOError:
|
|
print('IOError: file \'{}\' not found'.format(filepath))
|
|
for line in f:
|
|
srcCode += line
|
|
f.close()
|
|
if(iacaFlag):
|
|
return
|
|
srcCode = srcCode.split('\n')
|
|
|
|
|
|
def check_line(line):
|
|
global numSeps
|
|
global sem
|
|
global firstAppearance
|
|
# Check if marker is in line
|
|
if(marker in line):
|
|
# First, check if high level code in indented with whitespaces or tabs
|
|
if(firstAppearance):
|
|
set_char_counter(line)
|
|
firstAppearance = False
|
|
# Now count the number of whitespaces
|
|
numSeps = (re.split(marker, line)[0]).count(cntChar)
|
|
sem = 2
|
|
elif(sem > 0):
|
|
# We're in the marked code snippet
|
|
# Check if the line is ASM code and - if not - check if we're still in the loop
|
|
match = re.search(asm_line, line)
|
|
if(match):
|
|
# Further analysis of instructions
|
|
# Check if there are comments in line
|
|
if(r'//' in line):
|
|
return
|
|
check_instr(''.join(re.split(r'\t', line)[-1:]))
|
|
elif((re.split(r'\S', line)[0]).count(cntChar) <= numSeps):
|
|
# Not in the loop anymore - or yet. We decrement the semaphore
|
|
sem = sem-1
|
|
|
|
|
|
# Check if separators are either tabulators or whitespaces
|
|
def set_char_counter(line):
|
|
global cntChar
|
|
numSpaces = (re.split(marker, line)[0]).count(' ')
|
|
numTabs = (re.split(marker, line)[0]).count('\t')
|
|
if(numSpaces != 0 and numTabs == 0):
|
|
cntChar = ' '
|
|
elif(numSpaces == 0 and numTabs != 0):
|
|
cntChar = '\t'
|
|
else:
|
|
raise NotImplementedError('Indentation of code is only supported for whitespaces and tabs.')
|
|
|
|
|
|
def check_instr(instr):
|
|
global instrForms
|
|
global longestInstr
|
|
# Check for strange clang padding bytes
|
|
while(instr.startswith('data32')):
|
|
instr = instr[7:]
|
|
# Separate mnemonic and operands
|
|
mnemonic = instr.split()[0]
|
|
params = ''.join(instr.split()[1:])
|
|
# Check if line is not only a byte
|
|
empty_byte = re.compile(r'[0-9a-f]{2}')
|
|
if(re.match(empty_byte, mnemonic) and len(mnemonic) == 2):
|
|
return
|
|
# Check if there's one or more operands and store all in a list
|
|
param_list = flatten(separate_params(params))
|
|
param_list_types = list(param_list)
|
|
# check operands and separate them by IMMEDIATE (IMD), REGISTER (REG). MEMORY (MEM) or LABEL(LBL)
|
|
for i in range(len(param_list)):
|
|
op = param_list[i]
|
|
if(len(op) <= 0):
|
|
op = Parameter('NONE')
|
|
elif(op[0] == '$'):
|
|
op = Parameter('IMD')
|
|
elif(op[0] == '%' and '(' not in op):
|
|
j = len(op)
|
|
opmask = False
|
|
if('{' in op):
|
|
j = op.index('{')
|
|
opmask = True
|
|
op = Register(op[1:j], opmask)
|
|
elif('<' in op or op.startswith('.')):
|
|
op = Parameter('LBL')
|
|
else:
|
|
op = MemAddr(op)
|
|
param_list[i] = op.print()
|
|
param_list_types[i] = op
|
|
#Add to list
|
|
if(len(instr) > longestInstr):
|
|
longestInstr = len(instr)
|
|
instrForm = [mnemonic]+list(reversed(param_list_types))+[instr]
|
|
instrForms.append(instrForm)
|
|
|
|
|
|
# Extract instruction forms out of binary file
|
|
def iaca_bin():
|
|
global marker
|
|
global sem
|
|
global instrForms
|
|
|
|
marker = r'fs addr32 nop'
|
|
for line in srcCode:
|
|
# Check if marker is in line
|
|
if(marker in line):
|
|
sem += 1
|
|
elif(sem == 1):
|
|
# We're in the marked code snippet
|
|
# Check if the line is ASM code
|
|
match = re.search(asm_line, line)
|
|
if(match):
|
|
# Further analysis of instructions
|
|
# Check if there are comments in line
|
|
if(r'//' in line):
|
|
continue
|
|
# Do the same instruction check as for the OSACA marker line check
|
|
check_instr(''.join(re.split(r'\t', line)[-1:]))
|
|
elif(sem == 2):
|
|
# Not in the loop anymore. Due to the fact it's the IACA marker we can stop here
|
|
# After removing the last line which belongs to the IACA marker
|
|
del instrForms[-1:]
|
|
return
|
|
|
|
|
|
# Extract instruction forms out of assembly file
|
|
def iaca_asm():
|
|
# Extract the code snippet surround by the IACA markers
|
|
code = srcCode
|
|
# Search for the start marker
|
|
match = re.match(iaca_sm, code)
|
|
while(not match):
|
|
code = code.split('\n',1)[1]
|
|
match = re.match(iaca_sm, code)
|
|
# Search for the end marker
|
|
code = (code.split('144',1)[1]).split('\n',1)[1]
|
|
res = ''
|
|
match = re.match(iaca_em, code)
|
|
while(not match):
|
|
res += code.split('\n',1)[0]+'\n'
|
|
code = code.split('\n',1)[1]
|
|
match = re.match(iaca_em, code)
|
|
# Split the result by line go on like with OSACA markers
|
|
res = res.split('\n')
|
|
for line in res:
|
|
line = line.split('#')[0]
|
|
line = line.lstrip()
|
|
if(len(line) == 0 or '//' in line or line.startswith('..')):
|
|
continue
|
|
check_instr(line)
|
|
|
|
|
|
def separate_params(params):
|
|
param_list = [params]
|
|
if(',' in params):
|
|
if(')' in params):
|
|
if(params.index(')') < len(params)-1 and params[params.index(')')+1] == ','):
|
|
i = params.index(')')+1
|
|
elif(params.index('(') < params.index(',')):
|
|
return param_list
|
|
else:
|
|
i = params.index(',')
|
|
else:
|
|
i = params.index(',')
|
|
param_list = [params[:i],separate_params(params[i+1:])]
|
|
elif('#' in params):
|
|
i = params.index('#')
|
|
param_list = [params[:i]]
|
|
return param_list
|
|
|
|
def flatten(l):
|
|
if l == []:
|
|
return l
|
|
if(isinstance(l[0], list)):
|
|
return flatten(l[0]) + flatten(l[1:])
|
|
return l[:1] + flatten(l[1:])
|
|
|
|
def read_csv():
|
|
global df
|
|
currDir = os.path.realpath(__file__)[:-8]
|
|
df = pd.read_csv(currDir+'data/'+arch.lower()+'_data.csv')
|
|
|
|
def create_horiz_sep():
|
|
global horizontalSeparator
|
|
horizontalSeparator = '-'*(longestInstr+8)
|
|
|
|
def create_output(tp_list=False,pr_sched=True):
|
|
global longestInstr
|
|
|
|
#Check the output alignment depending on the longest instruction
|
|
if(longestInstr > 70):
|
|
longestInstr = 70
|
|
create_horiz_sep()
|
|
ws = ' '*(len(horizontalSeparator)-23)
|
|
# Write general information about the benchmark
|
|
output = ( '--'+horizontalSeparator+'\n'
|
|
'| Analyzing of file:\t'+os.getcwd()+'/'+filepath+'\n'
|
|
'| Architecture:\t\t'+arch+'\n'
|
|
'| Timestamp:\t\t'+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'\n')
|
|
|
|
if(tp_list):
|
|
output += create_TP_list(instrForms)
|
|
if(pr_sched):
|
|
output += '\n\n'
|
|
sched = Scheduler(arch, instrForms)
|
|
schedOutput,totalTP = sched.schedule_FCFS()
|
|
output += sched.get_head()+schedOutput
|
|
output += 'Total number of estimated throughput: '+str(totalTP)
|
|
return output
|
|
|
|
|
|
def create_TP_list(instrForms):
|
|
warning = False
|
|
ws = ' '*(len(horizontalSeparator)-23)
|
|
|
|
output = ('\n| INSTRUCTION'+ws+'CLOCK CYCLES\n'
|
|
'| '+horizontalSeparator+'\n|\n')
|
|
# Check for the throughput data in CSV
|
|
# First determine if we're searching for the SSE, AVX or AVX512 type of instruction
|
|
for elem in instrForms:
|
|
extension = ''
|
|
opExt = []
|
|
for i in range(1, len(elem)-1):
|
|
optmp = ''
|
|
if(isinstance(elem[i], Register) and elem[i].reg_type == 'GPR'):
|
|
optmp = 'r'+str(elem[i].size)
|
|
elif(isinstance(elem[i], MemAddr)):
|
|
optmp = 'mem'
|
|
else:
|
|
optmp = elem[i].print().lower()
|
|
opExt.append(optmp)
|
|
operands = '_'.join(opExt)
|
|
# Now look up the value in the dataframe
|
|
# Check if there is a stored throughput value in database
|
|
import warnings
|
|
warnings.filterwarnings("ignore", 'This pattern has match groups')
|
|
series = df['instr'].str.contains(elem[0]+'-'+operands)
|
|
if( True in series.values):
|
|
# It's a match!
|
|
notFound = False
|
|
try:
|
|
tp = df[df.instr == elem[0]+'-'+operands].TP.values[0]
|
|
except IndexError:
|
|
# Something went wrong
|
|
print('Error while fetching data from database')
|
|
continue
|
|
# Did not found the exact instruction form.
|
|
# Try to find the instruction form for register operands only
|
|
else:
|
|
opExtRegs = []
|
|
for operand in opExt:
|
|
try:
|
|
regTmp = Register(operand)
|
|
opExtRegs.append(True)
|
|
except KeyError:
|
|
opExtRegs.append(False)
|
|
pass
|
|
if(not True in opExtRegs):
|
|
# No register in whole instruction form. How can I found out what regsize we need?
|
|
print('Feature not included yet: ', end='')
|
|
print(elem[0]+' for '+operands)
|
|
tp = 0
|
|
notFound = True
|
|
warning = True
|
|
|
|
numWhitespaces = longestInstr-len(elem[-1])
|
|
ws = ' '*numWhitespaces+'| '
|
|
n_f = ' '*(5-len(str(tp)))+'*'
|
|
data = '| '+elem[-1]+ws+str(tp)+n_f+'\n'
|
|
output += data
|
|
continue
|
|
if(opExtRegs[0] == False):
|
|
# Instruction stores result in memory. Check for storing in register instead
|
|
if(len(opExt) > 1):
|
|
if(opExtRegs[1] == True):
|
|
opExt[0] = opExt[1]
|
|
elif(len(optExt > 2)):
|
|
if(opExtRegs[2] == True):
|
|
opExt[0] = opExt[2]
|
|
if(len(opExtRegs) == 2 and opExtRegs[1] == False):
|
|
# Instruction loads value from memory and has only two operands. Check for loading from register instead
|
|
if(opExtRegs[0] == True):
|
|
opExt[1] = opExt[0]
|
|
if(len(opExtRegs) == 3 and opExtRegs[2] == False):
|
|
# Instruction loads value from memory and has three operands. Check for loading from register instead
|
|
opExt[2] = opExt[0]
|
|
operands = '_'.join(opExt)
|
|
# Check for register equivalent instruction
|
|
series = df['instr'].str.contains(elem[0]+'-'+operands)
|
|
if( True in series.values):
|
|
# It's a match!
|
|
notFound = False
|
|
try:
|
|
tp = df[df.instr == elem[0]+'-'+operands].TP.values[0]
|
|
|
|
except IndexError:
|
|
# Something went wrong
|
|
print('Error while fetching data from database')
|
|
continue
|
|
# Did not found the register instruction form. Set warning and go on with throughput 0
|
|
else:
|
|
tp = 0
|
|
notFound = True
|
|
warning = True
|
|
# Check the alignement again
|
|
numWhitespaces = longestInstr-len(elem[-1])
|
|
ws = ' '*numWhitespaces+'| '
|
|
n_f = ''
|
|
if(notFound):
|
|
n_f = ' '*(5-len(str(tp)))+'*'
|
|
data = '| '+elem[-1]+ws+'{:3.2f}'.format(tp)+n_f+'\n'
|
|
output += data
|
|
# Finally end the list of throughput values
|
|
numWhitespaces = longestInstr-27
|
|
ws = ' '+' '*numWhitespaces
|
|
output += '| '+horizontalSeparator+'\n'
|
|
if(warning):
|
|
output += ('\n\n* There was no throughput value found '
|
|
'for the specific instruction form.'
|
|
'\n Please create a testcase via the create_testcase-method '
|
|
'or add a value manually.')
|
|
return output
|
|
|
|
|
|
def create_sequences():
|
|
global cycList
|
|
global reciList
|
|
|
|
for i in range(1, 101):
|
|
cycList.append(i)
|
|
reciList.append(1/i)
|
|
|
|
def validate_val(clkC, instr, isTP):
|
|
clmn = 'LT'
|
|
if(isTP):
|
|
clmn = 'TP'
|
|
for i in range(0, 100):
|
|
if(cycList[i]*1.05 > float(clkC) and cycList[i]*0.95 < float(clkC)):
|
|
# Value is probably correct, so round it to the estimated value
|
|
return cycList[i]
|
|
# Check reciprocal only if it is a throughput value
|
|
elif(isTP and reciList[i]*1.05 > float(clkC) and reciList[i]*0.95 < float(clkC)):
|
|
# Value is probably correct, so round it to the estimated value
|
|
return reciList[i]
|
|
# No value close to an integer or its reciprokal found, we assume the measurement is incorrect
|
|
print('Your measurement for {} ({}) is probably wrong. Please inspect your benchmark!'.format(instr, clmn))
|
|
print('The program will continue with the given value')
|
|
return clkC
|
|
|
|
def write_csv(csv):
|
|
try:
|
|
f = open('data/'+arch.lower()+'_data.csv', 'w')
|
|
except IOError:
|
|
print('IOError: file \'{}\' not found in ./data'.format(arch.lower()+'_data.csv'))
|
|
f.write(csv)
|
|
f.close()
|
|
|
|
##---------------main functions depending on arguments----------------------
|
|
|
|
#reads ibench output and includes it in the architecture specific csv file
|
|
def include_ibench():
|
|
global df
|
|
|
|
# Check args and exit program if something's wrong
|
|
if(not check_arch()):
|
|
print('Invalid microarchitecture.')
|
|
sys.exit()
|
|
if(not check_file()):
|
|
print('Invalid file path or file format.')
|
|
sys.exit()
|
|
# Check for database for the chosen architecture
|
|
read_csv()
|
|
# Create sequence of numbers and their reciprokals for validate the measurements
|
|
create_sequences()
|
|
|
|
print('Everything seems fine! Let\'s start!')
|
|
newData = []
|
|
addedValues = 0
|
|
for line in srcCode:
|
|
if('Using frequency' in line or len(line) == 0):
|
|
continue
|
|
clmn = 'LT'
|
|
instr = line.split()[0][:-1]
|
|
if('TP' in line):
|
|
# We found a command with a throughput value. Get instruction and the number of clock cycles
|
|
# and remove the '-TP' suffix
|
|
clmn = 'TP'
|
|
instr = instr[:-3]
|
|
# Otherwise stay with Latency
|
|
clkC = line.split()[1]
|
|
clkC_tmp = clkC
|
|
clkC = validate_val(clkC, instr, True if (clmn == 'TP') else False)
|
|
txtOutput = True if (clkC_tmp == clkC) else False
|
|
val = -2
|
|
new = False
|
|
try:
|
|
entry = df.loc[lambda df: df.instr == instr,clmn]
|
|
val = entry.values[0]
|
|
except IndexError:
|
|
# Instruction not in database yet --> add it
|
|
new = True
|
|
# First check if LT or TP value has already been added before
|
|
for i,item in enumerate(newData):
|
|
if(instr in item):
|
|
if(clmn == 'TP'):
|
|
newData[i][1] = clkC
|
|
elif(clmn == 'LT'):
|
|
newData[i][2] = clkC
|
|
new = False
|
|
break
|
|
if(new and clmn == 'TP'):
|
|
newData.append([instr,clkC,'-1'])
|
|
elif(new and clmn == 'LT'):
|
|
newData.append([instr,'-1',clkC])
|
|
new = True
|
|
addedValues += 1
|
|
pass
|
|
# If val is -1 (= not filled with a valid value) add it immediately
|
|
if(val == -1):
|
|
df.set_value(entry.index[0], clmn, clkC)
|
|
addedValues += 1
|
|
continue
|
|
if(not new and abs((val/np.float64(clkC))-1) > 0.05):
|
|
print('Different measurement for {} ({}): {}(old) vs. {}(new)\nPlease check for correctness (no changes were made).'.format(instr, clmn, val, clkC))
|
|
txtOutput = True
|
|
if(txtOutput):
|
|
print()
|
|
txtOutput = False
|
|
# Now merge the DataFrames and write new csv file
|
|
df = df.append(pd.DataFrame(newData, columns=['instr','TP','LT']), ignore_index=True)
|
|
csv = df.to_csv(index=False)
|
|
write_csv(csv)
|
|
print('ibench output {} successfully in database included.'.format(filepath.split('/')[-1]))
|
|
print('{} values were added.'.format(addedValues))
|
|
|
|
|
|
# main function of the tool
|
|
def inspect_binary():
|
|
# Check args and exit program if something's wrong
|
|
if(not check_arch()):
|
|
print('Invalid microarchitecture.')
|
|
sys.exit()
|
|
if(not check_elffile()):
|
|
print('Invalid file path or file format.')
|
|
sys.exit()
|
|
# Finally check for database for the chosen architecture
|
|
read_csv()
|
|
|
|
print('Everything seems fine! Let\'s start checking!')
|
|
for line in srcCode:
|
|
check_line(line)
|
|
output = create_output()
|
|
print(output)
|
|
|
|
|
|
# main function of the tool with IACA markers instead of OSACA marker
|
|
def inspect_with_iaca():
|
|
# Check args and exit program if something's wrong
|
|
if(not check_arch()):
|
|
print('Invalid microarchitecture.')
|
|
sys.exit()
|
|
# Check if input file is a binary or assembly file
|
|
try:
|
|
binaryFile = True
|
|
if(not check_elffile()):
|
|
print('Invalid file path or file format.')
|
|
sys.exit()
|
|
except (TypeError,IndexError):
|
|
binaryFile = False
|
|
if(not check_file(True)):
|
|
print('Invalid file path or file format.')
|
|
sys.exit()
|
|
# Finally check for database for the chosen architecture
|
|
read_csv()
|
|
|
|
print('Everything seems fine! Let\'s start checking!')
|
|
if(binaryFile):
|
|
iaca_bin()
|
|
else:
|
|
iaca_asm()
|
|
output = create_output()
|
|
print(output)
|
|
|
|
|
|
|
|
##------------------------------------------------------------------------------
|
|
##------------Main method--------------
|
|
def main():
|
|
global inp
|
|
global arch
|
|
global filepath
|
|
# Parse args
|
|
parser = argparse.ArgumentParser(description='Analyzes a marked innermost loop snippet for a given architecture type and prints out the estimated average throughput')
|
|
parser.add_argument('-V', '--version', action='version', version='%(prog)s 0.1')
|
|
parser.add_argument('--arch', dest='arch', type=str, help='define architecture (SNB, IVB, HSW, BDW, SKL)')
|
|
parser.add_argument('filepath', type=str, help='path to object (Binary, ASM, CSV)')
|
|
group = parser.add_mutually_exclusive_group(required=False)
|
|
group.add_argument('-i', '--include-ibench', dest='incl', action='store_true', help='includes the given values in form of the output of ibench in the database')
|
|
group.add_argument('--iaca', dest='iaca', action='store_true', help='search for IACA markers instead the OSACA marker')
|
|
group.add_argument('-m', '--insert-marker', dest='insert_marker', action='store_true', help='try to find blocks probably corresponding to loops in assembly and insert IACA marker')
|
|
|
|
# Store args in global variables
|
|
inp = parser.parse_args()
|
|
if(inp.arch is None and inp.insert_marker is None):
|
|
raise ValueError('Please specify an architecture')
|
|
if(inp.arch is not None):
|
|
arch = inp.arch.upper()
|
|
filepath = inp.filepath
|
|
inclIbench = inp.incl
|
|
iacaFlag = inp.iaca
|
|
insert_m = inp.insert_marker
|
|
|
|
if(inclIbench):
|
|
include_ibench()
|
|
elif(iacaFlag):
|
|
inspect_with_iaca()
|
|
elif(insert_m):
|
|
try:
|
|
from kerncrafts import iaca
|
|
except ImportError:
|
|
print('ImportError: Module kerncraft not installed. Use \'pip install --user kerncraft\' for installation.\nFor more information see https://github.com/RRZE-HPC/kerncraft')
|
|
sys.exit()
|
|
iaca.iaca_instrumentation(input_file=filepath, output_file=filepath,
|
|
block_selection='manual', pointer_increment=1)
|
|
else:
|
|
inspect_binary()
|
|
|
|
|
|
##------------Main method--------------
|
|
if __name__ == '__main__':
|
|
main()
|