Merge pull request #18 from RRZE-HPC/api_cleanup

API cleanup
This commit is contained in:
Julian
2019-01-10 13:42:32 +01:00
committed by GitHub
8 changed files with 618 additions and 676 deletions

View File

@@ -2,5 +2,7 @@ sudo: false
language: python
python:
- "3.5"
- "3.6"
- "3.7"
install: pip install tox-travis
script: tox

View File

@@ -8,63 +8,60 @@ from operator import add
import pandas as pd
from osaca.param import Register, MemAddr
#from param import Register, MemAddr
class Scheduler(object):
arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8, 'ZEN': 10}
dv_port_dict = {'SKL': 0, 'ZEN': 3}
ports = None # type: int
instrList = None # type: list<list<str,Param[,Param][,Param],str>>,
dv_ports_dict = {'SKL': [0], 'ZEN': [3]}
# content of innermost list in instrList: instr, operand(s), instr form
df = None # type: DataFrame
# for parallel ld/st in archs with 1 st/cy and >1 ld/cy, able to do 1 st and 1 ld in 1cy
ld_ports = None # type: list<int>
# enable flag for parallel ld/st
en_par_ldst = False # type: boolean
dv_port = -1 # type: int
def __init__(self, arch, instruction_list):
arch = arch.upper()
try:
self.ports = self.arch_dict[arch]
except KeyError:
print('Architecture not supportet for EU scheduling.', file=sys.stderr)
print('Architecture not supported for EU scheduling.', file=sys.stderr)
sys.exit(1)
# check for parallel ld/st in a cycle
if arch == 'ZEN':
self.en_par_ldst = True
self.ld_ports = [9, 10]
# check for DV port
try:
self.dv_port = self.dv_port_dict[arch]
except KeyError:
# no DV port available (yet, new feature in OSACA v0.2)
# do nothing
pass
self.dv_ports = self.dv_ports_dict.get(arch, [])
self.instrList = instruction_list
# curr_dir = os.path.realpath(__file__)[:-11]
osaca_dir = os.path.expanduser('~/.osaca/')
self.df = pd.read_csv(osaca_dir + 'data/' + arch.lower() + '_data.csv', quotechar='"',
converters={'ports': ast.literal_eval})
def new_schedule(self):
def new_schedule(self, machine_readable=False):
"""
Schedule Instruction Form list and calculate port bindings.
Parameters
----------
machine_readable : bool
Boolean for indicating if the return value should be human readable (if False) or
machine readable (if True)
Returns
-------
(str, [int, ...])
A tuple containing the graphic output of the schedule as string and
the port bindings as list of ints.
(str, [float, ...]) or ([[float, ...], ...], [float, ...])
A tuple containing the output of the schedule as string (if machine_readable is not
given or False) or as list of lists (if machine_readable is True) and the port bindings
as list of float.
"""
sched = self.get_head()
# Initialize ports
# Add DV port, if it exists
tmp_port = 0
if self.dv_port != -1:
tmp_port = 1
occ_ports = [[0] * (self.ports + tmp_port) for x in range(len(self.instrList))]
port_bndgs = [0] * (self.ports + tmp_port)
occ_ports = [[0] * (self.ports + len(self.dv_ports)) for x in range(len(self.instrList))]
port_bndgs = [0] * (self.ports + len(self.dv_ports))
# Store instruction counter for parallel ld/st
par_ldst = 0
# Count the number of store instr if we schedule for an architecture with par ld/st
@@ -86,9 +83,9 @@ class Scheduler(object):
except IndexError:
# Instruction form not in CSV
if instrForm[0][:3] == 'nop':
sched += self.get_line(occ_ports[i], '* ' + instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
else:
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], 'X ' + instrForm[-1])
continue
occ_ports[i] = list(tup)
# Check if it's a ld including instr
@@ -104,13 +101,16 @@ class Scheduler(object):
occ_ports[i][port] = '(' + str(occ_ports[i][port]) + ')'
# Write schedule line
if len(p_flg) > 0:
sched += self.get_line(occ_ports[i], p_flg + instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], p_flg + instrForm[-1])
for port in self.ld_ports:
occ_ports[i][port] = 0
else:
sched += self.get_line(occ_ports[i], instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], instrForm[-1])
# Add throughput to total port binding
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
if machine_readable:
list(map(self.append, occ_ports, self.instrList))
return occ_ports, port_bndgs
return sched, port_bndgs
def schedule(self):
@@ -139,9 +139,9 @@ class Scheduler(object):
except IndexError:
# Instruction form not in CSV
if instrForm[0][:3] == 'nop':
sched += self.get_line(occ_ports[i], '* ' + instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
else:
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], 'X ' + instrForm[-1])
continue
if wTP:
# Get the occurrence of each port from the occupation list
@@ -165,7 +165,7 @@ class Scheduler(object):
for j in range(0, self.ports):
occ_ports[i][j] = t_all.count(j) / variations
# Write schedule line
sched += self.get_line(occ_ports[i], instrForm[-1])
sched += self.format_port_occupation_line(occ_ports[i], instrForm[-1])
# Add throughput to total port binding
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
return sched, port_bndgs
@@ -177,6 +177,10 @@ class Scheduler(object):
return self.flatten(l[0]) + self.flatten(l[1:])
return l[:1] + self.flatten(l[1:])
def append(self, l, e):
    """
    Append an element to a list in place.

    Parameters
    ----------
    l : object
        Target container; it is only modified when it is a ``list`` instance.
    e : object
        Element to append to ``l``.

    Returns
    -------
    None
        ``l`` is mutated in place; any non-list ``l`` is left untouched.
    """
    # Non-list targets are skipped on purpose instead of raising an error.
    if not isinstance(l, list):
        return
    l.append(e)
def schedule_fcfs(self):
"""
Schedule Instruction Form list for a single run with latencies.
@@ -199,7 +203,7 @@ class Scheduler(object):
raise IndexError()
except IndexError:
# Instruction form not in CSV
sched += self.get_line([0] * self.ports, '* ' + instrForm[-1])
sched += self.format_port_occupation_line([0] * self.ports, '* ' + instrForm[-1])
continue
found = False
while not found:
@@ -211,7 +215,7 @@ class Scheduler(object):
found = True
good = [entry.LT.values[0] if (j in portOcc) else 0 for j in
range(0, self.ports)]
sched += self.get_line(good, instrForm[-1])
sched += self.format_port_occupation_line(good, instrForm[-1])
# Add new occupation
occ_ports = [occ_ports[j] + good[j] for j in range(0, self.ports)]
break
@@ -316,22 +320,15 @@ class Scheduler(object):
str
String containing the header
"""
horiz_line = '-' * 7 * self.ports
if self.dv_port != -1:
horiz_line += '-' * 6
horiz_line += '-\n'
port_anno = (' ' * int(math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles'
+ ' ' * int(math.ceil((len(horiz_line) - 24) / 2)) + '\n')
port_line = ''
for i in range(0, self.ports):
port_line += '| {} '.format(i)
if i == self.dv_port:
port_line = port_line + '- DV '
port_line += '|\n'
head = port_anno + port_line + horiz_line
return head
port_names = self.get_port_naming()
def get_line(self, occ_ports, instr_name):
port_line = ''.join('|{:^6}'.format(pn) for pn in port_names) + '|\n'
horiz_line = '-' * (len(port_line) - 1) + '\n'
port_anno = ' ' * ((len(port_line) - 25) // 2) + 'Ports Pressure in cycles\n'
return port_anno + port_line + horiz_line
def format_port_occupation_line(self, occ_ports, instr_name):
"""
Create line with port occupation for output.
@@ -348,24 +345,31 @@ class Scheduler(object):
String for output containing port scheduling for instr_name
"""
line = ''
for p_num, i in enumerate(occ_ports):
pipe = '|'
if isinstance(i, str):
cycles = i
i = float(i[1:-1])
r_space = ''
for cycles in occ_ports:
if cycles == 0:
line += '|' + ' ' * 6
elif cycles >= 10:
line += '|{:^6.1f}'.format(cycles)
else:
cycles = ' ' if (i == 0) else '%.2f' % float(i)
r_space = ' '
if p_num == self.dv_port + 1 and p_num != 0:
pipe = ' '
if i >= 10:
line += pipe + cycles + r_space
else:
line += pipe + ' ' + cycles + r_space
line += '|{:^6.2f}'.format(cycles)
line += '| ' + instr_name + '\n'
return line
def get_port_naming(self):
    """
    Return list of port names.

    Every port index in ``range(self.ports)`` contributes its number as a string. A port
    whose index is contained in ``self.dv_ports`` is immediately followed by an additional
    ``'<index>DV'`` entry, so the result can be longer than ``self.ports``.

    :return: list of strings
    """
    port_names = []
    for i in range(self.ports):
        port_names.append(str(i))
        if i in self.dv_ports:
            # The DV column is placed directly after the port it belongs to.
            port_names.append(str(i) + 'DV')
    return port_names
def get_port_binding(self, port_bndg):
"""
Create port binding out of scheduling result.
@@ -380,36 +384,23 @@ class Scheduler(object):
str
String containing the port binding graphical output
"""
sp_left, sp_right, total = self.get_spaces(port_bndg)
col_widths = self.get_column_widths(port_bndg)
header = 'Port Binding in Cycles Per Iteration:\n'
horiz_line = '-' * 10 + '-' * total + '\n'
horiz_line = '-' * 10 + '-' * (sum(col_widths) + len(col_widths)) + '\n'
port_line = '| Port |'
after_dv = 0
for i in range(0, self.ports):
if i == self.dv_port:
port_line += ' ' * int(sp_left[i]) + str(i) + ' ' * int(sp_right[i]) + '-'
port_line += ' ' * int(sp_left[i + 1] - 1) + 'DV' + ' ' * int(sp_right[i + 1]) + '|'
after_dv = 1
else:
port_line += (' ' * int(sp_left[i + after_dv]) + str(i)
+ ' ' * int(sp_right[i + after_dv]))
port_line += '|'
for i, port_name in enumerate(self.get_port_naming()):
port_line += port_name.center(col_widths[i]) + '|'
port_line += '\n'
cyc_line = '| Cycles |'
for i in range(len(port_bndg)):
pipe = '|' if (i != self.dv_port) else ' '
cyc = str(round(port_bndg[i], 2))
cyc_line += ' {} {}'.format(cyc, pipe)
cyc_line += '{}|'.format(str(round(port_bndg[i], 2)).center(col_widths[i]))
cyc_line += '\n'
binding = header + horiz_line + port_line + horiz_line + cyc_line + horiz_line
return binding
def get_spaces(self, port_bndg):
    """
    Compute the padding around each value of a port binding row.

    For every entry of ``port_bndg`` the width is the length of the entry rounded to two
    decimals and converted to a string, plus one.

    Parameters
    ----------
    port_bndg : iterable of numbers
        Port binding values.

    Returns
    -------
    (list, list, int)
        Left padding per entry (half the width, rounded up), right padding per entry
        (half the width, rounded down) and the sum of all widths with two extra
        characters added per entry.
    """
    sp_left = []
    sp_right = []
    total = 0
    for value in port_bndg:
        width = len(str(round(value, 2))) + 1
        total += width + 2
        sp_left.append(math.ceil(width / 2))
        sp_right.append(math.floor(width / 2))
    return sp_left, sp_right, total
def get_column_widths(self, port_bndg):
    """
    Compute the column widths needed for the port binding output.

    Each width is the longer of the port name and the value rounded to two decimals and
    converted to a string, plus two. Values are paired with the names returned by
    ``self.get_port_naming()``; surplus entries on either side are ignored.

    Parameters
    ----------
    port_bndg : iterable of numbers
        Port binding values, one per port column.

    Returns
    -------
    list of int
        Width of every column.
    """
    widths = []
    for cycles, port_name in zip(port_bndg, self.get_port_naming()):
        cycles_len = len(str(round(cycles, 2)))
        widths.append(max(cycles_len, len(port_name)) + 2)
    return widths
def get_operand_suffix(self, instr_form):
"""

View File

@@ -5,6 +5,8 @@ import argparse
from osaca.testcase import Testcase
from osaca.param import Register, MemAddr, Parameter
#from testcase import Testcase
#from param import Register, MemAddr, Parameter
class InstrExtractor(object):

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,7 @@
#!/usr/bin/env python3
import re
class Parameter(object):
type_list = ['REG', 'MEM', 'IMD', 'LBL', 'NONE']
@@ -21,38 +24,27 @@ class MemAddr(Parameter):
def __init__(self, name):
super().__init__("MEM")
self.sreg = False
self.offset = False
self.base = False
self.index = False
self.scale = False
if ':' in name:
if name[1:name.index(':')].upper() not in self.segment_regs:
raise NameError('Type not supported: '+name)
self.sreg = True
self.offset = True
if '(' not in name or ('(' in name and name.index('(') != 0):
self.offset = True
if '(' in name:
self.parentheses = name[name.index('(')+1:-1]
self.commacnt = self.parentheses.count(',')
if self.commacnt == 0:
self.base = True
elif self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1:
self.base = True
self.index = True
elif self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales:
self.base = True
self.index = True
self.scale = True
else:
raise NameError('Type not supported: '+name)
name = name.strip(', \t')
self.offset = None
self.base = None
self.index = None
self.scale = None
m = re.match(r'(?P<offset>[x0-9a-fA-F]*)\((?P<base>[^,\)]+)(?:,\s*(?P<index>[^,\)]+)'
r'(?:,\s*(?P<scale>[^,\)]+))?)?\)', name)
if not m:
raise ValueError('Type not supported: {!r}'.format(name))
self.offset = m.group('offset') or None
self.base = m.group('base') or None
self.index = m.group('index') or None
self.scale = m.group('scale') or None
def __str__(self):
"""returns string representation"""
mem_format = 'MEM('
if self.sreg:
mem_format += 'sreg:'
if self.offset:
mem_format += 'offset'
if self.base and not self.index:

View File

@@ -5,6 +5,7 @@ from subprocess import call
from math import ceil
from osaca.param import Register, MemAddr, Parameter
#from param import Register, MemAddr, Parameter
class Testcase(object):

View File

@@ -7,41 +7,41 @@ import os
import unittest
sys.path.insert(0, '..')
from osaca.osaca import OSACA
from osaca import osaca
class TestOsaca(unittest.TestCase):
maxDiff = None
@unittest.skip("Binary analysis is error prone and currently not working with FSF's objdump")
def testIACABinary(self):
out = StringIO()
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca', out)
osa.inspect_with_iaca()
result = out.getvalue()
result = '\n'.join(result.split('\n')[-27:])
assembly = osaca.get_assembly_from_binary(curr_dir + '/testfiles/taxCalc-ivb-iaca')
osa = osaca.OSACA('IVB', assembly)
result = osa.generate_text_output()
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
with open(curr_dir + '/test_osaca_iaca.out', encoding='utf-8') as f:
assertion = f.read()
self.assertEqual(assertion, result)
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
# Test ASM file with IACA marker in two lines
def testIACAasm1(self):
out = StringIO()
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca.S', out)
osa.inspect_with_iaca()
result = out.getvalue()
result = '\n'.join(result.split('\n')[-27:])
with open(curr_dir + '/testfiles/taxCalc-ivb-iaca.S') as f:
osa = osaca.OSACA('IVB', f.read())
result = osa.generate_text_output()
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
assertion = f.read()
self.assertEqual(assertion, result)
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
# Test ASM file with IACA marker in four lines
def testIACAasm2(self):
out = StringIO()
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca2.S', out)
osa.inspect_with_iaca()
result = out.getvalue()
result = '\n'.join(result.split('\n')[-27:])
with open(curr_dir + '/testfiles/taxCalc-ivb-iaca2.S') as f:
osa = osaca.OSACA('IVB', f.read())
result = osa.generate_text_output()
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
assertion = f.read()
self.assertEqual(assertion, result)
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))

View File

@@ -9,18 +9,18 @@ Port Binding in Cycles Per Iteration:
Ports Pressure in cycles
| 0 | 1 | 2 | 3 | 4 | 5 |
-------------------------------------------
| 0.50 | 0.50 | | | | | lea 0x1(%rax,%rax,1),%edx
| | 1.00 | | | | 1.00 | vcvtsi2ss %edx,%xmm2,%xmm2
| 1.00 | | | | | | vmulss %xmm2,%xmm0,%xmm3
| 0.50 | 0.50 | | | | | lea 0x2(%rax,%rax,1),%ecx
| | 1.00 | | | | | vaddss %xmm3,%xmm1,%xmm4
| | | | | | 1.00 | vxorps %xmm1,%xmm1,%xmm1
| | 1.00 | | | | 1.00 | vcvtsi2ss %ecx,%xmm1,%xmm1
| 1.00 | | | | | | vmulss %xmm1,%xmm0,%xmm5
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4,0x4(%rsp,%rax,8)
| | 1.00 | | | | | vaddss %xmm5,%xmm4,%xmm1
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1,0x8(%rsp,%rax,8)
| 0.33 | 0.33 | | | | 0.33 | inc %rax
| 0.33 | 0.33 | | | | 0.33 | cmp $0x1f3,%rax
| | | | | | | jb 400bc2 <main+0x62>
| 0.50 | 0.50 | | | | | lea 1(%rax,%rax),%edx
| | 1.00 | | | | 1.00 | vcvtsi2ss %edx,%xmm2,%xmm2
| 1.00 | | | | | | vmulss %xmm2,%xmm0,%xmm3
| 0.50 | 0.50 | | | | | lea 2(%rax,%rax),%ecx
| | 1.00 | | | | | vaddss %xmm3,%xmm1,%xmm4
| | | | | | 1.00 | vxorps %xmm1,%xmm1,%xmm1
| | 1.00 | | | | 1.00 | vcvtsi2ss %ecx,%xmm1,%xmm1
| 1.00 | | | | | | vmulss %xmm1,%xmm0,%xmm5
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4,4(%rsp,%rax,8)
| | 1.00 | | | | | vaddss %xmm5,%xmm4,%xmm1
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1,8(%rsp,%rax,8)
| 0.33 | 0.33 | | | | 0.33 | inc %rax
| 0.33 | 0.33 | | | | 0.33 | cmp $499,%rax
| | | | | | | X jb main_98
Total number of estimated throughput: 5.67