mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 00:50:06 +01:00
@@ -2,5 +2,7 @@ sudo: false
|
||||
language: python
|
||||
python:
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
install: pip install tox-travis
|
||||
script: tox
|
||||
|
||||
@@ -8,63 +8,60 @@ from operator import add
|
||||
import pandas as pd
|
||||
|
||||
from osaca.param import Register, MemAddr
|
||||
#from param import Register, MemAddr
|
||||
|
||||
|
||||
class Scheduler(object):
|
||||
arch_dict = {'SNB': 6, 'IVB': 6, 'HSW': 8, 'BDW': 8, 'SKL': 8, 'ZEN': 10}
|
||||
dv_port_dict = {'SKL': 0, 'ZEN': 3}
|
||||
ports = None # type: int
|
||||
instrList = None # type: list<list<str,Param[,Param][,Param],str>>,
|
||||
dv_ports_dict = {'SKL': [0], 'ZEN': [3]}
|
||||
# content of most inner list in instrList: instr, operand(s), instr form
|
||||
df = None # type: DataFrame
|
||||
# for parallel ld/st in archs with 1 st/cy and >1 ld/cy, able to do 1 st and 1 ld in 1cy
|
||||
ld_ports = None # type: list<int>
|
||||
# enable flag for parallel ld/st
|
||||
en_par_ldst = False # type: boolean
|
||||
dv_port = -1 # type: int
|
||||
|
||||
def __init__(self, arch, instruction_list):
|
||||
arch = arch.upper()
|
||||
try:
|
||||
self.ports = self.arch_dict[arch]
|
||||
except KeyError:
|
||||
print('Architecture not supportet for EU scheduling.', file=sys.stderr)
|
||||
print('Architecture not supported for EU scheduling.', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
# check for parallel ld/st in a cycle
|
||||
if arch == 'ZEN':
|
||||
self.en_par_ldst = True
|
||||
self.ld_ports = [9, 10]
|
||||
# check for DV port
|
||||
try:
|
||||
self.dv_port = self.dv_port_dict[arch]
|
||||
except KeyError:
|
||||
# no DV port available (yet, new feature in OSACA v0.2)
|
||||
# do nothing
|
||||
pass
|
||||
self.dv_ports = self.dv_ports_dict.get(arch, [])
|
||||
self.instrList = instruction_list
|
||||
# curr_dir = os.path.realpath(__file__)[:-11]
|
||||
osaca_dir = os.path.expanduser('~/.osaca/')
|
||||
self.df = pd.read_csv(osaca_dir + 'data/' + arch.lower() + '_data.csv', quotechar='"',
|
||||
converters={'ports': ast.literal_eval})
|
||||
|
||||
def new_schedule(self):
|
||||
def new_schedule(self, machine_readable=False):
|
||||
"""
|
||||
Schedule Instruction Form list and calculate port bindings.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
machine_readable : bool
|
||||
Boolean for indicating if the return value should be human readable (if False) or
|
||||
machine readable (if True)
|
||||
|
||||
Returns
|
||||
-------
|
||||
(str, [int, ...])
|
||||
A tuple containing the graphic output of the schedule as string and
|
||||
the port bindings as list of ints.
|
||||
(str, [float, ...]) or ([[float, ...], ...], [float, ...])
|
||||
A tuple containing the output of the schedule as string (if machine_readable is not
|
||||
given or False) or as list of lists (if machine_readable is True) and the port bindings
|
||||
as list of float.
|
||||
"""
|
||||
sched = self.get_head()
|
||||
# Initialize ports
|
||||
# Add DV port, if it is existing
|
||||
tmp_port = 0
|
||||
if self.dv_port != -1:
|
||||
tmp_port = 1
|
||||
occ_ports = [[0] * (self.ports + tmp_port) for x in range(len(self.instrList))]
|
||||
port_bndgs = [0] * (self.ports + tmp_port)
|
||||
occ_ports = [[0] * (self.ports + len(self.dv_ports)) for x in range(len(self.instrList))]
|
||||
port_bndgs = [0] * (self.ports + len(self.dv_ports))
|
||||
# Store instruction counter for parallel ld/st
|
||||
par_ldst = 0
|
||||
# Count the number of store instr if we schedule for an architecture with par ld/st
|
||||
@@ -86,9 +83,9 @@ class Scheduler(object):
|
||||
except IndexError:
|
||||
# Instruction form not in CSV
|
||||
if instrForm[0][:3] == 'nop':
|
||||
sched += self.get_line(occ_ports[i], '* ' + instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
|
||||
else:
|
||||
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], 'X ' + instrForm[-1])
|
||||
continue
|
||||
occ_ports[i] = list(tup)
|
||||
# Check if it's a ld including instr
|
||||
@@ -104,13 +101,16 @@ class Scheduler(object):
|
||||
occ_ports[i][port] = '(' + str(occ_ports[i][port]) + ')'
|
||||
# Write schedule line
|
||||
if len(p_flg) > 0:
|
||||
sched += self.get_line(occ_ports[i], p_flg + instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], p_flg + instrForm[-1])
|
||||
for port in self.ld_ports:
|
||||
occ_ports[i][port] = 0
|
||||
else:
|
||||
sched += self.get_line(occ_ports[i], instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], instrForm[-1])
|
||||
# Add throughput to total port binding
|
||||
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
|
||||
if machine_readable:
|
||||
list(map(self.append, occ_ports, self.instrList))
|
||||
return occ_ports, port_bndgs
|
||||
return sched, port_bndgs
|
||||
|
||||
def schedule(self):
|
||||
@@ -139,9 +139,9 @@ class Scheduler(object):
|
||||
except IndexError:
|
||||
# Instruction form not in CSV
|
||||
if instrForm[0][:3] == 'nop':
|
||||
sched += self.get_line(occ_ports[i], '* ' + instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], '* ' + instrForm[-1])
|
||||
else:
|
||||
sched += self.get_line(occ_ports[i], 'X ' + instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], 'X ' + instrForm[-1])
|
||||
continue
|
||||
if wTP:
|
||||
# Get the occurance of each port from the occupation list
|
||||
@@ -165,7 +165,7 @@ class Scheduler(object):
|
||||
for j in range(0, self.ports):
|
||||
occ_ports[i][j] = t_all.count(j) / variations
|
||||
# Write schedule line
|
||||
sched += self.get_line(occ_ports[i], instrForm[-1])
|
||||
sched += self.format_port_occupation_line(occ_ports[i], instrForm[-1])
|
||||
# Add throughput to total port binding
|
||||
port_bndgs = list(map(add, port_bndgs, occ_ports[i]))
|
||||
return sched, port_bndgs
|
||||
@@ -177,6 +177,10 @@ class Scheduler(object):
|
||||
return self.flatten(l[0]) + self.flatten(l[1:])
|
||||
return l[:1] + self.flatten(l[1:])
|
||||
|
||||
def append(self, l, e):
|
||||
if(isinstance(l, list)):
|
||||
l.append(e)
|
||||
|
||||
def schedule_fcfs(self):
|
||||
"""
|
||||
Schedule Instruction Form list for a single run with latencies.
|
||||
@@ -199,7 +203,7 @@ class Scheduler(object):
|
||||
raise IndexError()
|
||||
except IndexError:
|
||||
# Instruction form not in CSV
|
||||
sched += self.get_line([0] * self.ports, '* ' + instrForm[-1])
|
||||
sched += self.format_port_occupation_line([0] * self.ports, '* ' + instrForm[-1])
|
||||
continue
|
||||
found = False
|
||||
while not found:
|
||||
@@ -211,7 +215,7 @@ class Scheduler(object):
|
||||
found = True
|
||||
good = [entry.LT.values[0] if (j in portOcc) else 0 for j in
|
||||
range(0, self.ports)]
|
||||
sched += self.get_line(good, instrForm[-1])
|
||||
sched += self.format_port_occupation_line(good, instrForm[-1])
|
||||
# Add new occupation
|
||||
occ_ports = [occ_ports[j] + good[j] for j in range(0, self.ports)]
|
||||
break
|
||||
@@ -316,22 +320,15 @@ class Scheduler(object):
|
||||
str
|
||||
String containing the header
|
||||
"""
|
||||
horiz_line = '-' * 7 * self.ports
|
||||
if self.dv_port != -1:
|
||||
horiz_line += '-' * 6
|
||||
horiz_line += '-\n'
|
||||
port_anno = (' ' * int(math.floor((len(horiz_line) - 24) / 2)) + 'Ports Pressure in cycles'
|
||||
+ ' ' * int(math.ceil((len(horiz_line) - 24) / 2)) + '\n')
|
||||
port_line = ''
|
||||
for i in range(0, self.ports):
|
||||
port_line += '| {} '.format(i)
|
||||
if i == self.dv_port:
|
||||
port_line = port_line + '- DV '
|
||||
port_line += '|\n'
|
||||
head = port_anno + port_line + horiz_line
|
||||
return head
|
||||
port_names = self.get_port_naming()
|
||||
|
||||
def get_line(self, occ_ports, instr_name):
|
||||
port_line = ''.join('|{:^6}'.format(pn) for pn in port_names) + '|\n'
|
||||
horiz_line = '-' * (len(port_line) - 1) + '\n'
|
||||
port_anno = ' ' * ((len(port_line) - 25) // 2) + 'Ports Pressure in cycles\n'
|
||||
|
||||
return port_anno + port_line + horiz_line
|
||||
|
||||
def format_port_occupation_line(self, occ_ports, instr_name):
|
||||
"""
|
||||
Create line with port occupation for output.
|
||||
|
||||
@@ -348,24 +345,31 @@ class Scheduler(object):
|
||||
String for output containing port scheduling for instr_name
|
||||
"""
|
||||
line = ''
|
||||
for p_num, i in enumerate(occ_ports):
|
||||
pipe = '|'
|
||||
if isinstance(i, str):
|
||||
cycles = i
|
||||
i = float(i[1:-1])
|
||||
r_space = ''
|
||||
for cycles in occ_ports:
|
||||
if cycles == 0:
|
||||
line += '|' + ' ' * 6
|
||||
elif cycles >= 10:
|
||||
line += '|{:^6.1f}'.format(cycles)
|
||||
else:
|
||||
cycles = ' ' if (i == 0) else '%.2f' % float(i)
|
||||
r_space = ' '
|
||||
if p_num == self.dv_port + 1 and p_num != 0:
|
||||
pipe = ' '
|
||||
if i >= 10:
|
||||
line += pipe + cycles + r_space
|
||||
else:
|
||||
line += pipe + ' ' + cycles + r_space
|
||||
line += '|{:^6.2f}'.format(cycles)
|
||||
line += '| ' + instr_name + '\n'
|
||||
return line
|
||||
|
||||
def get_port_naming(self):
|
||||
"""
|
||||
Return list of port names
|
||||
|
||||
:return: list of strings
|
||||
"""
|
||||
port_names = []
|
||||
dv_ports_appended = 0
|
||||
for i in range(self.ports):
|
||||
port_names.append(str(i))
|
||||
if i in self.dv_ports:
|
||||
dv_ports_appended += 1
|
||||
port_names.append(str(i)+'DV')
|
||||
return port_names
|
||||
|
||||
def get_port_binding(self, port_bndg):
|
||||
"""
|
||||
Create port binding out of scheduling result.
|
||||
@@ -380,36 +384,23 @@ class Scheduler(object):
|
||||
str
|
||||
String containing the port binding graphical output
|
||||
"""
|
||||
sp_left, sp_right, total = self.get_spaces(port_bndg)
|
||||
col_widths = self.get_column_widths(port_bndg)
|
||||
header = 'Port Binding in Cycles Per Iteration:\n'
|
||||
horiz_line = '-' * 10 + '-' * total + '\n'
|
||||
horiz_line = '-' * 10 + '-' * (sum(col_widths) + len(col_widths)) + '\n'
|
||||
port_line = '| Port |'
|
||||
after_dv = 0
|
||||
for i in range(0, self.ports):
|
||||
if i == self.dv_port:
|
||||
port_line += ' ' * int(sp_left[i]) + str(i) + ' ' * int(sp_right[i]) + '-'
|
||||
port_line += ' ' * int(sp_left[i + 1] - 1) + 'DV' + ' ' * int(sp_right[i + 1]) + '|'
|
||||
after_dv = 1
|
||||
else:
|
||||
port_line += (' ' * int(sp_left[i + after_dv]) + str(i)
|
||||
+ ' ' * int(sp_right[i + after_dv]))
|
||||
port_line += '|'
|
||||
for i, port_name in enumerate(self.get_port_naming()):
|
||||
port_line += port_name.center(col_widths[i]) + '|'
|
||||
port_line += '\n'
|
||||
cyc_line = '| Cycles |'
|
||||
for i in range(len(port_bndg)):
|
||||
pipe = '|' if (i != self.dv_port) else ' '
|
||||
cyc = str(round(port_bndg[i], 2))
|
||||
cyc_line += ' {} {}'.format(cyc, pipe)
|
||||
cyc_line += '{}|'.format(str(round(port_bndg[i], 2)).center(col_widths[i]))
|
||||
cyc_line += '\n'
|
||||
binding = header + horiz_line + port_line + horiz_line + cyc_line + horiz_line
|
||||
return binding
|
||||
|
||||
def get_spaces(self, port_bndg):
|
||||
len_list = [len(str(round(x, 2))) + 1 for x in port_bndg]
|
||||
total = sum([x + 2 for x in len_list])
|
||||
sp_left = [math.ceil(x / 2) for x in len_list]
|
||||
sp_right = [math.floor(x / 2) for x in len_list]
|
||||
return sp_left, sp_right, total
|
||||
def get_column_widths(self, port_bndg):
|
||||
return [max(len(str(round(x, 2))), len(name)) + 2
|
||||
for x, name in zip(port_bndg, self.get_port_naming())]
|
||||
|
||||
def get_operand_suffix(self, instr_form):
|
||||
"""
|
||||
|
||||
@@ -5,6 +5,8 @@ import argparse
|
||||
|
||||
from osaca.testcase import Testcase
|
||||
from osaca.param import Register, MemAddr, Parameter
|
||||
#from testcase import Testcase
|
||||
#from param import Register, MemAddr, Parameter
|
||||
|
||||
|
||||
class InstrExtractor(object):
|
||||
|
||||
1026
osaca/osaca.py
1026
osaca/osaca.py
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
|
||||
|
||||
class Parameter(object):
|
||||
type_list = ['REG', 'MEM', 'IMD', 'LBL', 'NONE']
|
||||
|
||||
@@ -21,38 +24,27 @@ class MemAddr(Parameter):
|
||||
|
||||
def __init__(self, name):
|
||||
super().__init__("MEM")
|
||||
self.sreg = False
|
||||
self.offset = False
|
||||
self.base = False
|
||||
self.index = False
|
||||
self.scale = False
|
||||
if ':' in name:
|
||||
if name[1:name.index(':')].upper() not in self.segment_regs:
|
||||
raise NameError('Type not supported: '+name)
|
||||
self.sreg = True
|
||||
self.offset = True
|
||||
if '(' not in name or ('(' in name and name.index('(') != 0):
|
||||
self.offset = True
|
||||
if '(' in name:
|
||||
self.parentheses = name[name.index('(')+1:-1]
|
||||
self.commacnt = self.parentheses.count(',')
|
||||
if self.commacnt == 0:
|
||||
self.base = True
|
||||
elif self.commacnt == 1 or self.commacnt == 2 and int(self.parentheses[-1:]) == 1:
|
||||
self.base = True
|
||||
self.index = True
|
||||
elif self.commacnt == 2 and int(self.parentheses[-1:]) in self.scales:
|
||||
self.base = True
|
||||
self.index = True
|
||||
self.scale = True
|
||||
else:
|
||||
raise NameError('Type not supported: '+name)
|
||||
name = name.strip(', \t')
|
||||
self.offset = None
|
||||
self.base = None
|
||||
self.index = None
|
||||
self.scale = None
|
||||
|
||||
m = re.match(r'(?P<offset>[x0-9a-fA-F]*)\((?P<base>[^,\)]+)(?:,\s*(?P<index>[^,\)]+)'
|
||||
r'(?:,\s*(?P<scale>[^,\)]+))?)?\)', name)
|
||||
|
||||
if not m:
|
||||
raise ValueError('Type not supported: {!r}'.format(name))
|
||||
|
||||
self.offset = m.group('offset') or None
|
||||
self.base = m.group('base') or None
|
||||
self.index = m.group('index') or None
|
||||
self.scale = m.group('scale') or None
|
||||
|
||||
|
||||
def __str__(self):
|
||||
"""returns string representation"""
|
||||
mem_format = 'MEM('
|
||||
if self.sreg:
|
||||
mem_format += 'sreg:'
|
||||
if self.offset:
|
||||
mem_format += 'offset'
|
||||
if self.base and not self.index:
|
||||
|
||||
@@ -5,6 +5,7 @@ from subprocess import call
|
||||
from math import ceil
|
||||
|
||||
from osaca.param import Register, MemAddr, Parameter
|
||||
#from param import Register, MemAddr, Parameter
|
||||
|
||||
|
||||
class Testcase(object):
|
||||
|
||||
@@ -7,41 +7,41 @@ import os
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, '..')
|
||||
from osaca.osaca import OSACA
|
||||
from osaca import osaca
|
||||
|
||||
|
||||
class TestOsaca(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
@unittest.skip("Binary analysis is error prone and currently not working with FSF's objdump")
|
||||
def testIACABinary(self):
|
||||
out = StringIO()
|
||||
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
|
||||
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca', out)
|
||||
osa.inspect_with_iaca()
|
||||
result = out.getvalue()
|
||||
result = '\n'.join(result.split('\n')[-27:])
|
||||
assembly = osaca.get_assembly_from_binary(curr_dir + '/testfiles/taxCalc-ivb-iaca')
|
||||
osa = osaca.OSACA('IVB', assembly)
|
||||
result = osa.generate_text_output()
|
||||
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
|
||||
with open(curr_dir + '/test_osaca_iaca.out', encoding='utf-8') as f:
|
||||
assertion = f.read()
|
||||
self.assertEqual(assertion, result)
|
||||
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
|
||||
|
||||
# Test ASM file with IACA marker in two lines
|
||||
def testIACAasm1(self):
|
||||
out = StringIO()
|
||||
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
|
||||
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca.S', out)
|
||||
osa.inspect_with_iaca()
|
||||
result = out.getvalue()
|
||||
result = '\n'.join(result.split('\n')[-27:])
|
||||
with open(curr_dir + '/testfiles/taxCalc-ivb-iaca.S') as f:
|
||||
osa = osaca.OSACA('IVB', f.read())
|
||||
result = osa.generate_text_output()
|
||||
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
|
||||
with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
|
||||
assertion = f.read()
|
||||
self.assertEqual(assertion, result)
|
||||
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
|
||||
|
||||
# Test ASM file with IACA marker in four lines
|
||||
def testIACAasm2(self):
|
||||
out = StringIO()
|
||||
curr_dir = '/'.join(os.path.realpath(__file__).split('/')[:-1])
|
||||
osa = OSACA('IVB', curr_dir + '/testfiles/taxCalc-ivb-iaca2.S', out)
|
||||
osa.inspect_with_iaca()
|
||||
result = out.getvalue()
|
||||
result = '\n'.join(result.split('\n')[-27:])
|
||||
with open(curr_dir + '/testfiles/taxCalc-ivb-iaca2.S') as f:
|
||||
osa = osaca.OSACA('IVB', f.read())
|
||||
result = osa.generate_text_output()
|
||||
result = result[result.find('Port Binding in Cycles Per Iteration:'):]
|
||||
with open(curr_dir + '/test_osaca_iaca_asm.out', encoding='utf-8') as f:
|
||||
assertion = f.read()
|
||||
self.assertEqual(assertion, result)
|
||||
self.assertEqual(assertion.replace(' ', ''), result.replace(' ', ''))
|
||||
|
||||
@@ -9,18 +9,18 @@ Port Binding in Cycles Per Iteration:
|
||||
Ports Pressure in cycles
|
||||
| 0 | 1 | 2 | 3 | 4 | 5 |
|
||||
-------------------------------------------
|
||||
| 0.50 | 0.50 | | | | | lea 0x1(%rax,%rax,1),%edx
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %edx,%xmm2,%xmm2
|
||||
| 1.00 | | | | | | vmulss %xmm2,%xmm0,%xmm3
|
||||
| 0.50 | 0.50 | | | | | lea 0x2(%rax,%rax,1),%ecx
|
||||
| | 1.00 | | | | | vaddss %xmm3,%xmm1,%xmm4
|
||||
| | | | | | 1.00 | vxorps %xmm1,%xmm1,%xmm1
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %ecx,%xmm1,%xmm1
|
||||
| 1.00 | | | | | | vmulss %xmm1,%xmm0,%xmm5
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4,0x4(%rsp,%rax,8)
|
||||
| | 1.00 | | | | | vaddss %xmm5,%xmm4,%xmm1
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1,0x8(%rsp,%rax,8)
|
||||
| 0.33 | 0.33 | | | | 0.33 | inc %rax
|
||||
| 0.33 | 0.33 | | | | 0.33 | cmp $0x1f3,%rax
|
||||
| | | | | | | jb 400bc2 <main+0x62>
|
||||
| 0.50 | 0.50 | | | | | lea 1(%rax,%rax),%edx
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %edx,%xmm2,%xmm2
|
||||
| 1.00 | | | | | | vmulss %xmm2,%xmm0,%xmm3
|
||||
| 0.50 | 0.50 | | | | | lea 2(%rax,%rax),%ecx
|
||||
| | 1.00 | | | | | vaddss %xmm3,%xmm1,%xmm4
|
||||
| | | | | | 1.00 | vxorps %xmm1,%xmm1,%xmm1
|
||||
| | 1.00 | | | | 1.00 | vcvtsi2ss %ecx,%xmm1,%xmm1
|
||||
| 1.00 | | | | | | vmulss %xmm1,%xmm0,%xmm5
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm4,4(%rsp,%rax,8)
|
||||
| | 1.00 | | | | | vaddss %xmm5,%xmm4,%xmm1
|
||||
| | | 0.50 | 0.50 | 1.00 | | vmovss %xmm1,8(%rsp,%rax,8)
|
||||
| 0.33 | 0.33 | | | | 0.33 | inc %rax
|
||||
| 0.33 | 0.33 | | | | 0.33 | cmp $499,%rax
|
||||
| | | | | | | X jb main_98
|
||||
Total number of estimated throughput: 5.67
|
||||
|
||||
Reference in New Issue
Block a user