mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 00:50:06 +01:00
applied flake8 and black rules
This commit is contained in:
@@ -7,7 +7,8 @@ import re
|
|||||||
def __read(*names, **kwargs):
|
def __read(*names, **kwargs):
|
||||||
"""Reads in file"""
|
"""Reads in file"""
|
||||||
with io.open(
|
with io.open(
|
||||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
os.path.join(os.path.dirname(__file__), *names),
|
||||||
|
encoding=kwargs.get("encoding", "utf8"),
|
||||||
) as fp:
|
) as fp:
|
||||||
return fp.read()
|
return fp.read()
|
||||||
|
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
|||||||
|
|
||||||
comment = None
|
comment = None
|
||||||
if load:
|
if load:
|
||||||
if 'ymm' in operand_types:
|
if "ymm" in operand_types:
|
||||||
port2D3D_pressure = 2
|
port2D3D_pressure = 2
|
||||||
else:
|
else:
|
||||||
port2D3D_pressure = 1
|
port2D3D_pressure = 1
|
||||||
@@ -96,7 +96,7 @@ class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder):
|
|||||||
latency += 4
|
latency += 4
|
||||||
comment = "with load"
|
comment = "with load"
|
||||||
if store:
|
if store:
|
||||||
if 'ymm' in operand_types:
|
if "ymm" in operand_types:
|
||||||
port4_pressure = 2
|
port4_pressure = 2
|
||||||
else:
|
else:
|
||||||
port4_pressure = 1
|
port4_pressure = 1
|
||||||
@@ -716,14 +716,14 @@ skx_mov_instructions = list(
|
|||||||
# ('movapd xmm xmm', ('1*p5', 1)),
|
# ('movapd xmm xmm', ('1*p5', 1)),
|
||||||
# ('vmovapd xmm xmm', ('1*p5', 1)),
|
# ('vmovapd xmm xmm', ('1*p5', 1)),
|
||||||
# ('vmovapd ymm ymm', ('1*p5', 1)),
|
# ('vmovapd ymm ymm', ('1*p5', 1)),
|
||||||
('vmovapd zmm zmm', ('', 0)),
|
("vmovapd zmm zmm", ("", 0)),
|
||||||
# https://www.felixcloutier.com/x86/movaps
|
# https://www.felixcloutier.com/x86/movaps
|
||||||
# TODO with masking!
|
# TODO with masking!
|
||||||
# TODO the following may eliminate or be bound to 1*p0156:
|
# TODO the following may eliminate or be bound to 1*p0156:
|
||||||
# ('movaps xmm xmm', ('1*p5', 1)),
|
# ('movaps xmm xmm', ('1*p5', 1)),
|
||||||
# ('vmovaps xmm xmm', ('1*p5', 1)),
|
# ('vmovaps xmm xmm', ('1*p5', 1)),
|
||||||
# ('vmovaps ymm ymm', ('1*p5', 1)),
|
# ('vmovaps ymm ymm', ('1*p5', 1)),
|
||||||
('vmovaps zmm zmm', ('', 0)),
|
("vmovaps zmm zmm", ("", 0)),
|
||||||
# https://www.felixcloutier.com/x86/movbe
|
# https://www.felixcloutier.com/x86/movbe
|
||||||
("movbe gpr mem", ("1*p15", 4)),
|
("movbe gpr mem", ("1*p15", 4)),
|
||||||
("movbe mem gpr", ("1*p15", 4)),
|
("movbe mem gpr", ("1*p15", 4)),
|
||||||
|
|||||||
@@ -140,9 +140,11 @@ def extract_model(tree, arch, skip_mem=True):
|
|||||||
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
|
print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
|
||||||
continue
|
continue
|
||||||
# skip if measured TP is smaller than computed
|
# skip if measured TP is smaller than computed
|
||||||
if [float(x.attrib["TP_ports"]) > min(float(x.attrib["TP_loop"]),
|
if [
|
||||||
float(x.attrib["TP_unrolled"]))
|
float(x.attrib["TP_ports"])
|
||||||
for x in arch_tag.findall("measurement")][0]:
|
> min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
|
||||||
|
for x in arch_tag.findall("measurement")
|
||||||
|
][0]:
|
||||||
print(
|
print(
|
||||||
"Calculated TP is greater than measured TP.",
|
"Calculated TP is greater than measured TP.",
|
||||||
iform,
|
iform,
|
||||||
@@ -160,13 +162,15 @@ def extract_model(tree, arch, skip_mem=True):
|
|||||||
throughput = float(measurement_tag.attrib["TP_ports"])
|
throughput = float(measurement_tag.attrib["TP_ports"])
|
||||||
else:
|
else:
|
||||||
throughput = min(
|
throughput = min(
|
||||||
measurement_tag.attrib.get("TP_loop", float('inf')),
|
measurement_tag.attrib.get("TP_loop", float("inf")),
|
||||||
measurement_tag.attrib.get("TP_unroll", float('inf')),
|
measurement_tag.attrib.get("TP_unroll", float("inf")),
|
||||||
measurement_tag.attrib.get("TP", float('inf')),
|
measurement_tag.attrib.get("TP", float("inf")),
|
||||||
)
|
)
|
||||||
if throughput == float('inf'):
|
if throughput == float("inf"):
|
||||||
throughput = None
|
throughput = None
|
||||||
uops = int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
|
uops = (
|
||||||
|
int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
|
||||||
|
)
|
||||||
if "ports" in measurement_tag.attrib:
|
if "ports" in measurement_tag.attrib:
|
||||||
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
|
port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
|
||||||
latencies = [
|
latencies = [
|
||||||
@@ -202,7 +206,11 @@ def extract_model(tree, arch, skip_mem=True):
|
|||||||
# Check if all are equal
|
# Check if all are equal
|
||||||
if port_pressure:
|
if port_pressure:
|
||||||
if port_pressure[1:] != port_pressure[:-1]:
|
if port_pressure[1:] != port_pressure[:-1]:
|
||||||
print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
|
print(
|
||||||
|
"Contradicting port occupancies, using latest IACA:",
|
||||||
|
iform,
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
port_pressure = port_pressure[-1]
|
port_pressure = port_pressure[-1]
|
||||||
else:
|
else:
|
||||||
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
|
# print("No data available for this architecture:", mnemonic, file=sys.stderr)
|
||||||
@@ -222,9 +230,11 @@ def extract_model(tree, arch, skip_mem=True):
|
|||||||
port_4 = True
|
port_4 = True
|
||||||
# Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
|
# Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
|
||||||
if port_23 and not port_4:
|
if port_23 and not port_4:
|
||||||
if arch.upper() in ["SNB", "IVB"] and any(
|
if (
|
||||||
[p.get('name', '') == 'ymm' for p in parameters]) and \
|
arch.upper() in ["SNB", "IVB"]
|
||||||
not '128' in mnemonic:
|
and any([p.get("name", "") == "ymm" for p in parameters])
|
||||||
|
and not ("128" in mnemonic)
|
||||||
|
):
|
||||||
# x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in
|
# x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in
|
||||||
# instruction name
|
# instruction name
|
||||||
port2D3D_pressure = 2
|
port2D3D_pressure = 2
|
||||||
|
|||||||
@@ -125,7 +125,10 @@ def _get_asmbench_output(input_data, isa):
|
|||||||
db_entries = {}
|
db_entries = {}
|
||||||
for i in range(0, len(input_data), 4):
|
for i in range(0, len(input_data), 4):
|
||||||
if input_data[i + 3].strip() != "":
|
if input_data[i + 3].strip() != "":
|
||||||
print("asmbench output not in the correct format! Format must be: ", file=sys.stderr)
|
print(
|
||||||
|
"asmbench output not in the correct format! Format must be: ",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
print(
|
print(
|
||||||
"-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
|
"-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n"
|
||||||
"Throughput: Y cycles\n\n-------------",
|
"Throughput: Y cycles\n\n-------------",
|
||||||
@@ -540,7 +543,16 @@ def _get_sanity_report(
|
|||||||
|
|
||||||
|
|
||||||
def _get_sanity_report_verbose(
|
def _get_sanity_report_verbose(
|
||||||
total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, bad_operands, colors=False
|
total,
|
||||||
|
m_tp,
|
||||||
|
m_l,
|
||||||
|
m_pp,
|
||||||
|
suspic_instr,
|
||||||
|
dup_arch,
|
||||||
|
dup_isa,
|
||||||
|
only_isa,
|
||||||
|
bad_operands,
|
||||||
|
colors=False,
|
||||||
):
|
):
|
||||||
"""Get the verbose part of the sanity report with all missing instruction forms."""
|
"""Get the verbose part of the sanity report with all missing instruction forms."""
|
||||||
BRIGHT_CYAN = "\033[1;36;1m" if colors else ""
|
BRIGHT_CYAN = "\033[1;36;1m" if colors else ""
|
||||||
|
|||||||
@@ -202,7 +202,12 @@ class Frontend(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def combined_view(
|
def combined_view(
|
||||||
self, kernel, cp_kernel: KernelDG, dep_dict, ignore_unknown=False, show_cmnts=True
|
self,
|
||||||
|
kernel,
|
||||||
|
cp_kernel: KernelDG,
|
||||||
|
dep_dict,
|
||||||
|
ignore_unknown=False,
|
||||||
|
show_cmnts=True,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Build combined view of kernel including port pressure (TP), a CP column and a
|
Build combined view of kernel including port pressure (TP), a CP column and a
|
||||||
@@ -238,8 +243,8 @@ class Frontend(object):
|
|||||||
lcd_sum = 0.0
|
lcd_sum = 0.0
|
||||||
lcd_lines = {}
|
lcd_lines = {}
|
||||||
if dep_dict:
|
if dep_dict:
|
||||||
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
|
longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
|
||||||
lcd_sum = dep_dict[longest_lcd]['latency']
|
lcd_sum = dep_dict[longest_lcd]["latency"]
|
||||||
lcd_lines = {
|
lcd_lines = {
|
||||||
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
|
instr["line_number"]: lat for instr, lat in dep_dict[longest_lcd]["dependencies"]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,13 @@ from functools import lru_cache
|
|||||||
from osaca.db_interface import import_benchmark_output, sanity_check
|
from osaca.db_interface import import_benchmark_output, sanity_check
|
||||||
from osaca.frontend import Frontend
|
from osaca.frontend import Frontend
|
||||||
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
|
from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
|
||||||
from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel, reduce_to_section
|
from osaca.semantics import (
|
||||||
|
INSTR_FLAGS,
|
||||||
|
ArchSemantics,
|
||||||
|
KernelDG,
|
||||||
|
MachineModel,
|
||||||
|
reduce_to_section,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
SUPPORTED_ARCHS = [
|
SUPPORTED_ARCHS = [
|
||||||
@@ -37,7 +43,8 @@ DEFAULT_ARCHS = {
|
|||||||
def __read(*names, **kwargs):
|
def __read(*names, **kwargs):
|
||||||
"""Reads in file"""
|
"""Reads in file"""
|
||||||
with io.open(
|
with io.open(
|
||||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
os.path.join(os.path.dirname(__file__), *names),
|
||||||
|
encoding=kwargs.get("encoding", "utf8"),
|
||||||
) as fp:
|
) as fp:
|
||||||
return fp.read()
|
return fp.read()
|
||||||
|
|
||||||
@@ -79,7 +86,10 @@ def create_parser(parser=None):
|
|||||||
|
|
||||||
# Add arguments
|
# Add arguments
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-V", "--version", action="version", version="%(prog)s " + __find_version("__init__.py")
|
"-V",
|
||||||
|
"--version",
|
||||||
|
action="version",
|
||||||
|
version="%(prog)s " + __find_version("__init__.py"),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--arch",
|
"--arch",
|
||||||
@@ -167,7 +177,9 @@ def create_parser(parser=None):
|
|||||||
help="Write analysis to this file (default to stdout).",
|
help="Write analysis to this file (default to stdout).",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"file", type=argparse.FileType("r"), help="Path to object (ASM or instruction file)."
|
"file",
|
||||||
|
type=argparse.FileType("r"),
|
||||||
|
help="Path to object (ASM or instruction file).",
|
||||||
)
|
)
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
@@ -347,7 +359,10 @@ def run(args, output_file=sys.stdout):
|
|||||||
# Sanity check on DB
|
# Sanity check on DB
|
||||||
verbose = True if args.verbose > 0 else False
|
verbose = True if args.verbose > 0 else False
|
||||||
sanity_check(
|
sanity_check(
|
||||||
args.arch, verbose=verbose, internet_check=args.internet_check, output_file=output_file
|
args.arch,
|
||||||
|
verbose=verbose,
|
||||||
|
internet_check=args.internet_check,
|
||||||
|
output_file=output_file,
|
||||||
)
|
)
|
||||||
elif "import_data" in args:
|
elif "import_data" in args:
|
||||||
# Import microbench output file into DB
|
# Import microbench output file into DB
|
||||||
|
|||||||
@@ -26,9 +26,9 @@ class ParserAArch64(BaseParser):
|
|||||||
pp.ZeroOrMore(pp.Word(pp.printables))
|
pp.ZeroOrMore(pp.Word(pp.printables))
|
||||||
).setResultsName(self.COMMENT_ID)
|
).setResultsName(self.COMMENT_ID)
|
||||||
# Define ARM assembly identifier
|
# Define ARM assembly identifier
|
||||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
|
decimal_number = pp.Combine(
|
||||||
"value"
|
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
|
||||||
)
|
).setResultsName("value")
|
||||||
hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
|
hex_number = pp.Combine(pp.Literal("0x") + pp.Word(pp.hexnums)).setResultsName("value")
|
||||||
relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
|
relocation = pp.Combine(pp.Literal(":") + pp.Word(pp.alphanums + "_") + pp.Literal(":"))
|
||||||
first = pp.Word(pp.alphas + "_.", exact=1)
|
first = pp.Word(pp.alphas + "_.", exact=1)
|
||||||
@@ -152,7 +152,9 @@ class ParserAArch64(BaseParser):
|
|||||||
pp.Literal("{")
|
pp.Literal("{")
|
||||||
+ (
|
+ (
|
||||||
pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
|
pp.delimitedList(pp.Combine(self.list_element), delim=",").setResultsName("list")
|
||||||
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName("range")
|
^ pp.delimitedList(pp.Combine(self.list_element), delim="-").setResultsName(
|
||||||
|
"range"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
+ pp.Literal("}")
|
+ pp.Literal("}")
|
||||||
+ pp.Optional(index)
|
+ pp.Optional(index)
|
||||||
@@ -256,9 +258,7 @@ class ParserAArch64(BaseParser):
|
|||||||
# 2. Parse label
|
# 2. Parse label
|
||||||
if result is None:
|
if result is None:
|
||||||
try:
|
try:
|
||||||
result = self.process_operand(
|
result = self.process_operand(self.label.parseString(line, parseAll=True).asDict())
|
||||||
self.label.parseString(line, parseAll=True).asDict()
|
|
||||||
)
|
|
||||||
result = AttrDict.convert_dict(result)
|
result = AttrDict.convert_dict(result)
|
||||||
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
|
instruction_form[self.LABEL_ID] = result[self.LABEL_ID].name
|
||||||
if self.COMMENT_ID in result[self.LABEL_ID]:
|
if self.COMMENT_ID in result[self.LABEL_ID]:
|
||||||
@@ -293,7 +293,9 @@ class ParserAArch64(BaseParser):
|
|||||||
try:
|
try:
|
||||||
result = self.parse_instruction(line)
|
result = self.parse_instruction(line)
|
||||||
except (pp.ParseException, KeyError) as e:
|
except (pp.ParseException, KeyError) as e:
|
||||||
raise ValueError("Unable to parse {!r} on line {}".format(line, line_number)) from e
|
raise ValueError(
|
||||||
|
"Unable to parse {!r} on line {}".format(line, line_number)
|
||||||
|
) from e
|
||||||
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
|
instruction_form[self.INSTRUCTION_ID] = result[self.INSTRUCTION_ID]
|
||||||
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
|
instruction_form[self.OPERANDS_ID] = result[self.OPERANDS_ID]
|
||||||
instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
|
instruction_form[self.COMMENT_ID] = result[self.COMMENT_ID]
|
||||||
@@ -390,9 +392,9 @@ class ParserAArch64(BaseParser):
|
|||||||
new_dict["pre_indexed"] = True
|
new_dict["pre_indexed"] = True
|
||||||
if "post_indexed" in memory_address:
|
if "post_indexed" in memory_address:
|
||||||
if "value" in memory_address["post_indexed"]:
|
if "value" in memory_address["post_indexed"]:
|
||||||
new_dict["post_indexed"] = {"value": int(
|
new_dict["post_indexed"] = {
|
||||||
memory_address["post_indexed"]["value"], 0
|
"value": int(memory_address["post_indexed"]["value"], 0)
|
||||||
)}
|
}
|
||||||
else:
|
else:
|
||||||
new_dict["post_indexed"] = memory_address["post_indexed"]
|
new_dict["post_indexed"] = memory_address["post_indexed"]
|
||||||
return AttrDict({self.MEMORY_ID: new_dict})
|
return AttrDict({self.MEMORY_ID: new_dict})
|
||||||
@@ -408,27 +410,27 @@ class ParserAArch64(BaseParser):
|
|||||||
Resolve range or list register operand to list of registers.
|
Resolve range or list register operand to list of registers.
|
||||||
Returns None if neither list nor range
|
Returns None if neither list nor range
|
||||||
"""
|
"""
|
||||||
if 'register' in operand:
|
if "register" in operand:
|
||||||
if 'list' in operand.register:
|
if "list" in operand.register:
|
||||||
index = operand.register.get('index')
|
index = operand.register.get("index")
|
||||||
range_list = []
|
range_list = []
|
||||||
for reg in operand.register.list:
|
for reg in operand.register.list:
|
||||||
reg = deepcopy(reg)
|
reg = deepcopy(reg)
|
||||||
if index is not None:
|
if index is not None:
|
||||||
reg['index'] = int(index, 0)
|
reg["index"] = int(index, 0)
|
||||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||||
return range_list
|
return range_list
|
||||||
elif 'range' in operand.register:
|
elif "range" in operand.register:
|
||||||
base_register = operand.register.range[0]
|
base_register = operand.register.range[0]
|
||||||
index = operand.register.get('index')
|
index = operand.register.get("index")
|
||||||
range_list = []
|
range_list = []
|
||||||
start_name = base_register.name
|
start_name = base_register.name
|
||||||
end_name = operand.register.range[1].name
|
end_name = operand.register.range[1].name
|
||||||
for name in range(int(start_name), int(end_name) + 1):
|
for name in range(int(start_name), int(end_name) + 1):
|
||||||
reg = deepcopy(base_register)
|
reg = deepcopy(base_register)
|
||||||
if index is not None:
|
if index is not None:
|
||||||
reg['index'] = int(index, 0)
|
reg["index"] = int(index, 0)
|
||||||
reg['name'] = str(name)
|
reg["name"] = str(name)
|
||||||
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
range_list.append(AttrDict({self.REGISTER_ID: reg}))
|
||||||
return range_list
|
return range_list
|
||||||
# neither register list nor range, return unmodified
|
# neither register list nor range, return unmodified
|
||||||
@@ -482,10 +484,12 @@ class ParserAArch64(BaseParser):
|
|||||||
return AttrDict({self.IMMEDIATE_ID: immediate})
|
return AttrDict({self.IMMEDIATE_ID: immediate})
|
||||||
else:
|
else:
|
||||||
# change 'mantissa' key to 'value'
|
# change 'mantissa' key to 'value'
|
||||||
return AttrDict({
|
return AttrDict(
|
||||||
self.IMMEDIATE_ID: AttrDict({
|
{
|
||||||
"value": immediate[dict_name]["mantissa"],
|
self.IMMEDIATE_ID: AttrDict(
|
||||||
"type": dict_name})}
|
{"value": immediate[dict_name]["mantissa"], "type": dict_name}
|
||||||
|
)
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def process_label(self, label):
|
def process_label(self, label):
|
||||||
|
|||||||
@@ -23,9 +23,9 @@ class ParserX86ATT(BaseParser):
|
|||||||
|
|
||||||
def construct_parser(self):
|
def construct_parser(self):
|
||||||
"""Create parser for ARM AArch64 ISA."""
|
"""Create parser for ARM AArch64 ISA."""
|
||||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)).setResultsName(
|
decimal_number = pp.Combine(
|
||||||
"value"
|
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums)
|
||||||
)
|
).setResultsName("value")
|
||||||
hex_number = pp.Combine(
|
hex_number = pp.Combine(
|
||||||
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
|
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
|
||||||
).setResultsName("value")
|
).setResultsName("value")
|
||||||
@@ -41,7 +41,8 @@ class ParserX86ATT(BaseParser):
|
|||||||
identifier = pp.Group(
|
identifier = pp.Group(
|
||||||
pp.Optional(id_offset).setResultsName("offset")
|
pp.Optional(id_offset).setResultsName("offset")
|
||||||
+ pp.Combine(
|
+ pp.Combine(
|
||||||
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"), joinString="::"
|
pp.delimitedList(pp.Combine(first + pp.Optional(rest)), delim="::"),
|
||||||
|
joinString="::",
|
||||||
).setResultsName("name")
|
).setResultsName("name")
|
||||||
+ pp.Optional(relocation).setResultsName("relocation")
|
+ pp.Optional(relocation).setResultsName("relocation")
|
||||||
).setResultsName("identifier")
|
).setResultsName("identifier")
|
||||||
@@ -443,7 +444,12 @@ class ParserX86ATT(BaseParser):
|
|||||||
"""Check if register is a vector register"""
|
"""Check if register is a vector register"""
|
||||||
if register is None:
|
if register is None:
|
||||||
return False
|
return False
|
||||||
if register["name"].rstrip(string.digits).lower() in ["mm", "xmm", "ymm", "zmm"]:
|
if register["name"].rstrip(string.digits).lower() in [
|
||||||
|
"mm",
|
||||||
|
"xmm",
|
||||||
|
"ymm",
|
||||||
|
"zmm",
|
||||||
|
]:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -47,7 +47,9 @@ class ArchSemantics(ISASemantics):
|
|||||||
indices = [port_list.index(p) for p in ports]
|
indices = [port_list.index(p) for p in ports]
|
||||||
# check if port sum of used ports for uop are unbalanced
|
# check if port sum of used ports for uop are unbalanced
|
||||||
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
|
port_sums = self._to_list(itemgetter(*indices)(self.get_throughput_sum(kernel)))
|
||||||
instr_ports = self._to_list(itemgetter(*indices)(instruction_form["port_pressure"]))
|
instr_ports = self._to_list(
|
||||||
|
itemgetter(*indices)(instruction_form["port_pressure"])
|
||||||
|
)
|
||||||
if len(set(port_sums)) > 1:
|
if len(set(port_sums)) > 1:
|
||||||
# balance ports
|
# balance ports
|
||||||
# init list for keeping track of the current change
|
# init list for keeping track of the current change
|
||||||
@@ -270,7 +272,8 @@ class ArchSemantics(ISASemantics):
|
|||||||
reg_type
|
reg_type
|
||||||
]
|
]
|
||||||
st_data_port_pressure = [
|
st_data_port_pressure = [
|
||||||
pp * multiplier for pp in st_data_port_pressure]
|
pp * multiplier for pp in st_data_port_pressure
|
||||||
|
]
|
||||||
data_port_pressure = [
|
data_port_pressure = [
|
||||||
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
|
sum(x) for x in zip(data_port_pressure, st_data_port_pressure)
|
||||||
]
|
]
|
||||||
@@ -343,7 +346,9 @@ class ArchSemantics(ISASemantics):
|
|||||||
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
|
def _handle_instruction_found(self, instruction_data, port_number, instruction_form, flags):
|
||||||
"""Apply performance data to instruction if it was found in the archDB"""
|
"""Apply performance data to instruction if it was found in the archDB"""
|
||||||
throughput = instruction_data["throughput"]
|
throughput = instruction_data["throughput"]
|
||||||
port_pressure = self._machine_model.average_port_pressure(instruction_data["port_pressure"])
|
port_pressure = self._machine_model.average_port_pressure(
|
||||||
|
instruction_data["port_pressure"]
|
||||||
|
)
|
||||||
instruction_form["port_uops"] = instruction_data["port_pressure"]
|
instruction_form["port_uops"] = instruction_data["port_pressure"]
|
||||||
try:
|
try:
|
||||||
assert isinstance(port_pressure, list)
|
assert isinstance(port_pressure, list)
|
||||||
|
|||||||
@@ -1,20 +1,19 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
from collections import defaultdict
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from itertools import product
|
from itertools import product
|
||||||
import hashlib
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
import ruamel.yaml
|
import ruamel.yaml
|
||||||
from ruamel.yaml.compat import StringIO
|
|
||||||
|
|
||||||
from osaca import __version__, utils
|
from osaca import __version__, utils
|
||||||
from osaca.parser import ParserX86ATT
|
from osaca.parser import ParserX86ATT
|
||||||
|
from ruamel.yaml.compat import StringIO
|
||||||
|
|
||||||
|
|
||||||
class MachineModel(object):
|
class MachineModel(object):
|
||||||
@@ -37,7 +36,13 @@ class MachineModel(object):
|
|||||||
"hidden_loads": None,
|
"hidden_loads": None,
|
||||||
"load_latency": {},
|
"load_latency": {},
|
||||||
"load_throughput": [
|
"load_throughput": [
|
||||||
{"base": b, "index": i, "offset": o, "scale": s, "port_pressure": []}
|
{
|
||||||
|
"base": b,
|
||||||
|
"index": i,
|
||||||
|
"offset": o,
|
||||||
|
"scale": s,
|
||||||
|
"port_pressure": [],
|
||||||
|
}
|
||||||
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
|
for b, i, o, s in product(["gpr"], ["gpr", None], ["imd", None], [1, 8])
|
||||||
],
|
],
|
||||||
"load_throughput_default": [],
|
"load_throughput_default": [],
|
||||||
@@ -128,7 +133,8 @@ class MachineModel(object):
|
|||||||
instruction_form
|
instruction_form
|
||||||
for instruction_form in name_matched_iforms
|
for instruction_form in name_matched_iforms
|
||||||
if self._match_operands(
|
if self._match_operands(
|
||||||
instruction_form["operands"] if "operands" in instruction_form else [], operands
|
instruction_form["operands"] if "operands" in instruction_form else [],
|
||||||
|
operands,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
@@ -150,7 +156,13 @@ class MachineModel(object):
|
|||||||
return average_pressure
|
return average_pressure
|
||||||
|
|
||||||
def set_instruction(
|
def set_instruction(
|
||||||
self, name, operands=None, latency=None, port_pressure=None, throughput=None, uops=None
|
self,
|
||||||
|
name,
|
||||||
|
operands=None,
|
||||||
|
latency=None,
|
||||||
|
port_pressure=None,
|
||||||
|
throughput=None,
|
||||||
|
uops=None,
|
||||||
):
|
):
|
||||||
"""Import instruction form information."""
|
"""Import instruction form information."""
|
||||||
# If it already exists. Overwrite information.
|
# If it already exists. Overwrite information.
|
||||||
@@ -500,7 +512,11 @@ class MachineModel(object):
|
|||||||
"""Check if the types of operand ``i_operand`` and ``operand`` match."""
|
"""Check if the types of operand ``i_operand`` and ``operand`` match."""
|
||||||
# check for wildcard
|
# check for wildcard
|
||||||
if self.WILDCARD in operand:
|
if self.WILDCARD in operand:
|
||||||
if "class" in i_operand and i_operand["class"] == "register" or "register" in i_operand:
|
if (
|
||||||
|
"class" in i_operand
|
||||||
|
and i_operand["class"] == "register"
|
||||||
|
or "register" in i_operand
|
||||||
|
):
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
@@ -527,20 +543,27 @@ class MachineModel(object):
|
|||||||
return self._is_AArch64_mem_type(i_operand, operand["memory"])
|
return self._is_AArch64_mem_type(i_operand, operand["memory"])
|
||||||
# immediate
|
# immediate
|
||||||
if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
|
if i_operand["class"] == "immediate" and i_operand["imd"] == self.WILDCARD:
|
||||||
return "value" in operand or \
|
return "value" in operand or (
|
||||||
("immediate" in operand and "value" in operand["immediate"])
|
"immediate" in operand and "value" in operand["immediate"]
|
||||||
|
)
|
||||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
|
if i_operand["class"] == "immediate" and i_operand["imd"] == "int":
|
||||||
return ("value" in operand and operand.get("type", None) == "int") or \
|
return ("value" in operand and operand.get("type", None) == "int") or (
|
||||||
("immediate" in operand and "value" in operand["immediate"] and
|
"immediate" in operand
|
||||||
operand["immediate"].get("type", None) == "int")
|
and "value" in operand["immediate"]
|
||||||
|
and operand["immediate"].get("type", None) == "int"
|
||||||
|
)
|
||||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
|
if i_operand["class"] == "immediate" and i_operand["imd"] == "float":
|
||||||
return ("float" in operand and operand.get("type", None) == "float") or \
|
return ("float" in operand and operand.get("type", None) == "float") or (
|
||||||
("immediate" in operand and "float" in operand["immediate"] and
|
"immediate" in operand
|
||||||
operand["immediate"].get("type", None) == "float")
|
and "float" in operand["immediate"]
|
||||||
|
and operand["immediate"].get("type", None) == "float"
|
||||||
|
)
|
||||||
if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
|
if i_operand["class"] == "immediate" and i_operand["imd"] == "double":
|
||||||
return ("double" in operand and operand.get("type", None) == "double") or \
|
return ("double" in operand and operand.get("type", None) == "double") or (
|
||||||
("immediate" in operand and "double" in operand["immediate"] and
|
"immediate" in operand
|
||||||
operand["immediate"].get("type", None) == "double")
|
and "double" in operand["immediate"]
|
||||||
|
and operand["immediate"].get("type", None) == "double"
|
||||||
|
)
|
||||||
# identifier
|
# identifier
|
||||||
if "identifier" in operand or (
|
if "identifier" in operand or (
|
||||||
"immediate" in operand and "identifier" in operand["immediate"]
|
"immediate" in operand and "identifier" in operand["immediate"]
|
||||||
@@ -577,7 +600,10 @@ class MachineModel(object):
|
|||||||
def _compare_db_entries(self, operand_1, operand_2):
|
def _compare_db_entries(self, operand_1, operand_2):
|
||||||
"""Check if operand types in DB format (i.e., not parsed) match."""
|
"""Check if operand types in DB format (i.e., not parsed) match."""
|
||||||
operand_attributes = list(
|
operand_attributes = list(
|
||||||
filter(lambda x: True if x != "source" and x != "destination" else False, operand_1)
|
filter(
|
||||||
|
lambda x: True if x != "source" and x != "destination" else False,
|
||||||
|
operand_1,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
for key in operand_attributes:
|
for key in operand_attributes:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from copy import deepcopy
|
|
||||||
|
|
||||||
from osaca import utils
|
from osaca import utils
|
||||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||||
@@ -100,40 +99,51 @@ class ISASemantics(object):
|
|||||||
# post-process pre- and post-indexing for aarch64 memory operands
|
# post-process pre- and post-indexing for aarch64 memory operands
|
||||||
if self._isa == "aarch64":
|
if self._isa == "aarch64":
|
||||||
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
for operand in [op for op in op_dict["source"] if "memory" in op]:
|
||||||
post_indexed = ("post_indexed" in operand["memory"] and
|
post_indexed = (
|
||||||
operand["memory"]["post_indexed"])
|
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||||
pre_indexed = ("pre_indexed" in operand["memory"] and
|
)
|
||||||
operand["memory"]["pre_indexed"])
|
pre_indexed = (
|
||||||
|
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||||
|
)
|
||||||
if post_indexed or pre_indexed:
|
if post_indexed or pre_indexed:
|
||||||
op_dict["src_dst"].append(
|
op_dict["src_dst"].append(
|
||||||
AttrDict.convert_dict({
|
AttrDict.convert_dict(
|
||||||
"register": operand["memory"]["base"],
|
{
|
||||||
"pre_indexed": pre_indexed,
|
"register": operand["memory"]["base"],
|
||||||
"post_indexed": post_indexed})
|
"pre_indexed": pre_indexed,
|
||||||
|
"post_indexed": post_indexed,
|
||||||
|
}
|
||||||
|
)
|
||||||
)
|
)
|
||||||
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
for operand in [op for op in op_dict["destination"] if "memory" in op]:
|
||||||
post_indexed = ("post_indexed" in operand["memory"] and
|
post_indexed = (
|
||||||
operand["memory"]["post_indexed"])
|
"post_indexed" in operand["memory"] and operand["memory"]["post_indexed"]
|
||||||
pre_indexed = ("pre_indexed" in operand["memory"] and
|
)
|
||||||
operand["memory"]["pre_indexed"])
|
pre_indexed = (
|
||||||
|
"pre_indexed" in operand["memory"] and operand["memory"]["pre_indexed"]
|
||||||
|
)
|
||||||
if post_indexed or pre_indexed:
|
if post_indexed or pre_indexed:
|
||||||
op_dict["src_dst"].append(
|
op_dict["src_dst"].append(
|
||||||
AttrDict.convert_dict({
|
AttrDict.convert_dict(
|
||||||
"register": operand["memory"]["base"],
|
{
|
||||||
"pre_indexed": pre_indexed,
|
"register": operand["memory"]["base"],
|
||||||
"post_indexed": post_indexed})
|
"pre_indexed": pre_indexed,
|
||||||
|
"post_indexed": post_indexed,
|
||||||
|
}
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# store operand list in dict and reassign operand key/value pair
|
# store operand list in dict and reassign operand key/value pair
|
||||||
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
instruction_form["semantic_operands"] = AttrDict.convert_dict(op_dict)
|
||||||
# assign LD/ST flags
|
# assign LD/ST flags
|
||||||
instruction_form["flags"] = instruction_form["flags"] if "flags" in instruction_form else []
|
instruction_form["flags"] = (
|
||||||
|
instruction_form["flags"] if "flags" in instruction_form else []
|
||||||
|
)
|
||||||
if self._has_load(instruction_form):
|
if self._has_load(instruction_form):
|
||||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
|
instruction_form["flags"] += [INSTR_FLAGS.HAS_LD]
|
||||||
if self._has_store(instruction_form):
|
if self._has_store(instruction_form):
|
||||||
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
|
instruction_form["flags"] += [INSTR_FLAGS.HAS_ST]
|
||||||
|
|
||||||
|
|
||||||
def get_reg_changes(self, instruction_form, only_postindexed=False):
|
def get_reg_changes(self, instruction_form, only_postindexed=False):
|
||||||
"""
|
"""
|
||||||
Returns register changes, as dict, for insruction_form, based on operation defined in isa.
|
Returns register changes, as dict, for insruction_form, based on operation defined in isa.
|
||||||
@@ -141,12 +151,16 @@ class ISASemantics(object):
|
|||||||
Empty dict if no changes of registers occured. None for registers with unknown changes.
|
Empty dict if no changes of registers occured. None for registers with unknown changes.
|
||||||
If only_postindexed is True, only considers changes due to post_indexed memory references.
|
If only_postindexed is True, only considers changes due to post_indexed memory references.
|
||||||
"""
|
"""
|
||||||
if instruction_form.get('instruction') is None:
|
if instruction_form.get("instruction") is None:
|
||||||
return {}
|
return {}
|
||||||
dest_reg_names = [op.register.get('prefix', '') + op.register.name
|
dest_reg_names = [
|
||||||
for op in chain(instruction_form.semantic_operands.destination,
|
op.register.get("prefix", "") + op.register.name
|
||||||
instruction_form.semantic_operands.src_dst)
|
for op in chain(
|
||||||
if 'register' in op]
|
instruction_form.semantic_operands.destination,
|
||||||
|
instruction_form.semantic_operands.src_dst,
|
||||||
|
)
|
||||||
|
if "register" in op
|
||||||
|
]
|
||||||
isa_data = self._isa_model.get_instruction(
|
isa_data = self._isa_model.get_instruction(
|
||||||
instruction_form["instruction"], instruction_form["operands"]
|
instruction_form["instruction"], instruction_form["operands"]
|
||||||
)
|
)
|
||||||
@@ -162,50 +176,50 @@ class ISASemantics(object):
|
|||||||
|
|
||||||
if only_postindexed:
|
if only_postindexed:
|
||||||
for o in instruction_form.operands:
|
for o in instruction_form.operands:
|
||||||
if 'post_indexed' in o.get('memory', {}):
|
if "post_indexed" in o.get("memory", {}):
|
||||||
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
|
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||||
return {base_name: {
|
return {
|
||||||
'name': o.memory.base.get('prefix', '') + o.memory.base.name,
|
base_name: {
|
||||||
'value': o.memory.post_indexed.value
|
"name": o.memory.base.get("prefix", "") + o.memory.base.name,
|
||||||
}}
|
"value": o.memory.post_indexed.value,
|
||||||
|
}
|
||||||
|
}
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
reg_operand_names = {} # e.g., {'rax': 'op1'}
|
||||||
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
operand_state = {} # e.g., {'op1': {'name': 'rax', 'value': 0}} 0 means unchanged
|
||||||
|
|
||||||
for o in instruction_form.operands:
|
for o in instruction_form.operands:
|
||||||
if 'pre_indexed' in o.get('memory', {}):
|
if "pre_indexed" in o.get("memory", {}):
|
||||||
# Assuming no isa_data.operation
|
# Assuming no isa_data.operation
|
||||||
if isa_data.get("operation", None) is not None:
|
if isa_data.get("operation", None) is not None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"ISA information for pre-indexed instruction {!r} has operation set."
|
"ISA information for pre-indexed instruction {!r} has operation set."
|
||||||
"This is currently not supprted.".format(instruction_form.line))
|
"This is currently not supprted.".format(instruction_form.line)
|
||||||
base_name = o.memory.base.get('prefix', '') + o.memory.base.name
|
)
|
||||||
reg_operand_names = {base_name: 'op1'}
|
base_name = o.memory.base.get("prefix", "") + o.memory.base.name
|
||||||
operand_state = {'op1': {
|
reg_operand_names = {base_name: "op1"}
|
||||||
'name': base_name,
|
operand_state = {"op1": {"name": base_name, "value": o.memory.offset.value}}
|
||||||
'value': o.memory.offset.value
|
|
||||||
}}
|
|
||||||
|
|
||||||
if isa_data is not None and 'operation' in isa_data:
|
if isa_data is not None and "operation" in isa_data:
|
||||||
for i, o in enumerate(instruction_form.operands):
|
for i, o in enumerate(instruction_form.operands):
|
||||||
operand_name = "op{}".format(i + 1)
|
operand_name = "op{}".format(i + 1)
|
||||||
if "register" in o:
|
if "register" in o:
|
||||||
o_reg_name = o["register"].get('prefix', '') + o["register"]["name"]
|
o_reg_name = o["register"].get("prefix", "") + o["register"]["name"]
|
||||||
reg_operand_names[o_reg_name] = operand_name
|
reg_operand_names[o_reg_name] = operand_name
|
||||||
operand_state[operand_name] = {
|
operand_state[operand_name] = {"name": o_reg_name, "value": 0}
|
||||||
'name': o_reg_name,
|
|
||||||
'value': 0}
|
|
||||||
elif "immediate" in o:
|
elif "immediate" in o:
|
||||||
operand_state[operand_name] = {'value': o["immediate"]["value"]}
|
operand_state[operand_name] = {"value": o["immediate"]["value"]}
|
||||||
elif "memory" in o:
|
elif "memory" in o:
|
||||||
# TODO lea needs some thinking about
|
# TODO lea needs some thinking about
|
||||||
pass
|
pass
|
||||||
|
|
||||||
operand_changes = exec(isa_data['operation'], {}, operand_state)
|
exec(isa_data["operation"], {}, operand_state)
|
||||||
|
|
||||||
change_dict = {reg_name: operand_state.get(reg_operand_names.get(reg_name))
|
change_dict = {
|
||||||
for reg_name in dest_reg_names}
|
reg_name: operand_state.get(reg_operand_names.get(reg_name))
|
||||||
|
for reg_name in dest_reg_names
|
||||||
|
}
|
||||||
return change_dict
|
return change_dict
|
||||||
|
|
||||||
def _apply_found_ISA_data(self, isa_data, operands):
|
def _apply_found_ISA_data(self, isa_data, operands):
|
||||||
@@ -231,8 +245,10 @@ class ISASemantics(object):
|
|||||||
if "hidden_operands" in isa_data:
|
if "hidden_operands" in isa_data:
|
||||||
op_dict["destination"] += [
|
op_dict["destination"] += [
|
||||||
AttrDict.convert_dict(
|
AttrDict.convert_dict(
|
||||||
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}})
|
{hop["class"]: {k: hop[k] for k in ["class", "source", "destination"]}}
|
||||||
for hop in isa_data["hidden_operands"]]
|
)
|
||||||
|
for hop in isa_data["hidden_operands"]
|
||||||
|
]
|
||||||
return op_dict
|
return op_dict
|
||||||
|
|
||||||
for i, op in enumerate(isa_data["operands"]):
|
for i, op in enumerate(isa_data["operands"]):
|
||||||
|
|||||||
@@ -16,7 +16,12 @@ class KernelDG(nx.DiGraph):
|
|||||||
INSTRUCTION_THRESHOLD = 50
|
INSTRUCTION_THRESHOLD = 50
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, parsed_kernel, parser, hw_model: MachineModel, semantics: ArchSemantics, timeout=10
|
self,
|
||||||
|
parsed_kernel,
|
||||||
|
parser,
|
||||||
|
hw_model: MachineModel,
|
||||||
|
semantics: ArchSemantics,
|
||||||
|
timeout=10,
|
||||||
):
|
):
|
||||||
self.timed_out = False
|
self.timed_out = False
|
||||||
self.kernel = parsed_kernel
|
self.kernel = parsed_kernel
|
||||||
@@ -73,7 +78,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
else instruction_form["latency_wo_load"]
|
else instruction_form["latency_wo_load"]
|
||||||
)
|
)
|
||||||
if "storeload_dep" in dep_flags:
|
if "storeload_dep" in dep_flags:
|
||||||
edge_weight += self.model.get('store_to_load_forward_latency', 0)
|
edge_weight += self.model.get("store_to_load_forward_latency", 0)
|
||||||
dg.add_edge(
|
dg.add_edge(
|
||||||
instruction_form["line_number"],
|
instruction_form["line_number"],
|
||||||
dep["line_number"],
|
dep["line_number"],
|
||||||
@@ -98,7 +103,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
tmp_kernel = [] + kernel
|
tmp_kernel = [] + kernel
|
||||||
for orig_iform in kernel:
|
for orig_iform in kernel:
|
||||||
temp_iform = copy.copy(orig_iform)
|
temp_iform = copy.copy(orig_iform)
|
||||||
temp_iform['line_number'] += offset
|
temp_iform["line_number"] += offset
|
||||||
tmp_kernel.append(temp_iform)
|
tmp_kernel.append(temp_iform)
|
||||||
# get dependency graph
|
# get dependency graph
|
||||||
dg = self.create_DG(tmp_kernel)
|
dg = self.create_DG(tmp_kernel)
|
||||||
@@ -118,12 +123,15 @@ class KernelDG(nx.DiGraph):
|
|||||||
with Manager() as manager:
|
with Manager() as manager:
|
||||||
all_paths = manager.list()
|
all_paths = manager.list()
|
||||||
processes = [
|
processes = [
|
||||||
Process(target=self._extend_path, args=(all_paths, instr_section, dg, offset))
|
Process(
|
||||||
|
target=self._extend_path,
|
||||||
|
args=(all_paths, instr_section, dg, offset),
|
||||||
|
)
|
||||||
for instr_section in instrs
|
for instr_section in instrs
|
||||||
]
|
]
|
||||||
for p in processes:
|
for p in processes:
|
||||||
p.start()
|
p.start()
|
||||||
if (timeout == -1):
|
if timeout == -1:
|
||||||
# no timeout
|
# no timeout
|
||||||
for p in processes:
|
for p in processes:
|
||||||
p.join()
|
p.join()
|
||||||
@@ -162,7 +170,7 @@ class KernelDG(nx.DiGraph):
|
|||||||
# extend path by edge bound latencies (e.g., store-to-load latency)
|
# extend path by edge bound latencies (e.g., store-to-load latency)
|
||||||
lat_path = []
|
lat_path = []
|
||||||
for s, d in nx.utils.pairwise(path):
|
for s, d in nx.utils.pairwise(path):
|
||||||
edge_lat = dg.edges[s, d]['latency']
|
edge_lat = dg.edges[s, d]["latency"]
|
||||||
# map source node back to original line numbers
|
# map source node back to original line numbers
|
||||||
if s >= offset:
|
if s >= offset:
|
||||||
s -= offset
|
s -= offset
|
||||||
@@ -310,17 +318,17 @@ class KernelDG(nx.DiGraph):
|
|||||||
if change is None or reg_state.get(reg, {}) is None:
|
if change is None or reg_state.get(reg, {}) is None:
|
||||||
reg_state[reg] = None
|
reg_state[reg] = None
|
||||||
else:
|
else:
|
||||||
reg_state.setdefault(reg, {'name': reg, 'value': 0})
|
reg_state.setdefault(reg, {"name": reg, "value": 0})
|
||||||
if change['name'] != reg:
|
if change["name"] != reg:
|
||||||
# renaming occured, ovrwrite value with up-to-now change of source register
|
# renaming occured, ovrwrite value with up-to-now change of source register
|
||||||
reg_state[reg]['name'] = change['name']
|
reg_state[reg]["name"] = change["name"]
|
||||||
src_reg_state = reg_state.get(change['name'], {'value': 0})
|
src_reg_state = reg_state.get(change["name"], {"value": 0})
|
||||||
if src_reg_state is None:
|
if src_reg_state is None:
|
||||||
# original register's state was changed beyond reconstruction
|
# original register's state was changed beyond reconstruction
|
||||||
reg_state[reg] = None
|
reg_state[reg] = None
|
||||||
continue
|
continue
|
||||||
reg_state[reg]['value'] = src_reg_state['value']
|
reg_state[reg]["value"] = src_reg_state["value"]
|
||||||
reg_state[reg]['value'] += change['value']
|
reg_state[reg]["value"] += change["value"]
|
||||||
return reg_state
|
return reg_state
|
||||||
|
|
||||||
def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
|
def get_dependent_instruction_forms(self, instr_form=None, line_number=None):
|
||||||
@@ -340,7 +348,8 @@ class KernelDG(nx.DiGraph):
|
|||||||
if instruction_form.semantic_operands is None:
|
if instruction_form.semantic_operands is None:
|
||||||
return is_read
|
return is_read
|
||||||
for src in chain(
|
for src in chain(
|
||||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
instruction_form.semantic_operands.source,
|
||||||
|
instruction_form.semantic_operands.src_dst,
|
||||||
):
|
):
|
||||||
if "register" in src:
|
if "register" in src:
|
||||||
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
|
is_read = self.parser.is_reg_dependend_of(register, src.register) or is_read
|
||||||
@@ -372,7 +381,8 @@ class KernelDG(nx.DiGraph):
|
|||||||
if instruction_form.semantic_operands is None:
|
if instruction_form.semantic_operands is None:
|
||||||
return False
|
return False
|
||||||
for src in chain(
|
for src in chain(
|
||||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
instruction_form.semantic_operands.source,
|
||||||
|
instruction_form.semantic_operands.src_dst,
|
||||||
):
|
):
|
||||||
# Here we check for mem dependecies only
|
# Here we check for mem dependecies only
|
||||||
if "memory" not in src:
|
if "memory" not in src:
|
||||||
@@ -387,23 +397,23 @@ class KernelDG(nx.DiGraph):
|
|||||||
addr_change -= mem.offset.value
|
addr_change -= mem.offset.value
|
||||||
if mem.base and src.base:
|
if mem.base and src.base:
|
||||||
base_change = register_changes.get(
|
base_change = register_changes.get(
|
||||||
src.base.get('prefix', '') + src.base.name,
|
src.base.get("prefix", "") + src.base.name,
|
||||||
{'name': src.base.get('prefix', '') + src.base.name, 'value': 0},
|
{"name": src.base.get("prefix", "") + src.base.name, "value": 0},
|
||||||
)
|
)
|
||||||
if base_change is None:
|
if base_change is None:
|
||||||
# Unknown change occurred
|
# Unknown change occurred
|
||||||
continue
|
continue
|
||||||
if mem.base.get('prefix', '') + mem.base['name'] != base_change['name']:
|
if mem.base.get("prefix", "") + mem.base["name"] != base_change["name"]:
|
||||||
# base registers do not match
|
# base registers do not match
|
||||||
continue
|
continue
|
||||||
addr_change += base_change['value']
|
addr_change += base_change["value"]
|
||||||
elif mem.base or src.base:
|
elif mem.base or src.base:
|
||||||
# base registers do not match
|
# base registers do not match
|
||||||
continue
|
continue
|
||||||
if mem.index and src.index:
|
if mem.index and src.index:
|
||||||
index_change = register_changes.get(
|
index_change = register_changes.get(
|
||||||
src.index.get('prefix', '') + src.index.name,
|
src.index.get("prefix", "") + src.index.name,
|
||||||
{'name': src.index.get('prefix', '') + src.index.name, 'value': 0},
|
{"name": src.index.get("prefix", "") + src.index.name, "value": 0},
|
||||||
)
|
)
|
||||||
if index_change is None:
|
if index_change is None:
|
||||||
# Unknown change occurred
|
# Unknown change occurred
|
||||||
@@ -411,10 +421,10 @@ class KernelDG(nx.DiGraph):
|
|||||||
if mem.scale != src.scale:
|
if mem.scale != src.scale:
|
||||||
# scale factors do not match
|
# scale factors do not match
|
||||||
continue
|
continue
|
||||||
if mem.index.get('prefix', '') + mem.index['name'] != index_change['name']:
|
if mem.index.get("prefix", "") + mem.index["name"] != index_change["name"]:
|
||||||
# index registers do not match
|
# index registers do not match
|
||||||
continue
|
continue
|
||||||
addr_change += index_change['value'] * src.scale
|
addr_change += index_change["value"] * src.scale
|
||||||
elif mem.index or src.index:
|
elif mem.index or src.index:
|
||||||
# index registers do not match
|
# index registers do not match
|
||||||
continue
|
continue
|
||||||
@@ -443,7 +453,8 @@ class KernelDG(nx.DiGraph):
|
|||||||
)
|
)
|
||||||
# Check also for possible pre- or post-indexing in memory addresses
|
# Check also for possible pre- or post-indexing in memory addresses
|
||||||
for src in chain(
|
for src in chain(
|
||||||
instruction_form.semantic_operands.source, instruction_form.semantic_operands.src_dst
|
instruction_form.semantic_operands.source,
|
||||||
|
instruction_form.semantic_operands.src_dst,
|
||||||
):
|
):
|
||||||
if "memory" in src:
|
if "memory" in src:
|
||||||
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
if "pre_indexed" in src.memory or "post_indexed" in src.memory:
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
DATA_DIRS = [os.path.expanduser("~/.osaca/data"), os.path.join(os.path.dirname(__file__), "data")]
|
DATA_DIRS = [
|
||||||
|
os.path.expanduser("~/.osaca/data"),
|
||||||
|
os.path.join(os.path.dirname(__file__), "data"),
|
||||||
|
]
|
||||||
CACHE_DIR = os.path.expanduser("~/.osaca/cache")
|
CACHE_DIR = os.path.expanduser("~/.osaca/cache")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
14
setup.py
14
setup.py
@@ -18,7 +18,8 @@ here = os.path.abspath(os.path.dirname(__file__))
|
|||||||
# Stolen from pip
|
# Stolen from pip
|
||||||
def read(*names, **kwargs):
|
def read(*names, **kwargs):
|
||||||
with io.open(
|
with io.open(
|
||||||
os.path.join(os.path.dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
|
os.path.join(os.path.dirname(__file__), *names),
|
||||||
|
encoding=kwargs.get("encoding", "utf8"),
|
||||||
) as fp:
|
) as fp:
|
||||||
return fp.read()
|
return fp.read()
|
||||||
|
|
||||||
@@ -38,13 +39,20 @@ def _run_build_cache(dir):
|
|||||||
# This is run inside the install staging directory (that had no .pyc files)
|
# This is run inside the install staging directory (that had no .pyc files)
|
||||||
# We don't want to generate any.
|
# We don't want to generate any.
|
||||||
# https://github.com/eliben/pycparser/pull/135
|
# https://github.com/eliben/pycparser/pull/135
|
||||||
check_call([sys.executable, "-B", "_build_cache.py"], cwd=os.path.join(dir, "osaca", "data"))
|
check_call(
|
||||||
|
[sys.executable, "-B", "_build_cache.py"],
|
||||||
|
cwd=os.path.join(dir, "osaca", "data"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class install(_install):
|
class install(_install):
|
||||||
def run(self):
|
def run(self):
|
||||||
_install.run(self)
|
_install.run(self)
|
||||||
self.execute(_run_build_cache, (self.install_lib,), msg="Build ISA and architecture cache")
|
self.execute(
|
||||||
|
_run_build_cache,
|
||||||
|
(self.install_lib,),
|
||||||
|
msg="Build ISA and architecture cache",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class sdist(_sdist):
|
class sdist(_sdist):
|
||||||
|
|||||||
@@ -33,7 +33,13 @@ class TestCLI(unittest.TestCase):
|
|||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
osaca.check_arguments(args, parser)
|
osaca.check_arguments(args, parser)
|
||||||
args = parser.parse_args(
|
args = parser.parse_args(
|
||||||
["--arch", "csx", "--import", "WRONG_BENCH", self._find_file("gs", "csx", "gcc")]
|
[
|
||||||
|
"--arch",
|
||||||
|
"csx",
|
||||||
|
"--import",
|
||||||
|
"WRONG_BENCH",
|
||||||
|
self._find_file("gs", "csx", "gcc"),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
osaca.check_arguments(args, parser)
|
osaca.check_arguments(args, parser)
|
||||||
@@ -65,7 +71,13 @@ class TestCLI(unittest.TestCase):
|
|||||||
def test_check_db(self):
|
def test_check_db(self):
|
||||||
parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
|
parser = osaca.create_parser(parser=ErrorRaisingArgumentParser())
|
||||||
args = parser.parse_args(
|
args = parser.parse_args(
|
||||||
["--arch", "tx2", "--db-check", "--verbose", self._find_test_file("triad_x86_iaca.s")]
|
[
|
||||||
|
"--arch",
|
||||||
|
"tx2",
|
||||||
|
"--db-check",
|
||||||
|
"--verbose",
|
||||||
|
self._find_test_file("triad_x86_iaca.s"),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
output = StringIO()
|
output = StringIO()
|
||||||
osaca.run(args, output_file=output)
|
osaca.run(args, output_file=output)
|
||||||
@@ -134,7 +146,13 @@ class TestCLI(unittest.TestCase):
|
|||||||
for c in comps[a]:
|
for c in comps[a]:
|
||||||
with self.subTest(kernel=k, arch=a, comp=c):
|
with self.subTest(kernel=k, arch=a, comp=c):
|
||||||
args = parser.parse_args(
|
args = parser.parse_args(
|
||||||
["--arch", a, self._find_file(k, a, c), "--export-graph", "/dev/null"]
|
[
|
||||||
|
"--arch",
|
||||||
|
a,
|
||||||
|
self._find_file(k, a, c),
|
||||||
|
"--export-graph",
|
||||||
|
"/dev/null",
|
||||||
|
]
|
||||||
)
|
)
|
||||||
output = StringIO()
|
output = StringIO()
|
||||||
osaca.run(args, output_file=output)
|
osaca.run(args, output_file=output)
|
||||||
@@ -204,17 +222,13 @@ class TestCLI(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
output = StringIO()
|
output = StringIO()
|
||||||
osaca.run(args, output_file=output)
|
osaca.run(args, output_file=output)
|
||||||
self.assertTrue(
|
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 1)
|
||||||
output.getvalue().count("WARNING: LCD analysis timed out") == 1
|
|
||||||
)
|
|
||||||
args = parser.parse_args(
|
args = parser.parse_args(
|
||||||
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
|
["--ignore-unknown", "--lcd-timeout", "-1", self._find_test_file(kernel)]
|
||||||
)
|
)
|
||||||
output = StringIO()
|
output = StringIO()
|
||||||
osaca.run(args, output_file=output)
|
osaca.run(args, output_file=output)
|
||||||
self.assertTrue(
|
self.assertTrue(output.getvalue().count("WARNING: LCD analysis timed out") == 0)
|
||||||
output.getvalue().count("WARNING: LCD analysis timed out") == 0
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_lines_arg(self):
|
def test_lines_arg(self):
|
||||||
# Run tests with --lines option
|
# Run tests with --lines option
|
||||||
@@ -227,12 +241,24 @@ class TestCLI(unittest.TestCase):
|
|||||||
args = []
|
args = []
|
||||||
args.append(
|
args.append(
|
||||||
parser.parse_args(
|
parser.parse_args(
|
||||||
["--lines", "146-154", "--arch", "csx", self._find_test_file(kernel_x86)]
|
[
|
||||||
|
"--lines",
|
||||||
|
"146-154",
|
||||||
|
"--arch",
|
||||||
|
"csx",
|
||||||
|
self._find_test_file(kernel_x86),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
args.append(
|
args.append(
|
||||||
parser.parse_args(
|
parser.parse_args(
|
||||||
["--lines", "146:154", "--arch", "csx", self._find_test_file(kernel_x86)]
|
[
|
||||||
|
"--lines",
|
||||||
|
"146:154",
|
||||||
|
"--arch",
|
||||||
|
"csx",
|
||||||
|
self._find_test_file(kernel_x86),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
args.append(
|
args.append(
|
||||||
|
|||||||
@@ -17,7 +17,13 @@ class TestDBInterface(unittest.TestCase):
|
|||||||
sample_entry = {
|
sample_entry = {
|
||||||
"name": "DoItRightAndDoItFast",
|
"name": "DoItRightAndDoItFast",
|
||||||
"operands": [
|
"operands": [
|
||||||
{"class": "memory", "offset": "imd", "base": "gpr", "index": "gpr", "scale": 8},
|
{
|
||||||
|
"class": "memory",
|
||||||
|
"offset": "imd",
|
||||||
|
"base": "gpr",
|
||||||
|
"index": "gpr",
|
||||||
|
"scale": 8,
|
||||||
|
},
|
||||||
{"class": "register", "name": "xmm"},
|
{"class": "register", "name": "xmm"},
|
||||||
],
|
],
|
||||||
"throughput": 1.25,
|
"throughput": 1.25,
|
||||||
@@ -35,7 +41,12 @@ class TestDBInterface(unittest.TestCase):
|
|||||||
del self.entry_tx2["operands"][1]["name"]
|
del self.entry_tx2["operands"][1]["name"]
|
||||||
self.entry_tx2["operands"][1]["prefix"] = "x"
|
self.entry_tx2["operands"][1]["prefix"] = "x"
|
||||||
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
|
# self.entry_zen1['port_pressure'] = [1, 1, 1, 1, 0, 1, 0, 0, 0, 0.5, 1, 0.5, 1]
|
||||||
self.entry_zen1["port_pressure"] = [[4, "0123"], [1, "4"], [1, "89"], [2, ["8D", "9D"]]]
|
self.entry_zen1["port_pressure"] = [
|
||||||
|
[4, "0123"],
|
||||||
|
[1, "4"],
|
||||||
|
[1, "89"],
|
||||||
|
[2, ["8D", "9D"]],
|
||||||
|
]
|
||||||
|
|
||||||
###########
|
###########
|
||||||
# Tests
|
# Tests
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
# OSACA-BEGIN
|
# OSACA-BEGIN
|
||||||
.L4:
|
.L4:
|
||||||
vmovsd %xmm0, 8(%rax)
|
vmovsd %xmm0, 8(%rax) # line 3 <----------------------------------+
|
||||||
addq $8, %rax
|
addq $8, %rax # |
|
||||||
vmovsd %xmm0, 8(%rax,%rcx,8)
|
vmovsd %xmm0, 8(%rax,%rcx,8) # line 5 <-----------------------------------------------+
|
||||||
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8)
|
vaddsd (%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == (%rax+8) ---+ |
|
||||||
subq $-8, %rax
|
subq $-8, %rax # | |
|
||||||
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16)
|
vaddsd -8(%rax), %xmm0, %xmm0 # depends on line 3, 8(%rax) == -8(%rax+16) ---+ |
|
||||||
dec %rcx
|
dec %rcx # |
|
||||||
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
|
vaddsd 8(%rax,%rcx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
|
||||||
movq %rcx, %rdx
|
movq %rcx, %rdx # |
|
||||||
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8)
|
vaddsd 8(%rax,%rdx,8), %xmm0, %xmm0 # depends on line 5, 8(%rax,%rdx,8) == 8(%rax+8,%rdx-1,8) --+
|
||||||
vmulsd %xmm1, %xmm0, %xmm0
|
vmulsd %xmm1, %xmm0, %xmm0
|
||||||
addq $8, %rax
|
addq $8, %rax
|
||||||
cmpq %rsi, %rax
|
cmpq %rsi, %rax
|
||||||
|
|||||||
@@ -34,7 +34,8 @@ class TestFrontend(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.machine_model_tx2 = MachineModel(arch="tx2")
|
self.machine_model_tx2 = MachineModel(arch="tx2")
|
||||||
self.semantics_csx = ArchSemantics(
|
self.semantics_csx = ArchSemantics(
|
||||||
self.machine_model_csx, path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml")
|
self.machine_model_csx,
|
||||||
|
path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||||
)
|
)
|
||||||
self.semantics_tx2 = ArchSemantics(
|
self.semantics_tx2 = ArchSemantics(
|
||||||
self.machine_model_tx2,
|
self.machine_model_tx2,
|
||||||
@@ -71,7 +72,11 @@ class TestFrontend(unittest.TestCase):
|
|||||||
|
|
||||||
def test_frontend_AArch64(self):
|
def test_frontend_AArch64(self):
|
||||||
dg = KernelDG(
|
dg = KernelDG(
|
||||||
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2)
|
self.kernel_AArch64,
|
||||||
|
self.parser_AArch64,
|
||||||
|
self.machine_model_tx2,
|
||||||
|
self.semantics_tx2,
|
||||||
|
)
|
||||||
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
|
fe = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
|
||||||
fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
|
fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
|
||||||
# TODO compare output with checked string
|
# TODO compare output with checked string
|
||||||
|
|||||||
@@ -109,7 +109,8 @@ class TestMarkerUtils(unittest.TestCase):
|
|||||||
kernel_start = len(
|
kernel_start = len(
|
||||||
list(
|
list(
|
||||||
filter(
|
filter(
|
||||||
None, (prologue + mov_start_var + bytes_var_1).split("\n")
|
None,
|
||||||
|
(prologue + mov_start_var + bytes_var_1).split("\n"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -142,7 +143,12 @@ class TestMarkerUtils(unittest.TestCase):
|
|||||||
epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
|
epilogue = ".LE9:\t\t#12.2\n" "call dummy\n"
|
||||||
kernel_length = len(list(filter(None, kernel.split("\n"))))
|
kernel_length = len(list(filter(None, kernel.split("\n"))))
|
||||||
|
|
||||||
bytes_variations = [bytes_1_line, bytes_2_lines_1, bytes_2_lines_2, bytes_3_lines]
|
bytes_variations = [
|
||||||
|
bytes_1_line,
|
||||||
|
bytes_2_lines_1,
|
||||||
|
bytes_2_lines_2,
|
||||||
|
bytes_3_lines,
|
||||||
|
]
|
||||||
mov_start_variations = [mov_start_1, mov_start_2]
|
mov_start_variations = [mov_start_1, mov_start_2]
|
||||||
mov_end_variations = [mov_end_1, mov_end_2]
|
mov_end_variations = [mov_end_1, mov_end_2]
|
||||||
# actual tests
|
# actual tests
|
||||||
@@ -171,7 +177,8 @@ class TestMarkerUtils(unittest.TestCase):
|
|||||||
kernel_start = len(
|
kernel_start = len(
|
||||||
list(
|
list(
|
||||||
filter(
|
filter(
|
||||||
None, (prologue + mov_start_var + bytes_var_1).split("\n")
|
None,
|
||||||
|
(prologue + mov_start_var + bytes_var_1).split("\n"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -24,7 +24,9 @@ class TestParserAArch64(unittest.TestCase):
|
|||||||
|
|
||||||
def test_comment_parser(self):
|
def test_comment_parser(self):
|
||||||
self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
|
self.assertEqual(self._get_comment(self.parser, "// some comments"), "some comments")
|
||||||
self.assertEqual(self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end")
|
self.assertEqual(
|
||||||
|
self._get_comment(self.parser, "\t\t//AA BB CC \t end \t"), "AA BB CC end"
|
||||||
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self._get_comment(self.parser, "\t//// comment //// comment"),
|
self._get_comment(self.parser, "\t//// comment //// comment"),
|
||||||
"// comment //// comment",
|
"// comment //// comment",
|
||||||
@@ -36,7 +38,8 @@ class TestParserAArch64(unittest.TestCase):
|
|||||||
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
||||||
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
|
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t//label1").name, ".L1")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment), "label1"
|
" ".join(self._get_label(self.parser, ".L1:\t\t\t//label1").comment),
|
||||||
|
"label1",
|
||||||
)
|
)
|
||||||
with self.assertRaises(ParseException):
|
with self.assertRaises(ParseException):
|
||||||
self._get_label(self.parser, "\t.cfi_startproc")
|
self._get_label(self.parser, "\t.cfi_startproc")
|
||||||
@@ -316,7 +319,8 @@ class TestParserAArch64(unittest.TestCase):
|
|||||||
value1 = self.parser.normalize_imd(imd_decimal_1)
|
value1 = self.parser.normalize_imd(imd_decimal_1)
|
||||||
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
|
self.assertEqual(value1, self.parser.normalize_imd(imd_hex_1))
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
self.parser.normalize_imd(imd_decimal_2),
|
||||||
|
self.parser.normalize_imd(imd_hex_2),
|
||||||
)
|
)
|
||||||
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
|
self.assertEqual(self.parser.normalize_imd(imd_float_11), value1)
|
||||||
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)
|
self.assertEqual(self.parser.normalize_imd(imd_float_12), value1)
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ class TestParserX86ATT(unittest.TestCase):
|
|||||||
self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
|
self.assertEqual(self._get_comment(self.parser, "# some comments"), "some comments")
|
||||||
self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
|
self.assertEqual(self._get_comment(self.parser, "\t\t#AA BB CC \t end \t"), "AA BB CC end")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self._get_comment(self.parser, "\t## comment ## comment"), "# comment ## comment"
|
self._get_comment(self.parser, "\t## comment ## comment"),
|
||||||
|
"# comment ## comment",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_label_parser(self):
|
def test_label_parser(self):
|
||||||
@@ -35,7 +36,8 @@ class TestParserX86ATT(unittest.TestCase):
|
|||||||
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
self.assertEqual(self._get_label(self.parser, ".2.3_2_pack.3:").name, ".2.3_2_pack.3")
|
||||||
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
|
self.assertEqual(self._get_label(self.parser, ".L1:\t\t\t#label1").name, ".L1")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment), "label1"
|
" ".join(self._get_label(self.parser, ".L1:\t\t\t#label1").comment),
|
||||||
|
"label1",
|
||||||
)
|
)
|
||||||
with self.assertRaises(ParseException):
|
with self.assertRaises(ParseException):
|
||||||
self._get_label(self.parser, "\t.cfi_startproc")
|
self._get_label(self.parser, "\t.cfi_startproc")
|
||||||
@@ -47,7 +49,8 @@ class TestParserX86ATT(unittest.TestCase):
|
|||||||
self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
|
self.assertEqual(len(self._get_directive(self.parser, "\t.align\t16,0x90").parameters), 2)
|
||||||
self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
|
self.assertEqual(len(self._get_directive(self.parser, ".text").parameters), 0)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters), 2
|
len(self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters),
|
||||||
|
2,
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
|
self._get_directive(self.parser, '.file\t1 "path/to/file.c"').parameters[1],
|
||||||
@@ -62,7 +65,12 @@ class TestParserX86ATT(unittest.TestCase):
|
|||||||
self.parser,
|
self.parser,
|
||||||
"\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
|
"\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support",
|
||||||
).parameters,
|
).parameters,
|
||||||
["__TEXT", "__eh_frame", "coalesced", "no_toc+strip_static_syms+live_support"],
|
[
|
||||||
|
"__TEXT",
|
||||||
|
"__eh_frame",
|
||||||
|
"coalesced",
|
||||||
|
"no_toc+strip_static_syms+live_support",
|
||||||
|
],
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self._get_directive(
|
self._get_directive(
|
||||||
@@ -74,7 +82,9 @@ class TestParserX86ATT(unittest.TestCase):
|
|||||||
self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
|
self._get_directive(self.parser, "\t.align\t16,0x90").parameters[1], "0x90"
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")["name"],
|
self._get_directive(self.parser, " .byte 100,103,144 #IACA START")[
|
||||||
|
"name"
|
||||||
|
],
|
||||||
"byte",
|
"byte",
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -242,10 +252,12 @@ class TestParserX86ATT(unittest.TestCase):
|
|||||||
imd_decimal_2 = {"value": "8"}
|
imd_decimal_2 = {"value": "8"}
|
||||||
imd_hex_2 = {"value": "8"}
|
imd_hex_2 = {"value": "8"}
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.normalize_imd(imd_decimal_1), self.parser.normalize_imd(imd_hex_1)
|
self.parser.normalize_imd(imd_decimal_1),
|
||||||
|
self.parser.normalize_imd(imd_hex_1),
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.parser.normalize_imd(imd_decimal_2), self.parser.normalize_imd(imd_hex_2)
|
self.parser.normalize_imd(imd_decimal_2),
|
||||||
|
self.parser.normalize_imd(imd_hex_2),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_reg_dependency(self):
|
def test_reg_dependency(self):
|
||||||
|
|||||||
@@ -11,8 +11,14 @@ from copy import deepcopy
|
|||||||
import networkx as nx
|
import networkx as nx
|
||||||
from osaca.osaca import get_unmatched_instruction_ratio
|
from osaca.osaca import get_unmatched_instruction_ratio
|
||||||
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
from osaca.parser import AttrDict, ParserAArch64, ParserX86ATT
|
||||||
from osaca.semantics import (INSTR_FLAGS, ArchSemantics, ISASemantics,
|
from osaca.semantics import (
|
||||||
KernelDG, MachineModel, reduce_to_section)
|
INSTR_FLAGS,
|
||||||
|
ArchSemantics,
|
||||||
|
ISASemantics,
|
||||||
|
KernelDG,
|
||||||
|
MachineModel,
|
||||||
|
reduce_to_section,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestSemanticTools(unittest.TestCase):
|
class TestSemanticTools(unittest.TestCase):
|
||||||
@@ -66,7 +72,8 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
cls.semantics_x86 = ISASemantics("x86")
|
cls.semantics_x86 = ISASemantics("x86")
|
||||||
cls.semantics_csx = ArchSemantics(
|
cls.semantics_csx = ArchSemantics(
|
||||||
cls.machine_model_csx, path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml")
|
cls.machine_model_csx,
|
||||||
|
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||||
)
|
)
|
||||||
cls.semantics_aarch64 = ISASemantics("aarch64")
|
cls.semantics_aarch64 = ISASemantics("aarch64")
|
||||||
cls.semantics_tx2 = ArchSemantics(
|
cls.semantics_tx2 = ArchSemantics(
|
||||||
@@ -173,7 +180,12 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
test_mm_x86.get_store_throughput(
|
test_mm_x86.get_store_throughput(
|
||||||
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": "NOT_NONE", "scale": 1}
|
{
|
||||||
|
"base": {"prefix": "NOT_IN_DB"},
|
||||||
|
"offset": None,
|
||||||
|
"index": "NOT_NONE",
|
||||||
|
"scale": 1,
|
||||||
|
}
|
||||||
),
|
),
|
||||||
[[1, "23"], [1, "4"]],
|
[[1, "23"], [1, "4"]],
|
||||||
)
|
)
|
||||||
@@ -185,7 +197,12 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
test_mm_arm.get_store_throughput(
|
test_mm_arm.get_store_throughput(
|
||||||
{"base": {"prefix": "NOT_IN_DB"}, "offset": None, "index": None, "scale": 1}
|
{
|
||||||
|
"base": {"prefix": "NOT_IN_DB"},
|
||||||
|
"offset": None,
|
||||||
|
"index": None,
|
||||||
|
"scale": 1,
|
||||||
|
}
|
||||||
),
|
),
|
||||||
[[1, "34"], [1, "5"]],
|
[[1, "34"], [1, "5"]],
|
||||||
)
|
)
|
||||||
@@ -310,7 +327,10 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
|
|
||||||
def test_memdependency_x86(self):
|
def test_memdependency_x86(self):
|
||||||
dg = KernelDG(
|
dg = KernelDG(
|
||||||
self.kernel_x86_memdep, self.parser_x86, self.machine_model_csx, self.semantics_csx
|
self.kernel_x86_memdep,
|
||||||
|
self.parser_x86,
|
||||||
|
self.machine_model_csx,
|
||||||
|
self.semantics_csx,
|
||||||
)
|
)
|
||||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
|
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
|
||||||
@@ -322,7 +342,10 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
|
|
||||||
def test_kernelDG_AArch64(self):
|
def test_kernelDG_AArch64(self):
|
||||||
dg = KernelDG(
|
dg = KernelDG(
|
||||||
self.kernel_AArch64, self.parser_AArch64, self.machine_model_tx2, self.semantics_tx2
|
self.kernel_AArch64,
|
||||||
|
self.parser_AArch64,
|
||||||
|
self.machine_model_tx2,
|
||||||
|
self.semantics_tx2,
|
||||||
)
|
)
|
||||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
|
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {7, 8})
|
||||||
@@ -400,7 +423,7 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
# based on line 6
|
# based on line 6
|
||||||
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
self.assertEqual(lc_deps[6]["latency"], 28.0)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
[(iform.line_number, lat) for iform, lat in lc_deps[6]['dependencies']],
|
[(iform.line_number, lat) for iform, lat in lc_deps[6]["dependencies"]],
|
||||||
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
[(6, 4.0), (10, 6.0), (11, 6.0), (12, 6.0), (13, 6.0), (14, 0)],
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -423,7 +446,8 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
# w/o flag dependencies: ID 5 w/ len=1
|
# w/o flag dependencies: ID 5 w/ len=1
|
||||||
# TODO discuss
|
# TODO discuss
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
lc_deps[lcd_id2]["root"], dg.dg.nodes(data=True)[lcd_id2]["instruction_form"]
|
lc_deps[lcd_id2]["root"],
|
||||||
|
dg.dg.nodes(data=True)[lcd_id2]["instruction_form"],
|
||||||
)
|
)
|
||||||
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
|
self.assertEqual(len(lc_deps[lcd_id2]["dependencies"]), 1)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -438,7 +462,7 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
self.parser_x86,
|
self.parser_x86,
|
||||||
self.machine_model_csx,
|
self.machine_model_csx,
|
||||||
self.semantics_x86,
|
self.semantics_x86,
|
||||||
timeout=10
|
timeout=10,
|
||||||
)
|
)
|
||||||
end_time = time.perf_counter()
|
end_time = time.perf_counter()
|
||||||
time_10 = end_time - start_time
|
time_10 = end_time - start_time
|
||||||
@@ -448,7 +472,7 @@ class TestSemanticTools(unittest.TestCase):
|
|||||||
self.parser_x86,
|
self.parser_x86,
|
||||||
self.machine_model_csx,
|
self.machine_model_csx,
|
||||||
self.semantics_x86,
|
self.semantics_x86,
|
||||||
timeout=2
|
timeout=2,
|
||||||
)
|
)
|
||||||
end_time = time.perf_counter()
|
end_time = time.perf_counter()
|
||||||
time_2 = end_time - start_time
|
time_2 = end_time - start_time
|
||||||
|
|||||||
@@ -1,33 +1,26 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from subprocess import check_call, check_output, CalledProcessError, STDOUT
|
|
||||||
from itertools import chain
|
|
||||||
import shutil
|
|
||||||
from functools import lru_cache
|
|
||||||
from glob import glob
|
|
||||||
from pathlib import Path
|
|
||||||
from pprint import pprint
|
|
||||||
import socket
|
|
||||||
import pickle
|
import pickle
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
from glob import glob
|
||||||
|
from itertools import chain
|
||||||
|
from pathlib import Path
|
||||||
|
from subprocess import STDOUT, CalledProcessError, check_call, check_output
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from osaca.osaca import reduce_to_section
|
|
||||||
|
|
||||||
from kerncraft.models import benchmark
|
|
||||||
from kerncraft.incore_model import (
|
from kerncraft.incore_model import (
|
||||||
parse_asm,
|
|
||||||
asm_instrumentation,
|
asm_instrumentation,
|
||||||
iaca_analyse_instrumented_binary,
|
iaca_analyse_instrumented_binary,
|
||||||
|
llvm_mca_analyse_instrumented_assembly,
|
||||||
osaca_analyse_instrumented_assembly,
|
osaca_analyse_instrumented_assembly,
|
||||||
llvm_mca_analyse_instrumented_assembly
|
parse_asm,
|
||||||
)
|
)
|
||||||
|
from kerncraft.models import benchmark
|
||||||
|
from osaca.osaca import reduce_to_section
|
||||||
|
|
||||||
# Scaling of inner dimension for 1D, 2D and 3D kernels
|
# Scaling of inner dimension for 1D, 2D and 3D kernels
|
||||||
# * consider kernels to be compiled with multiple compilers and different options
|
# * consider kernels to be compiled with multiple compilers and different options
|
||||||
@@ -39,37 +32,50 @@ from kerncraft.incore_model import (
|
|||||||
# Collect inner loop body assembly for each kernel/compiler/options combination
|
# Collect inner loop body assembly for each kernel/compiler/options combination
|
||||||
# * analyze with OSACA, IACA and LLVM-MCA
|
# * analyze with OSACA, IACA and LLVM-MCA
|
||||||
|
|
||||||
hosts_arch_map = {r"skylakesp2": "SKX",
|
hosts_arch_map = {
|
||||||
r"ivyep1": "IVB",
|
r"skylakesp2": "SKX",
|
||||||
r"naples1": "ZEN",
|
r"ivyep1": "IVB",
|
||||||
r"rome1": "ZEN2",
|
r"naples1": "ZEN",
|
||||||
r"warmup": "TX2",
|
r"rome1": "ZEN2",
|
||||||
r"qp4-node-[0-9]+": "A64FX"}
|
r"warmup": "TX2",
|
||||||
|
r"qp4-node-[0-9]+": "A64FX",
|
||||||
|
}
|
||||||
|
|
||||||
arch_info = {
|
arch_info = {
|
||||||
'SKX': {
|
"SKX": {
|
||||||
'prepare': ['likwid-setFrequencies -f 2.4 -t 0'.split()],
|
"prepare": ["likwid-setFrequencies -f 2.4 -t 0".split()],
|
||||||
'IACA': 'SKX',
|
"IACA": "SKX",
|
||||||
'OSACA': 'SKX',
|
"OSACA": "SKX",
|
||||||
'LLVM-MCA': '-mcpu=skylake-avx512',
|
"LLVM-MCA": "-mcpu=skylake-avx512",
|
||||||
'Ithemal': 'skl',
|
"Ithemal": "skl",
|
||||||
'isa': 'x86',
|
"isa": "x86",
|
||||||
'perfevents': [],
|
"perfevents": [],
|
||||||
"cflags": {
|
"cflags": {
|
||||||
'icc': {
|
"icc": {
|
||||||
"Ofast": "-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
"Ofast": (
|
||||||
"O3": "-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
"-Ofast -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||||
"O2": "-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
"-ffreestanding -falign-loops"
|
||||||
"O1": "-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline -ffreestanding -falign-loops".split(),
|
).split(),
|
||||||
|
"O3": (
|
||||||
|
"-O3 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||||
|
"-ffreestanding -falign-loops"
|
||||||
|
).split(),
|
||||||
|
"O2": (
|
||||||
|
"-O2 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||||
|
"-ffreestanding -falign-loops"
|
||||||
|
).split(),
|
||||||
|
"O1": (
|
||||||
|
"-O1 -fno-alias -xCORE-AVX512 -qopt-zmm-usage=high -nolib-inline "
|
||||||
|
"-ffreestanding -falign-loops"
|
||||||
|
).split(),
|
||||||
},
|
},
|
||||||
'clang': {
|
"clang": {
|
||||||
"Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
|
"Ofast": "-Ofast -march=skylake-avx512 -ffreestanding".split(),
|
||||||
"O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
|
"O3": "-O3 -march=skylake-avx512 -ffreestanding".split(),
|
||||||
"O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
|
"O2": "-O2 -march=skylake-avx512 -ffreestanding".split(),
|
||||||
"O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
|
"O1": "-O1 -march=skylake-avx512 -ffreestanding".split(),
|
||||||
|
|
||||||
},
|
},
|
||||||
'gcc': {
|
"gcc": {
|
||||||
"Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
"Ofast": "-Ofast -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||||
"O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
"O3": "-O3 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||||
"O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
"O2": "-O2 -march=skylake-avx512 -lm -ffreestanding -falign-loops=16".split(),
|
||||||
@@ -77,17 +83,19 @@ arch_info = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'IVB': {
|
"IVB": {
|
||||||
'prepare': ['likwid-setFrequencies -f 3.0 -t 0'.split()],
|
"prepare": ["likwid-setFrequencies -f 3.0 -t 0".split()],
|
||||||
'IACA': 'IVB',
|
"IACA": "IVB",
|
||||||
'OSACA': 'IVB',
|
"OSACA": "IVB",
|
||||||
'LLVM-MCA': '-mcpu=ivybridge',
|
"LLVM-MCA": "-mcpu=ivybridge",
|
||||||
'Ithemal': 'ivb',
|
"Ithemal": "ivb",
|
||||||
'isa': 'x86',
|
"isa": "x86",
|
||||||
'perfevents': [],
|
"perfevents": [],
|
||||||
"cflags": {
|
"cflags": {
|
||||||
"icc": {
|
"icc": {
|
||||||
"Ofast": "-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"Ofast": (
|
||||||
|
"-Ofast -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||||
|
).split(),
|
||||||
"O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O3": "-O3 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
"O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O2": "-O2 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
"O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O1": "-O1 -xAVX -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
@@ -106,14 +114,14 @@ arch_info = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'ZEN': {
|
"ZEN": {
|
||||||
'prepare': ['likwid-setFrequencies -f 2.3 -t 0'.split()],
|
"prepare": ["likwid-setFrequencies -f 2.3 -t 0".split()],
|
||||||
'IACA': None,
|
"IACA": None,
|
||||||
'OSACA': 'ZEN1',
|
"OSACA": "ZEN1",
|
||||||
'LLVM-MCA': '-mcpu=znver1',
|
"LLVM-MCA": "-mcpu=znver1",
|
||||||
'Ithemal': None,
|
"Ithemal": None,
|
||||||
'isa': 'x86',
|
"isa": "x86",
|
||||||
'perfevents': [],
|
"perfevents": [],
|
||||||
"cflags": {
|
"cflags": {
|
||||||
"clang": {
|
"clang": {
|
||||||
"Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
|
"Ofast": "-Ofast -march=znver1 -ffreestanding".split(),
|
||||||
@@ -128,21 +136,23 @@ arch_info = {
|
|||||||
"O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
|
"O1": "-O1 -march=znver1 -ffreestanding -falign-loops=16".split(),
|
||||||
},
|
},
|
||||||
"icc": {
|
"icc": {
|
||||||
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"Ofast": (
|
||||||
|
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||||
|
).split(),
|
||||||
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'ZEN2': {
|
"ZEN2": {
|
||||||
'prepare': ['likwid-setFrequencies -f 2.35 -t 0'.split()],
|
"prepare": ["likwid-setFrequencies -f 2.35 -t 0".split()],
|
||||||
'IACA': None,
|
"IACA": None,
|
||||||
'OSACA': 'ZEN2',
|
"OSACA": "ZEN2",
|
||||||
'LLVM-MCA': '-mcpu=znver2',
|
"LLVM-MCA": "-mcpu=znver2",
|
||||||
'Ithemal': None,
|
"Ithemal": None,
|
||||||
'isa': 'x86',
|
"isa": "x86",
|
||||||
'perfevents': [],
|
"perfevents": [],
|
||||||
"cflags": {
|
"cflags": {
|
||||||
"clang": {
|
"clang": {
|
||||||
"Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
|
"Ofast": "-Ofast -march=znver2 -ffreestanding".split(),
|
||||||
@@ -157,22 +167,24 @@ arch_info = {
|
|||||||
"O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
|
"O1": "-O1 -march=znver2 -ffreestanding -falign-loops=16".split(),
|
||||||
},
|
},
|
||||||
"icc": {
|
"icc": {
|
||||||
"Ofast": "-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"Ofast": (
|
||||||
|
"-Ofast -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops"
|
||||||
|
).split(),
|
||||||
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O3": "-O3 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O2": "-O2 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
"O1": "-O1 -xAVX2 -fno-alias -nolib-inline -ffreestanding -falign-loops".split(),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'TX2': {
|
"TX2": {
|
||||||
'Clock [MHz]': 2200, # reading out via perf. counters is not supported
|
"Clock [MHz]": 2200, # reading out via perf. counters is not supported
|
||||||
'IACA': None,
|
"IACA": None,
|
||||||
'OSACA': 'TX2',
|
"OSACA": "TX2",
|
||||||
'assign_optimal_throughput': True,
|
"assign_optimal_throughput": True,
|
||||||
'LLVM-MCA': '-mcpu=thunderx2t99 -march=aarch64',
|
"LLVM-MCA": "-mcpu=thunderx2t99 -march=aarch64",
|
||||||
'Ithemal': None,
|
"Ithemal": None,
|
||||||
'isa': 'aarch64',
|
"isa": "aarch64",
|
||||||
'perfevents': [],
|
"perfevents": [],
|
||||||
"cflags": {
|
"cflags": {
|
||||||
"clang": {
|
"clang": {
|
||||||
"Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
"Ofast": "-Ofast -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||||
@@ -188,16 +200,16 @@ arch_info = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'A64FX': {
|
"A64FX": {
|
||||||
'Clock [MHz]': 1800, # reading out via perf. counters is not supported
|
"Clock [MHz]": 1800, # reading out via perf. counters is not supported
|
||||||
'L2_volume_metric': 'L1<->L2 data volume [GBytes]',
|
"L2_volume_metric": "L1<->L2 data volume [GBytes]",
|
||||||
'IACA': None,
|
"IACA": None,
|
||||||
'OSACA': 'A64FX',
|
"OSACA": "A64FX",
|
||||||
'assign_optimal_throughput': False,
|
"assign_optimal_throughput": False,
|
||||||
'LLVM-MCA': '-mcpu=a64fx -march=aarch64',
|
"LLVM-MCA": "-mcpu=a64fx -march=aarch64",
|
||||||
'Ithemal': None,
|
"Ithemal": None,
|
||||||
'isa': 'aarch64',
|
"isa": "aarch64",
|
||||||
'perfevents': [],
|
"perfevents": [],
|
||||||
"cflags": {
|
"cflags": {
|
||||||
"gcc": {
|
"gcc": {
|
||||||
"Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
|
"Ofast": "-Ofast -msve-vector-bits=512 -march=armv8.2-a+sve -ffreestanding".split(),
|
||||||
@@ -211,7 +223,7 @@ arch_info = {
|
|||||||
"O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
"O2": "-O2 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||||
"O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
"O1": "-O1 -target aarch64-unknown-linux-gnu -ffreestanding".split(),
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -231,12 +243,13 @@ def get_kernels(kernels=None):
|
|||||||
if kernels is None:
|
if kernels is None:
|
||||||
kernels = []
|
kernels = []
|
||||||
for f in glob("kernels/*.c"):
|
for f in glob("kernels/*.c"):
|
||||||
f = f.rsplit('.', 1)[0].split('/', 1)[1]
|
f = f.rsplit(".", 1)[0].split("/", 1)[1]
|
||||||
if f == "dummy":
|
if f == "dummy":
|
||||||
continue
|
continue
|
||||||
kernels.append(f)
|
kernels.append(f)
|
||||||
return kernels
|
return kernels
|
||||||
|
|
||||||
|
|
||||||
# Columns:
|
# Columns:
|
||||||
# arch
|
# arch
|
||||||
# kernel
|
# kernel
|
||||||
@@ -259,6 +272,7 @@ def get_kernels(kernels=None):
|
|||||||
# allruns [list (length, repetitions, cy/it, L2 B/it)]
|
# allruns [list (length, repetitions, cy/it, L2 B/it)]
|
||||||
# perfevents [dict event: counter/it]
|
# perfevents [dict event: counter/it]
|
||||||
|
|
||||||
|
|
||||||
def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
|
def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mca=True):
|
||||||
arch = get_current_arch()
|
arch = get_current_arch()
|
||||||
if arch is None:
|
if arch is None:
|
||||||
@@ -268,90 +282,132 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
|||||||
islocal = True
|
islocal = True
|
||||||
arches = [arch]
|
arches = [arch]
|
||||||
ainfo = arch_info.get(arch)
|
ainfo = arch_info.get(arch)
|
||||||
if 'prepare' in ainfo:
|
if "prepare" in ainfo:
|
||||||
for cmd in ainfo['prepare']:
|
for cmd in ainfo["prepare"]:
|
||||||
check_call(cmd)
|
check_call(cmd)
|
||||||
for arch in arches:
|
for arch in arches:
|
||||||
ainfo = arch_info.get(arch)
|
ainfo = arch_info.get(arch)
|
||||||
print(arch)
|
print(arch)
|
||||||
data_path = Path(f"build/{arch}/data.pkl")
|
data_path = Path(f"build/{arch}/data.pkl")
|
||||||
if data_path.exists():
|
if data_path.exists():
|
||||||
with data_path.open('rb') as f:
|
with data_path.open("rb") as f:
|
||||||
data = pickle.load(f)
|
data = pickle.load(f)
|
||||||
else:
|
else:
|
||||||
data = []
|
data = []
|
||||||
data_lastsaved = deepcopy(data)
|
data_lastsaved = deepcopy(data)
|
||||||
for compiler, compiler_cflags in ainfo['cflags'].items():
|
for compiler, compiler_cflags in ainfo["cflags"].items():
|
||||||
if not shutil.which(compiler) and islocal:
|
if not shutil.which(compiler) and islocal:
|
||||||
print(compiler, "not found in path! Skipping...")
|
print(compiler, "not found in path! Skipping...")
|
||||||
continue
|
continue
|
||||||
for cflags_name, cflags in compiler_cflags.items():
|
for cflags_name, cflags in compiler_cflags.items():
|
||||||
for kernel in get_kernels():
|
for kernel in get_kernels():
|
||||||
print(f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
|
print(
|
||||||
end=": ", flush=True)
|
f"{kernel:<15} {arch:>5} {compiler:>5} {cflags_name:>6}",
|
||||||
row = list([r for r in data
|
end=": ",
|
||||||
if r['arch'] == arch and r['kernel'] == kernel and
|
flush=True,
|
||||||
r['compiler'] == compiler and r['cflags_name'] == cflags_name])
|
)
|
||||||
|
row = list(
|
||||||
|
[
|
||||||
|
r
|
||||||
|
for r in data
|
||||||
|
if r["arch"] == arch
|
||||||
|
and r["kernel"] == kernel
|
||||||
|
and r["compiler"] == compiler
|
||||||
|
and r["cflags_name"] == cflags_name
|
||||||
|
]
|
||||||
|
)
|
||||||
if row:
|
if row:
|
||||||
row = row[0]
|
row = row[0]
|
||||||
else:
|
else:
|
||||||
orig_row = None
|
|
||||||
row = {
|
row = {
|
||||||
'arch': arch,
|
"arch": arch,
|
||||||
'kernel': kernel,
|
"kernel": kernel,
|
||||||
'compiler': compiler,
|
"compiler": compiler,
|
||||||
'cflags_name': cflags_name,
|
"cflags_name": cflags_name,
|
||||||
'element_size': 8,
|
"element_size": 8,
|
||||||
}
|
}
|
||||||
data.append(row)
|
data.append(row)
|
||||||
|
|
||||||
# Build
|
# Build
|
||||||
print("build", end="", flush=True)
|
print("build", end="", flush=True)
|
||||||
asm_path, exec_path, overwrite = build_kernel(
|
asm_path, exec_path, overwrite = build_kernel(
|
||||||
kernel, arch, compiler, cflags, cflags_name, dontbuild=not islocal)
|
kernel,
|
||||||
|
arch,
|
||||||
|
compiler,
|
||||||
|
cflags,
|
||||||
|
cflags_name,
|
||||||
|
dontbuild=not islocal,
|
||||||
|
)
|
||||||
|
|
||||||
if overwrite:
|
if overwrite:
|
||||||
# clear all measurment information
|
# clear all measurment information
|
||||||
row['best_length'] = None
|
row["best_length"] = None
|
||||||
row['best_runtime'] = None
|
row["best_runtime"] = None
|
||||||
row['L2_traffic'] = None
|
row["L2_traffic"] = None
|
||||||
row['allruns'] = None
|
row["allruns"] = None
|
||||||
row['perfevents'] = None
|
row["perfevents"] = None
|
||||||
|
|
||||||
# Mark for IACA, OSACA and LLVM-MCA
|
# Mark for IACA, OSACA and LLVM-MCA
|
||||||
print("mark", end="", flush=True)
|
print("mark", end="", flush=True)
|
||||||
try:
|
try:
|
||||||
marked_asmfile, marked_objfile, row['pointer_increment'], overwrite = mark(
|
(
|
||||||
asm_path, compiler, cflags, isa=ainfo['isa'], overwrite=overwrite)
|
marked_asmfile,
|
||||||
row['marking_error'] = None
|
marked_objfile,
|
||||||
|
row["pointer_increment"],
|
||||||
|
overwrite,
|
||||||
|
) = mark(
|
||||||
|
asm_path,
|
||||||
|
compiler,
|
||||||
|
cflags,
|
||||||
|
isa=ainfo["isa"],
|
||||||
|
overwrite=overwrite,
|
||||||
|
)
|
||||||
|
row["marking_error"] = None
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
row['marking_error'] = str(e)
|
row["marking_error"] = str(e)
|
||||||
print(":", e)
|
print(":", e)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if overwrite:
|
if overwrite:
|
||||||
# clear all model generated information
|
# clear all model generated information
|
||||||
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
|
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
|
||||||
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
|
for k in [
|
||||||
row[model+'_'+k] = None
|
"ports",
|
||||||
|
"prediction",
|
||||||
|
"throughput",
|
||||||
|
"cp",
|
||||||
|
"lcd",
|
||||||
|
"raw",
|
||||||
|
]:
|
||||||
|
row[model + "_" + k] = None
|
||||||
|
|
||||||
for model in ['IACA', 'OSACA', 'LLVM-MCA', 'Ithemal']:
|
for model in ["IACA", "OSACA", "LLVM-MCA", "Ithemal"]:
|
||||||
for k in ['ports', 'prediction', 'throughput', 'cp', 'lcd', 'raw']:
|
for k in [
|
||||||
if model+'_'+k not in row:
|
"ports",
|
||||||
row[model+'_'+k] = None
|
"prediction",
|
||||||
|
"throughput",
|
||||||
|
"cp",
|
||||||
|
"lcd",
|
||||||
|
"raw",
|
||||||
|
]:
|
||||||
|
if model + "_" + k not in row:
|
||||||
|
row[model + "_" + k] = None
|
||||||
|
|
||||||
# Analyze with IACA, if requested and configured
|
# Analyze with IACA, if requested and configured
|
||||||
if iaca and ainfo['IACA'] is not None:
|
if iaca and ainfo["IACA"] is not None:
|
||||||
print("IACA", end="", flush=True)
|
print("IACA", end="", flush=True)
|
||||||
if not row.get('IACA_ports'):
|
if not row.get("IACA_ports"):
|
||||||
row['IACA_raw'] = iaca_analyse_instrumented_binary(
|
row["IACA_raw"] = iaca_analyse_instrumented_binary(
|
||||||
marked_objfile, micro_architecture=ainfo['IACA'])
|
marked_objfile, micro_architecture=ainfo["IACA"]
|
||||||
row['IACA_ports'] = \
|
)
|
||||||
{k: v/(row['pointer_increment']/row['element_size'])
|
row["IACA_ports"] = {
|
||||||
for k,v in row['IACA_raw']['port cycles'].items()}
|
k: v / (row["pointer_increment"] / row["element_size"])
|
||||||
row['IACA_prediction'] = row['IACA_raw']['throughput']/(
|
for k, v in row["IACA_raw"]["port cycles"].items()
|
||||||
row['pointer_increment']/row['element_size'])
|
}
|
||||||
row['IACA_throughput'] = max(row['IACA_ports'].values())
|
row["IACA_prediction"] = row["IACA_raw"]["throughput"] / (
|
||||||
|
row["pointer_increment"] / row["element_size"]
|
||||||
|
)
|
||||||
|
row["IACA_throughput"] = max(row["IACA_ports"].values())
|
||||||
print(". ", end="", flush=True)
|
print(". ", end="", flush=True)
|
||||||
else:
|
else:
|
||||||
print("! ", end="", flush=True)
|
print("! ", end="", flush=True)
|
||||||
@@ -359,56 +415,70 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
|||||||
# Analyze with OSACA, if requested
|
# Analyze with OSACA, if requested
|
||||||
if osaca:
|
if osaca:
|
||||||
print("OSACA", end="", flush=True)
|
print("OSACA", end="", flush=True)
|
||||||
if not row.get('OSACA_ports'):
|
if not row.get("OSACA_ports"):
|
||||||
row['OSACA_raw'] = osaca_analyse_instrumented_assembly(
|
row["OSACA_raw"] = osaca_analyse_instrumented_assembly(
|
||||||
marked_asmfile, micro_architecture=ainfo['OSACA'],
|
marked_asmfile,
|
||||||
assign_optimal_throughput=ainfo.get('assign_optimal_throughput',
|
micro_architecture=ainfo["OSACA"],
|
||||||
True))
|
assign_optimal_throughput=ainfo.get(
|
||||||
row['OSACA_ports'] = \
|
"assign_optimal_throughput", True
|
||||||
{k: v/(row['pointer_increment']/row['element_size'])
|
),
|
||||||
for k,v in row['OSACA_raw']['port cycles'].items()}
|
)
|
||||||
row['OSACA_prediction'] = row['OSACA_raw']['throughput']/(
|
row["OSACA_ports"] = {
|
||||||
row['pointer_increment']/row['element_size'])
|
k: v / (row["pointer_increment"] / row["element_size"])
|
||||||
row['OSACA_throughput'] = max(row['OSACA_ports'].values())
|
for k, v in row["OSACA_raw"]["port cycles"].items()
|
||||||
row['OSACA_cp'] = row['OSACA_raw']['cp_latency']/(
|
}
|
||||||
row['pointer_increment']/row['element_size'])
|
row["OSACA_prediction"] = row["OSACA_raw"]["throughput"] / (
|
||||||
row['OSACA_lcd'] = row['OSACA_raw']['lcd']/(
|
row["pointer_increment"] / row["element_size"]
|
||||||
row['pointer_increment']/row['element_size'])
|
)
|
||||||
|
row["OSACA_throughput"] = max(row["OSACA_ports"].values())
|
||||||
|
row["OSACA_cp"] = row["OSACA_raw"]["cp_latency"] / (
|
||||||
|
row["pointer_increment"] / row["element_size"]
|
||||||
|
)
|
||||||
|
row["OSACA_lcd"] = row["OSACA_raw"]["lcd"] / (
|
||||||
|
row["pointer_increment"] / row["element_size"]
|
||||||
|
)
|
||||||
print(". ", end="", flush=True)
|
print(". ", end="", flush=True)
|
||||||
else:
|
else:
|
||||||
print("! ", end="", flush=True)
|
print("! ", end="", flush=True)
|
||||||
|
|
||||||
# Analyze with LLVM-MCA, if requested and configured
|
# Analyze with LLVM-MCA, if requested and configured
|
||||||
if llvm_mca and ainfo['LLVM-MCA'] is not None:
|
if llvm_mca and ainfo["LLVM-MCA"] is not None:
|
||||||
print("LLVM-MCA", end="", flush=True)
|
print("LLVM-MCA", end="", flush=True)
|
||||||
if not row.get('LLVM-MCA_ports'):
|
if not row.get("LLVM-MCA_ports"):
|
||||||
row['LLVM-MCA_raw'] = llvm_mca_analyse_instrumented_assembly(
|
row["LLVM-MCA_raw"] = llvm_mca_analyse_instrumented_assembly(
|
||||||
marked_asmfile,
|
marked_asmfile,
|
||||||
micro_architecture=ainfo['LLVM-MCA'],
|
micro_architecture=ainfo["LLVM-MCA"],
|
||||||
isa=ainfo['isa'])
|
isa=ainfo["isa"],
|
||||||
row['LLVM-MCA_ports'] = \
|
)
|
||||||
{k: v/(row['pointer_increment']/row['element_size'])
|
row["LLVM-MCA_ports"] = {
|
||||||
for k,v in row['LLVM-MCA_raw']['port cycles'].items()}
|
k: v / (row["pointer_increment"] / row["element_size"])
|
||||||
row['LLVM-MCA_prediction'] =row['LLVM-MCA_raw']['throughput']/(
|
for k, v in row["LLVM-MCA_raw"]["port cycles"].items()
|
||||||
row['pointer_increment']/row['element_size'])
|
}
|
||||||
row['LLVM-MCA_throughput'] = max(row['LLVM-MCA_ports'].values())
|
row["LLVM-MCA_prediction"] = row["LLVM-MCA_raw"]["throughput"] / (
|
||||||
row['LLVM-MCA_cp'] = row['LLVM-MCA_raw']['cp_latency']/(
|
row["pointer_increment"] / row["element_size"]
|
||||||
row['pointer_increment']/row['element_size'])
|
)
|
||||||
row['LLVM-MCA_lcd'] = row['LLVM-MCA_raw']['lcd']/(
|
row["LLVM-MCA_throughput"] = max(row["LLVM-MCA_ports"].values())
|
||||||
row['pointer_increment']/row['element_size'])
|
row["LLVM-MCA_cp"] = row["LLVM-MCA_raw"]["cp_latency"] / (
|
||||||
|
row["pointer_increment"] / row["element_size"]
|
||||||
|
)
|
||||||
|
row["LLVM-MCA_lcd"] = row["LLVM-MCA_raw"]["lcd"] / (
|
||||||
|
row["pointer_increment"] / row["element_size"]
|
||||||
|
)
|
||||||
print(". ", end="", flush=True)
|
print(". ", end="", flush=True)
|
||||||
else:
|
else:
|
||||||
print("! ", end="", flush=True)
|
print("! ", end="", flush=True)
|
||||||
|
|
||||||
# Analyze with Ithemal, if not running local and configured
|
# Analyze with Ithemal, if not running local and configured
|
||||||
if ainfo['Ithemal'] is not None and not islocal:
|
if ainfo["Ithemal"] is not None and not islocal:
|
||||||
print("Ithemal", end="", flush=True)
|
print("Ithemal", end="", flush=True)
|
||||||
if not row.get('Ithemal_prediction'):
|
if not row.get("Ithemal_prediction"):
|
||||||
with open(marked_asmfile) as f:
|
with open(marked_asmfile) as f:
|
||||||
parsed_code = parse_asm(f.read(), ainfo['isa'])
|
parsed_code = parse_asm(f.read(), ainfo["isa"])
|
||||||
kernel = reduce_to_section(parsed_code, ainfo['isa'])
|
kernel = reduce_to_section(parsed_code, ainfo["isa"])
|
||||||
row['Ithemal_prediction'] = get_ithemal_prediction(
|
row["Ithemal_prediction"] = get_ithemal_prediction(
|
||||||
get_intel_style_code(marked_objfile), model=ainfo['Ithemal'])
|
get_intel_style_code(marked_objfile),
|
||||||
|
model=ainfo["Ithemal"],
|
||||||
|
)
|
||||||
print(". ", end="", flush=True)
|
print(". ", end="", flush=True)
|
||||||
else:
|
else:
|
||||||
print("! ", end="", flush=True)
|
print("! ", end="", flush=True)
|
||||||
@@ -416,43 +486,45 @@ def build_mark_run_all_kernels(measurements=True, osaca=True, iaca=True, llvm_mc
|
|||||||
if measurements and islocal:
|
if measurements and islocal:
|
||||||
# run measurements if on same hardware
|
# run measurements if on same hardware
|
||||||
print("scale", end="", flush=True)
|
print("scale", end="", flush=True)
|
||||||
if not row.get('allruns'):
|
if not row.get("allruns"):
|
||||||
# find best length with concurrent L2 measurement
|
# find best length with concurrent L2 measurement
|
||||||
scaling_runs, best = scalingrun(exec_path)
|
scaling_runs, best = scalingrun(exec_path)
|
||||||
row['best_length'] = best[0]
|
row["best_length"] = best[0]
|
||||||
row['best_runtime'] = best[2]
|
row["best_runtime"] = best[2]
|
||||||
row['L2_traffic'] = best[3]
|
row["L2_traffic"] = best[3]
|
||||||
row['allruns'] = scaling_runs
|
row["allruns"] = scaling_runs
|
||||||
print(f"({best[0]}). ", end="", flush=True)
|
print(f"({best[0]}). ", end="", flush=True)
|
||||||
else:
|
else:
|
||||||
print(f"({row.get('best_length', None)})! ", end="", flush=True)
|
print(
|
||||||
|
f"({row.get('best_length', None)})! ",
|
||||||
|
end="",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# dump to file
|
# dump to file
|
||||||
if data != data_lastsaved:
|
if data != data_lastsaved:
|
||||||
print('saving... ', end="", flush=True)
|
print("saving... ", end="", flush=True)
|
||||||
with data_path.open('wb') as f:
|
with data_path.open("wb") as f:
|
||||||
try:
|
try:
|
||||||
pickle.dump(data, f)
|
pickle.dump(data, f)
|
||||||
data_lastsaved = deepcopy(data)
|
data_lastsaved = deepcopy(data)
|
||||||
print('saved!')
|
print("saved!")
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
pickle.dump(data, f)
|
pickle.dump(data, f)
|
||||||
print('saved!')
|
print("saved!")
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1 * 1024 + 1)):
|
||||||
def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1)):
|
# print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
|
||||||
#print('{:>8} {:>10} {:>10}'.format("x", "cy/it", "L2 B/it"))
|
parameters = chain(*[[total_iterations // i, i] for i in lengths])
|
||||||
parameters = chain(*[[total_iterations//i, i] for i in lengths])
|
|
||||||
# TODO use arch specific events and grooup
|
# TODO use arch specific events and grooup
|
||||||
r, o = perfctr(chain([kernel_exec], map(str, parameters)),
|
r, o = perfctr(chain([kernel_exec], map(str, parameters)), 1, group="L2")
|
||||||
1, group="L2")
|
|
||||||
global_infos = {}
|
global_infos = {}
|
||||||
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", l) for l in o]:
|
for m in [re.match(r"(:?([a-z_\-0-9]+):)?([a-z]+): ([a-z\_\-0-9]+)", line) for line in o]:
|
||||||
if m is not None:
|
if m is not None:
|
||||||
try:
|
try:
|
||||||
v = int(m.group(4))
|
v = int(m.group(4))
|
||||||
@@ -464,37 +536,45 @@ def scalingrun(kernel_exec, total_iterations=25000000, lengths=range(8, 1*1024+1
|
|||||||
r[m.group(2)][m.group(3)] = v
|
r[m.group(2)][m.group(3)] = v
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
best = (float('inf'), None)
|
best = (float("inf"), None)
|
||||||
for markername, mmetrics in r.items():
|
for markername, mmetrics in r.items():
|
||||||
kernelname, repetitions, *_, xlength = markername.split('_')
|
kernelname, repetitions, *_, xlength = markername.split("_")
|
||||||
repetitions = int(repetitions)
|
repetitions = int(repetitions)
|
||||||
xlength = int(xlength)
|
xlength = int(xlength)
|
||||||
total_iterations = mmetrics['repetitions'] * mmetrics['iterations']
|
total_iterations = mmetrics["repetitions"] * mmetrics["iterations"]
|
||||||
if 'Clock [MHz]' in mmetrics:
|
if "Clock [MHz]" in mmetrics:
|
||||||
clock_hz = mmetrics['Clock [MHz]']*1e6
|
clock_hz = mmetrics["Clock [MHz]"] * 1e6
|
||||||
else:
|
else:
|
||||||
clock_hz = arch_info[get_current_arch()]['Clock [MHz]']*1e6
|
clock_hz = arch_info[get_current_arch()]["Clock [MHz]"] * 1e6
|
||||||
cyperit = mmetrics['Runtime (RDTSC) [s]'] * clock_hz / total_iterations
|
cyperit = mmetrics["Runtime (RDTSC) [s]"] * clock_hz / total_iterations
|
||||||
# TODO use arch specific events and grooup
|
# TODO use arch specific events and grooup
|
||||||
if 'L2D load data volume [GBytes]' in mmetrics:
|
if "L2D load data volume [GBytes]" in mmetrics:
|
||||||
l2perit = (mmetrics['L2D load data volume [GBytes]'] +
|
l2perit = (
|
||||||
mmetrics.get('L2D evict data volume [GBytes]', 0))*1e9 / total_iterations
|
(
|
||||||
|
mmetrics["L2D load data volume [GBytes]"]
|
||||||
|
+ mmetrics.get("L2D evict data volume [GBytes]", 0)
|
||||||
|
)
|
||||||
|
* 1e9
|
||||||
|
/ total_iterations
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
l2perit = \
|
l2perit = (
|
||||||
mmetrics[arch_info[get_current_arch()]['L2_volume_metric']]*1e9 / total_iterations
|
mmetrics[arch_info[get_current_arch()]["L2_volume_metric"]]
|
||||||
results.append(
|
* 1e9
|
||||||
(xlength, repetitions, cyperit, l2perit)
|
/ total_iterations
|
||||||
)
|
)
|
||||||
|
results.append((xlength, repetitions, cyperit, l2perit))
|
||||||
if cyperit < best[0]:
|
if cyperit < best[0]:
|
||||||
best = cyperit, results[-1]
|
best = cyperit, results[-1]
|
||||||
return results, best[1]
|
return results, best[1]
|
||||||
|
|
||||||
|
|
||||||
def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
||||||
# Mark assembly for IACA, OSACA and LLVM-MCA
|
# Mark assembly for IACA, OSACA and LLVM-MCA
|
||||||
marked_asm_path = Path(asm_path).with_suffix(".marked.s")
|
marked_asm_path = Path(asm_path).with_suffix(".marked.s")
|
||||||
if not marked_asm_path.exists() or overwrite:
|
if not marked_asm_path.exists() or overwrite:
|
||||||
overwrite = True
|
overwrite = True
|
||||||
with open(asm_path) as fa, open(marked_asm_path, 'w') as fm:
|
with open(asm_path) as fa, open(marked_asm_path, "w") as fm:
|
||||||
try:
|
try:
|
||||||
_, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
|
_, pointer_increment = asm_instrumentation(fa, fm, isa=isa)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
@@ -505,37 +585,46 @@ def mark(asm_path, compiler, cflags, isa, overwrite=False):
|
|||||||
# use maked assembly and extract asm_block and pointer_increment
|
# use maked assembly and extract asm_block and pointer_increment
|
||||||
with open(marked_asm_path) as f:
|
with open(marked_asm_path) as f:
|
||||||
marked_asm = f.read()
|
marked_asm = f.read()
|
||||||
m = re.search(r'pointer_increment=([0-9]+)', marked_asm)
|
m = re.search(r"pointer_increment=([0-9]+)", marked_asm)
|
||||||
if m:
|
if m:
|
||||||
pointer_increment = int(m.group(1))
|
pointer_increment = int(m.group(1))
|
||||||
else:
|
else:
|
||||||
os.unlink(marked_asm_path)
|
os.unlink(marked_asm_path)
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Could not find `pointer_increment=<byte increment>`. Plase place into file.")
|
"Could not find `pointer_increment=<byte increment>`. Plase place into file."
|
||||||
|
)
|
||||||
print("! ", end="", flush=True)
|
print("! ", end="", flush=True)
|
||||||
|
|
||||||
# Compile marked assembly to object for IACA
|
# Compile marked assembly to object for IACA
|
||||||
marked_obj = Path(asm_path).with_suffix(".marked.o")
|
marked_obj = Path(asm_path).with_suffix(".marked.o")
|
||||||
if not marked_obj.exists():
|
if not marked_obj.exists():
|
||||||
check_call([compiler] + ['-c', str(marked_asm_path), '-o', str(marked_obj)])
|
check_call([compiler] + ["-c", str(marked_asm_path), "-o", str(marked_obj)])
|
||||||
|
|
||||||
return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
|
return str(marked_asm_path), str(marked_obj), pointer_increment, overwrite
|
||||||
|
|
||||||
|
|
||||||
def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=False,
|
def build_kernel(
|
||||||
dontbuild=False):
|
kernel,
|
||||||
|
architecture,
|
||||||
|
compiler,
|
||||||
|
cflags,
|
||||||
|
cflags_name,
|
||||||
|
overwrite=False,
|
||||||
|
dontbuild=False,
|
||||||
|
):
|
||||||
build_path = f"build/{architecture}/{compiler}/{cflags_name}"
|
build_path = f"build/{architecture}/{compiler}/{cflags_name}"
|
||||||
kernel_assembly = f"{build_path}/{kernel}.s"
|
kernel_assembly = f"{build_path}/{kernel}.s"
|
||||||
kernel_object= f"{build_path}/{kernel}.o"
|
kernel_object = f"{build_path}/{kernel}.o"
|
||||||
executable = f"{build_path}/{kernel}"
|
executable = f"{build_path}/{kernel}"
|
||||||
Path(build_path).mkdir(parents=True, exist_ok=True)
|
Path(build_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if not overwrite:
|
if not overwrite:
|
||||||
# Overwrite if any kernel specific file is missing
|
# Overwrite if any kernel specific file is missing
|
||||||
overwrite = (
|
overwrite = (
|
||||||
not os.path.exists(kernel_object) or
|
not os.path.exists(kernel_object)
|
||||||
not os.path.exists(kernel_assembly) or
|
or not os.path.exists(kernel_assembly)
|
||||||
not os.path.exists(executable))
|
or not os.path.exists(executable)
|
||||||
|
)
|
||||||
|
|
||||||
if dontbuild and overwrite:
|
if dontbuild and overwrite:
|
||||||
raise ValueError("Must build, but not allowed.")
|
raise ValueError("Must build, but not allowed.")
|
||||||
@@ -545,31 +634,35 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
|
|||||||
|
|
||||||
if not Path(f"{build_path}/compiler_version").exists():
|
if not Path(f"{build_path}/compiler_version").exists():
|
||||||
# Document compiler version
|
# Document compiler version
|
||||||
with open(f"{build_path}/compiler_version", 'w') as f:
|
with open(f"{build_path}/compiler_version", "w") as f:
|
||||||
f.write(check_output([compiler, "-v"], encoding='utf8', stderr=STDOUT))
|
f.write(check_output([compiler, "-v"], encoding="utf8", stderr=STDOUT))
|
||||||
|
|
||||||
if overwrite:
|
if overwrite:
|
||||||
# build object + assembly
|
# build object + assembly
|
||||||
check_call([compiler] +
|
check_call([compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-o", kernel_object])
|
||||||
cflags +
|
check_call(
|
||||||
["-c", f"kernels/{kernel}.c", "-o", kernel_object])
|
[compiler] + cflags + ["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly]
|
||||||
check_call([compiler] +
|
)
|
||||||
cflags +
|
|
||||||
["-c", f"kernels/{kernel}.c", "-S", "-o", kernel_assembly])
|
|
||||||
|
|
||||||
# build main and link executable
|
# build main and link executable
|
||||||
executable_cflags = [
|
executable_cflags = [
|
||||||
os.environ["LIKWID_DEFINES"],
|
os.environ["LIKWID_DEFINES"],
|
||||||
os.environ["LIKWID_INC"],
|
os.environ["LIKWID_INC"],
|
||||||
os.environ["LIKWID_LIB"]
|
os.environ["LIKWID_LIB"],
|
||||||
] + ['-Ofast']
|
] + ["-Ofast"]
|
||||||
check_call([compiler] + executable_cflags + [
|
check_call(
|
||||||
f"{build_path}/dummy.o",
|
[compiler]
|
||||||
kernel_object,
|
+ executable_cflags
|
||||||
"-DMAIN",
|
+ [
|
||||||
f"kernels/{kernel}.c",
|
f"{build_path}/dummy.o",
|
||||||
"-llikwid",
|
kernel_object,
|
||||||
"-o", executable])
|
"-DMAIN",
|
||||||
|
f"kernels/{kernel}.c",
|
||||||
|
"-llikwid",
|
||||||
|
"-o",
|
||||||
|
executable,
|
||||||
|
]
|
||||||
|
)
|
||||||
print(". ", end="", flush=True)
|
print(". ", end="", flush=True)
|
||||||
else:
|
else:
|
||||||
print("! ", end="", flush=True)
|
print("! ", end="", flush=True)
|
||||||
@@ -577,7 +670,7 @@ def build_kernel(kernel, architecture, compiler, cflags, cflags_name, overwrite=
|
|||||||
return kernel_assembly, executable, overwrite
|
return kernel_assembly, executable, overwrite
|
||||||
|
|
||||||
|
|
||||||
def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
def perfctr(cmd, cores, group="MEM", code_markers=True, verbose=0):
|
||||||
"""
|
"""
|
||||||
Run *cmd* with likwid-perfctr and returns result as dict.
|
Run *cmd* with likwid-perfctr and returns result as dict.
|
||||||
|
|
||||||
@@ -586,30 +679,32 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
|||||||
if CLI argument cores > 1, running with multi-core, otherwise single-core
|
if CLI argument cores > 1, running with multi-core, otherwise single-core
|
||||||
"""
|
"""
|
||||||
# Making sure likwid-perfctr is available:
|
# Making sure likwid-perfctr is available:
|
||||||
if benchmark.find_executable('likwid-perfctr') is None:
|
if benchmark.find_executable("likwid-perfctr") is None:
|
||||||
print("likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
|
print(
|
||||||
file=sys.stderr)
|
"likwid-perfctr was not found. Make sure likwid is installed and found in PATH.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# FIXME currently only single core measurements support!
|
# FIXME currently only single core measurements support!
|
||||||
perf_cmd = ['likwid-perfctr', '-f', '-O', '-g', group]
|
perf_cmd = ["likwid-perfctr", "-f", "-O", "-g", group]
|
||||||
|
|
||||||
cpu = 'S0:0'
|
cpu = "S0:0"
|
||||||
if cores > 1:
|
if cores > 1:
|
||||||
cpu += '-'+str(cores-1)
|
cpu += "-" + str(cores - 1)
|
||||||
|
|
||||||
# Pinned and measured on cpu
|
# Pinned and measured on cpu
|
||||||
perf_cmd += ['-C', cpu]
|
perf_cmd += ["-C", cpu]
|
||||||
|
|
||||||
# code must be marked using likwid markers
|
# code must be marked using likwid markers
|
||||||
perf_cmd.append('-m')
|
perf_cmd.append("-m")
|
||||||
|
|
||||||
perf_cmd += cmd
|
perf_cmd += cmd
|
||||||
if verbose > 1:
|
if verbose > 1:
|
||||||
print(' '.join(perf_cmd))
|
print(" ".join(perf_cmd))
|
||||||
try:
|
try:
|
||||||
with benchmark.fix_env_variable('OMP_NUM_THREADS', None):
|
with benchmark.fix_env_variable("OMP_NUM_THREADS", None):
|
||||||
output = check_output(perf_cmd).decode('utf-8').split('\n')
|
output = check_output(perf_cmd).decode("utf-8").split("\n")
|
||||||
except CalledProcessError as e:
|
except CalledProcessError as e:
|
||||||
print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
|
print("Executing benchmark failed: {!s}".format(e), file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -626,7 +721,7 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
|||||||
m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
|
m = re.match(r"TABLE,Region ([a-z\-0-9_]+),", line)
|
||||||
if m:
|
if m:
|
||||||
cur_region_name = m.group(1)
|
cur_region_name = m.group(1)
|
||||||
line = line.split(',')
|
line = line.split(",")
|
||||||
try:
|
try:
|
||||||
# Metrics
|
# Metrics
|
||||||
cur_region_data[line[0]] = float(line[1])
|
cur_region_data[line[0]] = float(line[1])
|
||||||
@@ -639,12 +734,13 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
# Event counters
|
# Event counters
|
||||||
if line[2] == '-' or line[2] == 'nan':
|
if line[2] == "-" or line[2] == "nan":
|
||||||
counter_value = 0
|
counter_value = 0
|
||||||
else:
|
else:
|
||||||
counter_value = int(line[2])
|
counter_value = int(line[2])
|
||||||
if re.fullmatch(r'[A-Z0-9_]+', line[0]) and \
|
if re.fullmatch(r"[A-Z0-9_]+", line[0]) and re.fullmatch(
|
||||||
re.fullmatch(r'[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*', line[1]):
|
r"[A-Z0-9]+(:[A-Z0-9]+=[0-9A-Fa-fx]+)*", line[1]
|
||||||
|
):
|
||||||
cur_region_data.setdefault(line[0], {})
|
cur_region_data.setdefault(line[0], {})
|
||||||
cur_region_data[line[0]][line[1]] = counter_value
|
cur_region_data[line[0]][line[1]] = counter_value
|
||||||
continue
|
continue
|
||||||
@@ -659,49 +755,52 @@ def perfctr(cmd, cores, group='MEM', code_markers=True, verbose=0):
|
|||||||
|
|
||||||
|
|
||||||
def remove_html_tags(text):
    """
    Return *text* with everything that looks like an HTML tag removed.

    A tag is the shortest run of characters starting with ``<`` and ending with ``>``
    on a single line; ``.`` does not match newlines here, so a ``<...>`` sequence that
    spans several lines is left untouched.

    :param text: string that may contain HTML markup
    :returns: *text* without the matched ``<...>`` sequences
    """
    # Non-greedy match, so "<b>x</b>" loses both tags but keeps the "x" in between.
    return re.sub(r"<.*?>", "", text)
|
||||||
|
|
||||||
|
|
||||||
def get_intel_style_code(marked_objfile):
    """
    Disassemble *marked_objfile* with objdump and return the marked kernel region.

    Every line of the objdump output is stripped of surrounding whitespace. The text
    between the ``mov ebx, 111 / nop`` and ``mov ebx, 222 / nop`` marker sequences is
    extracted and its last two lines (label and jump) are dropped.

    :param marked_objfile: path of the object file that contains the markers
    :returns: newline-joined disassembly of the marked region
    :raises ValueError: if one of the markers can not be found in the disassembly
    :raises subprocess.CalledProcessError: if objdump exits with a non-zero status
    """
    start_marker = "mov\tebx, 111\nnop"
    end_marker = "mov\tebx, 222\nnop"

    # Disassemble with Intel syntax
    cmd = [
        "objdump",
        "-d",
        "--demangle",
        "--no-leading-addr",
        "--no-leading-headers",
        "--no-show-raw-insn",
        "--x86-asm-syntax=intel",
        marked_objfile,
    ]
    asm_raw = check_output(cmd).decode()
    # Strip each line so that the multi-line markers match independent of indentation.
    asm_raw = "\n".join(line.strip() for line in asm_raw.split("\n"))

    kernel_start = asm_raw.index(start_marker) + len(start_marker)
    # Look for the end marker only behind the start marker, so that an earlier
    # occurrence can not produce an empty or inverted slice.
    kernel_end = asm_raw.index(end_marker, kernel_start)
    kernel_lines = asm_raw[kernel_start:kernel_end].split("\n")
    # Ignore label and jump
    return "\n".join(kernel_lines[:-2])
|
||||||
|
|
||||||
|
|
||||||
def get_ithemal_prediction(code, model="skl"):
    """
    Request a prediction for *code* from the Ithemal web service.

    The code is posted together with the model name; HTML tags are removed from the
    response text before it is searched for either an error message or a prediction.

    :param code: kernel code that is sent to the service
    :param model: model to predict with, one of "skl", "hsw" or "ivb"
    :returns: predicted cycles per iteration as float; NaN if the service reported that
        it could not generate a prediction or if no prediction was found in the response
    :raises ValueError: if *model* is not one of the supported model names
    """
    supported_models = ("skl", "hsw", "ivb")
    # Explicit check instead of assert, which would be skipped when running with -O.
    if model not in supported_models:
        raise ValueError(
            "Unsupported Ithemal model {!r}, must be one of {}".format(model, supported_models)
        )

    url = "http://3.18.198.23/predict"
    # NOTE(review): no timeout is passed, the call waits until the server responds.
    r = requests.post(url, {"code": code, "model": model})
    raw_text = remove_html_tags(r.text)

    m = re.search(r"Could not generate a prediction: (.*)", raw_text)
    if m:
        # Report the reason given by the service inline with the progress output.
        print(" error:", m.group(1).strip(), end=" ")
        return float("nan")

    m = re.search(r"Prediction: ([0-9.]+) cycles per iteration", raw_text)
    if m:
        return float(m.group(1))
    return float("nan")
|
||||||
|
|
||||||
|
|
||||||
def main():
    """
    Script entry point: detect LLVM-MCA, run all kernels, then exit.

    LLVM-MCA analysis is enabled only if ``llvm-mca -version`` reports
    "LLVM version 12.0.0". Measurements are enabled unless ``--no-measurements`` is
    present in ``sys.argv``.
    """
    # Check for correct LLVM-MCA version
    try:
        llvm_mca = "LLVM version 12.0.0" in check_output(["llvm-mca", "-version"]).decode()
    except (FileNotFoundError, CalledProcessError):
        # Not installed, or installed but failing: continue without LLVM-MCA.
        llvm_mca = False

    build_mark_run_all_kernels(measurements="--no-measurements" not in sys.argv, llvm_mca=llvm_mca)
    sys.exit()
|
||||||
|
|
||||||
|
|
||||||
# Run main() only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user