mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2026-01-07 11:40:06 +01:00
Validation of OSACA predictions for IVB, SKX, ZEN1, ZEN2, A64FX and TX2 with different kernels. build_and_run.py contains the configuration used at RRZE's testcluster and UR's qpace4; Analysis.ipynb contains the analysis script and results. Raw data from the measurements (122MB) will be attached to the next OSACA release. For now, find the raw data here: https://hawo.net/~sijuhamm/d/UPIhBOtz/validation-data.tar.gz The analysis report can be viewed at https://nbviewer.jupyter.org/github/RRZE-HPC/OSACA/blob/validation/validation/Analysis.ipynb Quite a few changes to OSACA are included: Feature: register change tracking via semantic understanding of operations; Feature: recording LCD latency along path and exposing this to frontend; Feature: support for memory reference aliases; Feature: store throughput scaling (similar to load throughput scaling); Fix: model importer works with latest uops.info export; Fix: immediate type tracking on ARM now preserves type in internal representation; Removed unused KerncraftAPI.
300 lines
11 KiB
Python
Executable File
300 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import os.path
|
|
import sys
|
|
import xml.etree.ElementTree as ET
|
|
from distutils.version import StrictVersion
|
|
|
|
from osaca.parser import get_parser
|
|
from osaca.semantics import MachineModel
|
|
|
|
# Abbreviations of Intel microarchitectures. extract_model() checks membership in this list
# (after upper-casing the requested architecture) before adding the 2D/3D port entries.
intel_archs = [
    "CON",
    "WOL",
    "NHM",
    "WSM",
    "SNB",
    "IVB",
    "HSW",
    "BDW",
    "SKL",
    "SKX",
    "KBL",
    "CFL",
    "CNL",
    "ICL",
]
# Presumably the AMD counterpart of intel_archs; it is not referenced by the code visible here.
amd_archs = ["ZEN1", "ZEN+", "ZEN2"]
|
|
|
|
|
|
def port_pressure_from_tag_attributes(attrib):
    """
    Convert the port information found in a tag's attributes into a port occupation list.

    The ``ports`` attribute is a ``+``-separated sequence of ``<cycles>*<ports>`` terms, e.g.
    ``'1*p015+1*p1+1*p23+1*p4+3*p5'`` is turned into
    ``[[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']]``.

    :param attrib: mapping of tag attributes; ``"ports"`` is required, ``"div_cycles"`` optional
    :returns: list of ``[cycles, ports]`` entries
    """

    def parse_term(term):
        cycle_count, port_names = term.split("*")
        # Strip leading "p" characters first, then any leading "F"/"P" characters
        return [int(cycle_count), port_names.lstrip("p").lstrip("FP")]

    occupation = [parse_term(term) for term in attrib["ports"].split("+")]

    # Cycles spent on the divider are reported separately and get their own "DIV" entry
    if "div_cycles" in attrib:
        occupation.append([int(attrib["div_cycles"]), ["DIV"]])

    return occupation
|
|
|
|
|
|
def extract_paramters(instruction_tag, parser, isa):
    """
    Build the operand descriptions for one ``<instruction>`` tag.

    The ``<operand>`` children are processed in ascending ``idx`` order; operands flagged
    with ``suppressed="1"`` are left out. Each remaining operand becomes a dict whose
    ``"class"`` entry is ``"immediate"``, ``"memory"``, ``"register"`` or ``"identifier"``.

    :param instruction_tag: XML element holding the ``<operand>`` children
    :param parser: object providing ``parse_register`` and ``is_vector_register``; it is only
        consulted for operands of type ``reg``
    :param isa: ``"x86"`` or ``"aarch64"``; selects how register names are generalized
    :returns: list of operand dicts in ``idx`` order
    :raises ValueError: if a register cannot be parsed or an operand type is unknown
    """
    parameters = []
    parameter_tags = sorted(
        instruction_tag.findall("operand"), key=lambda p: int(p.attrib["idx"])
    )
    for parameter_tag in parameter_tags:
        # Ignore parameters with suppressed=1
        if int(parameter_tag.attrib.get("suppressed", "0")):
            continue

        p_type = parameter_tag.attrib["type"]
        if p_type == "imm":
            parameter = {"class": "immediate", "imd": "int"}
        elif p_type in ("mem", "agen"):
            # Both operand types are described by the same wildcard memory reference
            parameter = {
                "class": "memory",
                "base": "*",
                "offset": "*",
                "index": "*",
                "scale": "*",
            }
        elif p_type == "reg":
            # The tag text is a comma-separated list of register candidates
            possible_regs = [
                parser.parse_register("%" + r) for r in parameter_tag.text.split(",")
            ]
            if possible_regs[0] is None:
                raise ValueError(
                    "Unknown register type for {} with {}.".format(
                        parameter_tag.attrib, parameter_tag.text
                    )
                )
            # Only the first candidate is used to describe the operand
            register = possible_regs[0]["register"]
            if isa == "x86":
                if parser.is_vector_register(register):
                    # Keep only the first three lower-case characters of the name
                    register["name"] = register["name"].lower()[:3]
                    if "mask" in register:
                        register["mask"] = True
                else:
                    register["name"] = "gpr"
            elif isa == "aarch64":
                # Drop the concrete name. The original indexed the list itself with
                # "register", which always raised TypeError.
                del register["name"]
            parameter = {"class": "register"}
            parameter.update(register)
        elif p_type == "relbr":
            parameter = {"class": "identifier"}
        else:
            raise ValueError("Unknown paramter type {}".format(parameter_tag.attrib))
        parameters.append(parameter)
    return parameters
|
|
|
|
|
|
def extract_model(tree, arch, skip_mem=True):
    """
    Build a machine model for one architecture from a parsed instruction XML tree.

    For every ``<instruction>`` tag carrying an ``<architecture>`` child for *arch*, the
    port occupation, latency and uop count are gathered from its ``<measurement>`` and
    ``<IACA>`` tags and registered in the model via ``set_instruction``.

    :param tree: parsed XML tree (must offer ``findall``)
    :param arch: architecture name; upper-cased to match ``<architecture name="...">``
    :param skip_mem: if true, instruction forms with an operand of type ``mem`` are skipped
    :returns: the populated ``MachineModel``, or ``None`` if no ISA can be determined for
        *arch*
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_name = arch.upper()

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contain space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            # Without operands the entry cannot be described. Skip it instead of falling
            # through with `parameters` unbound or left over from the previous instruction.
            print(e, file=sys.stderr)
            continue
        if isa == "x86":
            # x86 operands are stored in reverse order
            # NOTE(review): presumably to match AT&T operand order -- confirm
            parameters.reverse()

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_name + '"]')
        if arch_tag is None:
            continue
        measurement_tags = arch_tag.findall("measurement")
        # skip any instructions without port utilization
        if not any("ports" in x.attrib for x in measurement_tags):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # Warn (without skipping) if the first measurement's TP_ports exceeds its measured
        # throughput. The attributes are optional, so only compare when all are present.
        first_attrib = measurement_tags[0].attrib
        if all(key in first_attrib for key in ("TP_ports", "TP_loop", "TP_unrolled")):
            measured_tp = min(
                float(first_attrib["TP_loop"]), float(first_attrib["TP_unrolled"])
            )
            if float(first_attrib["TP_ports"]) > measured_tp:
                print(
                    "Calculated TP is greater than measured TP.",
                    iform,
                    file=sys.stderr,
                )
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            m_attrib = measurement_tag.attrib
            if "TP_ports" in m_attrib:
                throughput = float(m_attrib["TP_ports"])
            else:
                # Attribute values are strings and must be converted before comparing.
                # "TP_unroll" is what this code read originally; "TP_unrolled" is the key
                # used by the sanity check above, so both are accepted.
                tp_candidates = [
                    float(m_attrib[key])
                    for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP")
                    if key in m_attrib
                ]
                throughput = min(tp_candidates) if tp_candidates else None
            uops = int(m_attrib["uops"]) if "uops" in m_attrib else None
            if "ports" in m_attrib:
                port_pressure.append(port_pressure_from_tag_attributes(m_attrib))
            latencies = [
                int(l_tag.attrib["cycles"])
                for l_tag in measurement_tag.iter("latency")
                if "cycles" in l_tag.attrib
            ]
            if not latencies:
                # Fall back to the upper bounds if no exact cycle counts are given
                latencies = [
                    int(l_tag.attrib["max_cycles"])
                    for l_tag in measurement_tag.iter("latency")
                    if "max_cycles" in l_tag.attrib
                ]
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if not port_pressure:
            # No port data available for this architecture
            continue
        if port_pressure[1:] != port_pressure[:-1]:
            print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
        # IACA entries were appended last, so the newest IACA data wins if present
        port_pressure = port_pressure[-1]

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_name in intel_archs and arch_name != "ICL":
            if any(p["class"] == "memory" for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = any("2" in pp[1] and "3" in pp[1] for pp in port_pressure)
                port_4 = any("4" in pp[1] for pp in port_pressure)
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if (
                        arch_name in ["SNB", "IVB"]
                        and any(p.get("name", "") == "ymm" for p in parameters)
                        and "128" not in mnemonic
                    ):
                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                        # instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for pp in port_pressure:
            for p in pp[1]:
                mm.add_port(p)

        # The throughput stored in the model is derived from the port pressure and
        # supersedes the value read from the measurement tags above.
        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm
|
|
|
|
|
|
def rhs_comment(uncommented_string, comment):
    """
    Append *comment* as a right-hand side ``#`` comment to every line of a string.

    All lines are padded with spaces to the length of the longest line, so the comment
    markers end up in the same column.

    :param uncommented_string: text to annotate; lines are separated by newline characters
    :param comment: value placed after the ``#`` on every line
    :returns: the annotated text; every line, including the last one, ends with a newline
    """
    lines = uncommented_string.split("\n")
    width = max(len(line) for line in lines)
    return "".join("{} # {}\n".format(line.ljust(width), comment) for line in lines)
|
|
|
|
|
|
def architectures(tree):
    """Return the set of ``name`` attributes of all ``<architecture>`` tags below *tree*."""
    return {arch_tag.attrib["name"] for arch_tag in tree.findall(".//architecture")}
|
|
|
|
|
|
def main():
    """
    Command line entry point.

    Parses the XML file given on the command line and lists the architectures it contains.
    With an architecture argument, the extracted model is printed to stdout; without one,
    a model is extracted for every architecture found and written to ``<arch>.yml``
    (lower-cased name, relative to the current working directory).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("xml", help="path of instructions.xml from http://uops.info")
    cli.add_argument(
        "arch",
        nargs="?",
        help="architecture to extract, use IACA abbreviations (e.g., SNB). "
        "if not given, all will be extracted and saved to file in CWD.",
    )
    cli.add_argument(
        "--mem",
        dest="skip_mem",
        action="store_false",
        help="add instruction forms including memory addressing operands, which are "
        "skipped by default",
    )
    options = cli.parse_args()
    script_name = os.path.basename(__file__)

    xml_tree = ET.parse(options.xml)
    print("# Available architectures:", ", ".join(architectures(xml_tree)))

    if not options.arch:
        # No architecture requested: export one YAML file per architecture in the XML
        for arch_name in architectures(xml_tree):
            print(arch_name, end="")
            extracted = extract_model(xml_tree, arch_name.lower(), options.skip_mem)
            if extracted:
                annotated = rhs_comment(extracted.dump(), script_name + " " + arch_name)

                with open("{}.yml".format(arch_name.lower()), "w") as out_file:
                    out_file.write(annotated)
            # Terminates the progress line started with the architecture name
            print(".")
        return

    print("# Chosen architecture: {}".format(options.arch))
    extracted = extract_model(xml_tree, options.arch, options.skip_mem)
    if extracted is not None:
        print(rhs_comment(extracted.dump(), "uops.info import"))
|
|
|
|
# Run the command line interface only when this file is executed as a script
if __name__ == "__main__":
    main()
|