mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-15 16:40:05 +01:00
Restore deleted files
This commit is contained in:
401
osaca/data/a72/mapping_pmevo.json
Normal file
401
osaca/data/a72/mapping_pmevo.json
Normal file
File diff suppressed because one or more lines are too long
180
osaca/data/create_db_entry.py
Normal file
180
osaca/data/create_db_entry.py
Normal file
@@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env python3
|
||||
from collections import defaultdict
|
||||
from fractions import Fraction
|
||||
|
||||
|
||||
class EntryBuilder:
    """Build YAML instruction-form entries for a machine model database file."""

    @staticmethod
    def compute_throughput(port_pressure):
        """
        Return the occupancy of the busiest port for a port pressure list.

        Every ``[uops, ports]`` item distributes its uops evenly over the elements
        obtained by iterating ``ports`` (one port per character for a string such as
        ``'45'``, one port per item for a list such as ``['10', '11']``).

        Example:
        [[1, '45'], [2, '0']] -> 2.0
        [] -> 0.0
        """
        port_occupancy = defaultdict(Fraction)
        for uops, ports in port_pressure:
            for p in ports:
                port_occupancy[p] += Fraction(uops, len(ports))
        # The extra 0 keeps max() defined for an empty port pressure list.
        return float(max([0, *port_occupancy.values()]))

    @staticmethod
    def classify(operands_types):
        """
        Classify an operand type list as ``(load, store, vec)``.

        ``load`` is set if a "mem" operand appears anywhere but in the last position,
        ``store`` if the last operand is "mem", and ``vec`` if any operand is one of
        "mm", "xmm", "ymm" or "zmm".

        :raises AssertionError: if the form is both a load and a store.
        """
        load = "mem" in operands_types[:-1]
        store = "mem" in operands_types[-1:]
        vec = any(vecr in operands_types for vecr in ("mm", "xmm", "ymm", "zmm"))
        # Raised explicitly (instead of `assert`) so the check survives `python -O`.
        if load and store:
            raise AssertionError("Can not process a combined load-store instruction.")
        return load, store, vec

    def build_description(
        self, instruction_name, operand_types, port_pressure=None, latency=0, comment=None
    ):
        """
        Return the YAML text of one instruction-form entry.

        :param instruction_name: mnemonic written to the ``name`` field
        :param operand_types: list of operand type strings; "imd" becomes an
            immediate, anything starting with "mem" a memory operand ("mem_simple"
            and "mem_complex" fix the index field), everything else a register whose
            name is the type string (a "{k}" marker is removed and sets ``mask``)
        :param port_pressure: list of ``[uops, ports]`` items, defaults to no pressure
        :param latency: value written to the ``latency`` field
        :param comment: optional comment appended to the ``name`` line
        :returns: the entry as a multi-line string ending with a newline
        """
        if port_pressure is None:
            port_pressure = []
        comment = " # " + comment if comment else ""

        description = "- name: {}{}\n  operands: {}\n".format(
            instruction_name, comment, "[]" if len(operand_types) == 0 else ""
        )

        for ot in operand_types:
            if ot == "imd":
                description += "  - class: immediate\n    imd: int\n"
            elif ot.startswith("mem"):
                description += "  - class: memory\n" '    base: "*"\n' '    offset: "*"\n'
                if ot == "mem_simple":
                    description += "    index: ~\n"
                elif ot == "mem_complex":
                    description += "    index: gpr\n"
                else:
                    description += '    index: "*"\n'
                description += '    scale: "*"\n'
            elif "{k}" in ot:
                description += "  - class: register\n    name: {}\n    mask: True\n".format(
                    ot.replace("{k}", "")
                )
            else:
                description += "  - class: register\n    name: {}\n".format(ot)

        description += (
            "  latency: {latency}\n"
            "  port_pressure: {port_pressure!r}\n"
            "  throughput: {throughput}\n"
            "  uops: {uops}\n"
        ).format(
            latency=latency,
            port_pressure=port_pressure,
            throughput=self.compute_throughput(port_pressure),
            uops=sum(uops for uops, _ in port_pressure),
        )
        return description

    def parse_port_pressure(self, port_pressure_str):
        """
        Parse a port pressure string into a list of ``[cycles, ports]`` items.

        A single port group stays a string, a comma separated group becomes a list
        of its non-empty parts. An empty or missing string yields an empty list.

        Example:
        1*p45+2*p0+2*p10,11 -> [[1, '45'], [2, '0'], [2, ['10', '11']]]
        """
        port_pressure = []
        if port_pressure_str:
            for term in port_pressure_str.split("+"):
                cycles, ports = term.split("*p")
                ports = ports.split(",")
                if len(ports) == 1:
                    ports = ports[0]
                else:
                    ports = [port for port in ports if len(port) > 0]
                port_pressure.append([int(cycles), ports])
        return port_pressure

    def process_item(self, instruction_form, resources):
        """
        Build the entry for an instruction form and its ``(port_pressure, latency)``.

        The instruction form is split on single spaces into the mnemonic and its
        operand types; a form starting with a bracketed prefix keeps everything up
        to the first "]" as the mnemonic.

        Example:
        ('mov xmm mem', ('1*p45+2*p0', 7)) is passed on to build_description as
        ('mov', ['xmm', 'mem'], [[1, '45'], [2, '0']], 7)
        """
        if instruction_form.startswith("[") and "]" in instruction_form:
            instr_elements = instruction_form.split("]")
            instr_elements = [instr_elements[0] + "]"] + instr_elements[1].strip().split(" ")
        else:
            instr_elements = instruction_form.split(" ")
        latency = int(resources[1])
        port_pressure = self.parse_port_pressure(resources[0])
        instruction_name = instr_elements[0]
        operand_types = instr_elements[1:]
        return self.build_description(instruction_name, operand_types, port_pressure, latency)
|
||||
|
||||
|
||||
class ArchEntryBuilder(EntryBuilder):
    """Entry builder that adds the cost of a memory access to load and store forms."""

    def build_description(self, instruction_name, operand_types, port_pressure=None, latency=0):
        """
        Return the YAML entry, extended by load/store port pressure and latency.

        The operand types are classified with ``classify``. For a load the load
        pressure and load latency are added, for a store the store pressure and store
        latency; vector forms use the ``*_vec`` pressure variants. The list passed as
        ``port_pressure`` is never modified.

        :param instruction_name: mnemonic written to the ``name`` field
        :param operand_types: list of operand type strings
        :param port_pressure: register-only ``[uops, ports]`` items, defaults to none
        :param latency: register-only latency
        :returns: the entry as a multi-line string
        """
        # Alternative parameter set kept for reference.
        # Intel ICX
        # LD_pressure = [[1, "23"], [1, ["2D", "3D"]]]
        # LD_pressure_vec = LD_pressure
        # ST_pressure = [[1, "79"], [1, "48"]]
        # ST_pressure_vec = ST_pressure
        # LD_lat = 5
        # ST_lat = 0
        # Zen3
        LD_pressure = [[1, ["11", "12", "13"]]]
        LD_pressure_vec = [[1, ["11", "12"]]]
        ST_pressure = [[1, ["12", "13"]]]
        ST_pressure_vec = [[1, ["4"]], [1, ["13"]]]
        LD_lat = 4
        ST_lat = 0

        if port_pressure is None:
            port_pressure = []

        load, store, vec = self.classify(operand_types)

        if load:
            # Build a new list: an in-place `+=` would leak the added pressure into
            # the caller's list (and into every later call sharing it).
            port_pressure = port_pressure + (LD_pressure_vec if vec else LD_pressure)
            latency += LD_lat
            return EntryBuilder.build_description(
                self, instruction_name, operand_types, port_pressure, latency, "with load"
            )

        if store:
            port_pressure = port_pressure + (ST_pressure_vec if vec else ST_pressure)
            latency += ST_lat
            return EntryBuilder.build_description(
                self, instruction_name, operand_types, port_pressure, latency, "with store"
            )

        # Register only:
        return EntryBuilder.build_description(
            self, instruction_name, operand_types, port_pressure, latency
        )
|
||||
|
||||
|
||||
def get_description(instruction_form, port_pressure, latency, rhs_comment=None):
    """
    Return the database entry for an instruction form as text.

    The entry is produced by ``ArchEntryBuilder().process_item``. If ``rhs_comment``
    is given, every line of the entry (split on newlines) is left-aligned to the
    width of the longest line and followed by `` # <rhs_comment>``.
    """
    entry = ArchEntryBuilder().process_item(instruction_form, (port_pressure, latency))
    if rhs_comment is None:
        return entry

    lines = entry.split("\n")
    width = max(len(line) for line in lines)
    template = "{:<" + str(width) + "} # {}\n"
    return "".join(template.format(line, rhs_comment) for line in lines)
|
||||
|
||||
|
||||
# Command line entry point: print the entry for one instruction form.
if __name__ == "__main__":
    import sys

    # Three mandatory arguments plus an optional right-hand-side comment.
    if len(sys.argv) not in (4, 5):
        print("Usage: {} <INSTRUCTION> <PORT_PRESSURE> <LATENCY> [COMMENT]".format(sys.argv[0]))
        sys.exit(0)

    try:
        description = get_description(*sys.argv[1:])
        print(description)
    except KeyError:
        print("Unknown architecture.")
        sys.exit(1)
|
||||
1644
osaca/data/generate_mov_entries.py
Normal file
1644
osaca/data/generate_mov_entries.py
Normal file
File diff suppressed because it is too large
Load Diff
309
osaca/data/model_importer.py
Normal file
309
osaca/data/model_importer.py
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import os.path
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from distutils.version import StrictVersion
|
||||
|
||||
from osaca.parser import get_parser
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
# Architecture abbreviations, grouped by vendor.
intel_archs = "CON WOL NHM WSM SNB IVB HSW BDW SKL SKX KBL CFL CNL ICL".split()
amd_archs = "ZEN1 ZEN+ ZEN2".split()
|
||||
|
||||
|
||||
def port_pressure_from_tag_attributes(attrib):
    """
    Convert the ``ports`` attribute of a tag into a list of ``[cycles, ports]`` items.

    Example:
    '1*p015+1*p1+1*p23+1*p4+3*p5' ->
    [[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']]

    If the attributes contain ``div_cycles``, an item ``[div_cycles, ['DIV']]`` is
    appended for the DIV pipeline.
    """
    port_occupation = []
    for term in attrib["ports"].split("+"):
        cycles, ports = term.split("*")
        # Drop leading 'p' characters first, then any leading 'F'/'P' characters.
        port_occupation.append([int(cycles), ports.lstrip("p").lstrip("FP")])

    # Also consider div on DIV pipeline
    if "div_cycles" in attrib:
        port_occupation.append([int(attrib["div_cycles"]), ["DIV"]])

    return port_occupation
|
||||
|
||||
|
||||
def extract_paramters(instruction_tag, parser, isa):
    """
    Build the operand descriptions of an ``<instruction>`` tag.

    The ``<operand>`` children are visited in ascending ``idx`` order; operands
    marked ``suppressed`` are ignored.

    :param instruction_tag: XML element with ``<operand>`` children
    :param parser: object providing ``parse_register`` and ``is_vector_register``
    :param isa: "x86" or "aarch64"; selects how register operands are generalized
    :returns: list of operand dicts, each with at least a ``class`` key
    :raises ValueError: for an unknown operand type or a register that
        ``parse_register`` does not recognize
    """
    parameters = []
    parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib["idx"]))
    for parameter_tag in parameter_tags:
        # Ignore parameters with suppressed=1
        if int(parameter_tag.attrib.get("suppressed", "0")):
            continue

        parameter = {}
        p_type = parameter_tag.attrib["type"]
        if p_type == "imm":
            parameter["class"] = "immediate"
            parameter["imd"] = "int"
        elif p_type in ("mem", "agen"):
            # Memory and address-generation operands are both wildcard memory references.
            parameter["class"] = "memory"
            parameter["base"] = "*"
            parameter["offset"] = "*"
            parameter["index"] = "*"
            parameter["scale"] = "*"
        elif p_type == "reg":
            parameter["class"] = "register"
            possible_regs = [parser.parse_register("%" + r) for r in parameter_tag.text.split(",")]
            if possible_regs[0] is None:
                raise ValueError(
                    "Unknown register type for {} with {}.".format(
                        parameter_tag.attrib, parameter_tag.text
                    )
                )
            # Only the first of the listed registers determines the operand description.
            register = possible_regs[0]["register"]
            if isa == "x86":
                if parser.is_vector_register(register):
                    # Reduce e.g. "XMM3" to its three-letter register class.
                    register["name"] = register["name"].lower()[:3]
                    if "mask" in register:
                        register["mask"] = True
                else:
                    register["name"] = "gpr"
            elif isa == "aarch64":
                # The concrete register name is dropped; the remaining keys describe it.
                register.pop("name", None)
            for key in register:
                parameter[key] = register[key]
        elif p_type == "relbr":
            parameter["class"] = "identifier"
        else:
            raise ValueError("Unknown paramter type {}".format(parameter_tag.attrib))
        parameters.append(parameter)
    return parameters
|
||||
|
||||
|
||||
def extract_model(tree, arch, skip_mem=True):
    """
    Build a MachineModel for ``arch`` from a parsed instructions XML tree.

    :param tree: parsed XML tree containing ``<instruction>`` elements
    :param arch: architecture name; matched upper-cased against the ``name``
        attribute of ``<architecture>`` tags
    :param skip_mem: if true, instruction forms with a "mem" operand are skipped
    :returns: the filled MachineModel, or None if no ISA is known for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contain space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
            if isa == "x86":
                parameters.reverse()
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without operands the form cannot be stored; do not fall through with
            # an undefined (or the previous instruction's) parameter list.
            continue

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
        if arch_tag is None:
            continue
        # skip any instructions without port utilization
        if not any(["ports" in x.attrib for x in arch_tag.findall("measurement")]):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # warn (without skipping) if the first measurement's port-based TP exceeds
        # its measured TP
        if [
            float(x.attrib["TP_ports"])
            > min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
            for x in arch_tag.findall("measurement")
        ][0]:
            print(
                "Calculated TP is greater than measured TP.",
                iform,
                file=sys.stderr,
            )
        # skip if instruction contains memory operand
        if skip_mem and any(
            [x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")]
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue
        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            if "TP_ports" in measurement_tag.attrib:
                throughput = float(measurement_tag.attrib["TP_ports"])
            else:
                # Attribute values are strings and must be converted before min().
                measured = [
                    float(measurement_tag.attrib[key])
                    for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP")
                    if key in measurement_tag.attrib
                ]
                throughput = min(measured) if measured else None
            uops = (
                int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
            )
            if "ports" in measurement_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
            latencies = [
                int(l_tag.attrib["cycles"])
                for l_tag in measurement_tag.iter("latency")
                if "cycles" in l_tag.attrib
            ]
            if len(latencies) == 0:
                latencies = [
                    int(l_tag.attrib["max_cycles"])
                    for l_tag in measurement_tag.iter("latency")
                    if "max_cycles" in l_tag.attrib
                ]
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print(
                    "Contradicting port occupancies, using latest IACA:",
                    iform,
                    file=sys.stderr,
                )
            port_pressure = port_pressure[-1]
        else:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch.upper() in intel_archs and arch.upper() not in ["ICL"]:
            if any([p["class"] == "memory" for p in parameters]):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = False
                port_4 = False
                for pp in port_pressure:
                    if "2" in pp[1] and "3" in pp[1]:
                        port_23 = True
                    if "4" in pp[1]:
                        port_4 = True
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if (
                        arch.upper() in ["SNB", "IVB"]
                        and any([p.get("name", "") == "ymm" for p in parameters])
                        and "128" not in mnemonic
                    ):
                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                        # instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for ports in [pp[1] for pp in port_pressure]:
            for p in ports:
                mm.add_port(p)

        # The stored throughput is always derived from the final port pressure.
        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm
|
||||
|
||||
|
||||
def rhs_comment(uncommented_string, comment):
    """
    Append `` # <comment>`` to every line of ``uncommented_string``.

    Lines (split on newlines) are left-aligned to the width of the longest line so
    that all comments start in the same column; each output line ends with a newline.
    """
    lines = uncommented_string.split("\n")
    width = max(len(line) for line in lines)
    template = "{:<" + str(width) + "} # {}\n"
    return "".join(template.format(line, comment) for line in lines)
|
||||
|
||||
|
||||
def architectures(tree):
    """Return the set of ``name`` attributes of all ``<architecture>`` tags in ``tree``."""
    return {arch_tag.attrib["name"] for arch_tag in tree.findall(".//architecture")}
|
||||
|
||||
|
||||
def main():
    """
    Command line interface of the importer.

    With an architecture argument the extracted model is printed to stdout;
    without one, a model is extracted for every architecture found in the XML file
    and written to ``<arch>.yml`` in the current working directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("xml", help="path of instructions.xml from http://uops.info")
    cli.add_argument(
        "arch",
        nargs="?",
        help="architecture to extract, use IACA abbreviations (e.g., SNB). "
        "if not given, all will be extracted and saved to file in CWD.",
    )
    cli.add_argument(
        "--mem",
        dest="skip_mem",
        action="store_false",
        help="add instruction forms including memory addressing operands, which are "
        "skipped by default",
    )
    args = cli.parse_args()
    basename = os.path.basename(__file__)

    tree = ET.parse(args.xml)
    print("# Available architectures:", ", ".join(architectures(tree)))

    if not args.arch:
        # No architecture chosen: export every architecture to its own file.
        for arch in architectures(tree):
            print(arch, end="")
            model = extract_model(tree, arch.lower(), args.skip_mem)
            if model:
                model_string = rhs_comment(model.dump(), basename + " " + arch)
                with open("{}.yml".format(arch.lower()), "w") as f:
                    f.write(model_string)
            print(".")
        return

    print("# Chosen architecture: {}".format(args.arch))
    model = extract_model(tree, args.arch, args.skip_mem)
    if model is not None:
        print(rhs_comment(model.dump(), "uops.info import"))
|
||||
|
||||
|
||||
# Run the importer only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
321
osaca/data/pmevo_importer.py
Normal file
321
osaca/data/pmevo_importer.py
Normal file
@@ -0,0 +1,321 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
|
||||
from asmbench import bench, op
|
||||
from osaca.semantics import MachineModel
|
||||
|
||||
|
||||
def build_bench_instruction(name, operands):
    """
    Convert an OSACA model instruction to an asmbench instruction string.

    The first operand is emitted as destination ("dst"), all following ones as
    sources ("src"). Returns `None` for an operand class, register prefix or vector
    shape that cannot be expressed.
    """
    # (shape, constraint) per scalar register prefix
    scalar_registers = {
        "x": ("i64", "r"),
        "s": ("float", "w"),
        "d": ("double", "w"),
    }
    # shape per vector arrangement specifier
    vector_shapes = {
        "b": "<16 x i8>",
        "h": "<8 x i16>",
        "s": "<4 x float>",
        "d": "<2 x double>",
    }
    # Different instructions have different ranges for literals,
    # so need to pick something "reasonable" for each.
    immediate_values = {}
    immediate_values.update(
        dict.fromkeys(
            [
                "cmeq", "cmge", "cmgt", "cmle", "cmlt",
                "fcmeq", "fcmge", "fcmgt", "fcmle", "fcmlt", "fcmp",
            ],
            "0",
        )
    )
    immediate_values.update(dict.fromkeys(["and", "ands", "eor", "eors", "orr", "orrs"], "255"))
    immediate_values.update(
        dict.fromkeys(
            ["bfi", "extr", "sbfiz", "sbfx", "shl", "sshr", "ubfiz", "ubfx", "ushr"], "7"
        )
    )

    pieces = [name]
    direction, separator, shift = "dst", " ", ""
    for operand in operands:
        operand_class = operand["class"]
        if operand_class in ("register", "register_shift"):
            prefix = operand["prefix"]
            if prefix in scalar_registers:
                shape, constraint = scalar_registers[prefix]
            elif prefix == "v":
                constraint = "w"
                shape = vector_shapes.get(operand["shape"])
                if shape is None:
                    return None
            else:
                return None
            has_shift = operand_class == "register_shift"
        elif operand_class in ("immediate", "immediate_shift"):
            shape = "i32"
            constraint = immediate_values.get(name, "42")
            has_shift = operand_class == "immediate_shift"
        else:
            return None

        if has_shift:
            # Once set, the shift suffix also stays attached to later operands.
            shift = ", {}".format(operand["shift_op"])
            if operand["shift"] is not None:
                shift += " {}".format(operand["shift"])

        pieces.append("{}{{{}:{}:{}}}{}".format(separator, direction, shape, constraint, shift))
        direction, separator = "src", ", "
    return "".join(pieces)
|
||||
|
||||
|
||||
def bench_instruction(name, operands):
    """
    Convert an OSACA model instruction to an asmbench one and benchmark it.

    Returns ``(None, None)`` if the instruction could not be converted, otherwise
    the result of ``bench.bench_instructions`` for the single instruction.
    """
    asmbench_inst = build_bench_instruction(name, operands)
    if asmbench_inst is not None:
        instruction = op.Instruction.from_string(asmbench_inst)
        return bench.bench_instructions([instruction])
    return (None, None)
|
||||
|
||||
|
||||
def round_cycles(value):
    """
    Round a measured cycle count to a model value.

    Values below 0.9 become 0.5; all other values are rounded down after adding 0.1.
    """
    if value < 0.9:
        # Frequently found, so we might want to include them.
        # Measurements over-estimate a lot here, hence the high bound.
        return 0.5
    # Measurements usually over-estimate, so usually round down,
    # but still allow slightly smaller values.
    rounded_down = math.floor(value + 0.1)
    return float(rounded_down)
|
||||
|
||||
|
||||
def operand_parse(op, state):
    """
    Parse an operand from a PMEvo instruction and emit an OSACA model one.

    ``state`` is a dict shared between the operands of one instruction. It records
    the floating point type ("type") and memory base register prefix ("memory_base")
    seen so far, which later operands (literals, memory offsets) depend on, and the
    most recently returned operand ("previous").

    :param op: operand text, starting with "_"
    :param state: mutable per-instruction parsing state
    :returns: the operand dict, or None for an operand that only updates ``state``
    :raises ValueError: if the operand is unknown, unsupported, or depends on state
        that no earlier operand provided
    """
    parameter = {}

    if op.startswith("_((REG:"):
        parts = op.split(".")
        register = parts[0][7:-2]
        # The read/write marker is not needed, but the three-part layout is enforced.
        _, register_type, bits = register.split(":")

        parameter["class"] = "register"
        if register_type == "G":
            general_prefixes = {"32": "r", "64": "x"}
            if bits not in general_prefixes:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
            parameter["prefix"] = general_prefixes[bits]
        elif register_type == "F":
            if bits == "32":
                parameter["prefix"] = "s"
                state["type"] = "float"
            elif bits == "64":
                parameter["prefix"] = "d"
                state["type"] = "double"
            elif bits == "128":
                parameter["prefix"] = "q"
            elif bits == "VEC":
                if len(parts) < 2:
                    # Reported as ValueError so the caller skips just this instruction.
                    raise ValueError("Missing vector shape in {}".format(op))
                vec_shape = parts[1]
                parameter["prefix"] = "v"
                if vec_shape == "16b":
                    parameter["shape"] = "b"
                elif vec_shape == "8h":
                    parameter["shape"] = "h"
                elif vec_shape == "4s":
                    parameter["shape"] = "s"
                    state["type"] = "float"
                elif vec_shape == "2d":
                    parameter["shape"] = "d"
                    state["type"] = "double"
                else:
                    raise ValueError("Invalid vector shape {}".format(vec_shape))
            else:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
        else:
            raise ValueError("Unknown register type {}".format(register_type))
    elif op.startswith("_[((MEM:"):
        bits = op[8:-2].split(":")[0]
        if bits != "64":
            raise ValueError("Invalid register bits for MEM {}".format(bits))
        # Only remembers the base; the memory operand is emitted with its offset.
        state["memory_base"] = "x"
        return None
    elif op.startswith("_((MIMM:"):
        bits = op[8:-3].split(":")[0]
        if bits != "16":
            raise ValueError("Invalid register bits for MEM {}".format(bits))
        if "memory_base" not in state:
            raise ValueError("Memory offset without preceding memory base: {}".format(op))
        parameter["class"] = "memory"
        parameter["base"] = state["memory_base"]
        parameter["offset"] = "imd"
        parameter["index"] = "*"
        parameter["scale"] = "*"
        parameter["post-indexed"] = False
        parameter["pre-indexed"] = False
    elif re.fullmatch(r"_#?-?(0x)?[0-9a-f]+", op):
        parameter["class"] = "immediate"
        parameter["imd"] = "int"
    elif re.fullmatch(r"_#?-?[0-9]*\.[0-9]*", op):
        if "type" not in state:
            raise ValueError("Floating point literal without known type: {}".format(op))
        parameter["class"] = "immediate"
        parameter["imd"] = state["type"]
    elif re.fullmatch(r"_((sxt|uxt)[bhw]|lsl|lsr|asr|rol|ror)(_[0-9]+)?", op):
        # Shift/extend modifiers are not supported; they would have to be merged
        # into state["previous"] instead of becoming an operand of their own.
        raise ValueError("Skipping instruction with shift operand: {}".format(op))
    else:
        raise ValueError("Unknown operand {}".format(op))

    state["previous"] = parameter
    return parameter
|
||||
|
||||
|
||||
def port_convert(ports):
    """
    Merge consecutive identical entries and emit them in OSACA's format.

    Each entry is joined into one string; a run of equal strings becomes a single
    ``[run_length, joined_ports]`` item.
    FIXME: This does not handle having more than 10 ports.
    """
    pressures = []
    for entry in ports:
        possible_ports = "".join(entry)
        if pressures and pressures[-1][1] == possible_ports:
            # Same port set as the previous entry: one more cycle on it.
            pressures[-1][0] += 1
        else:
            pressures.append([1, possible_ports])
    return pressures
|
||||
|
||||
|
||||
def throughput_guess(ports):
    """
    Guess the throughput as the number of entries divided by the smallest entry size.

    Minimum amount of possible ports per cycle should determine throughput
    to some degree of accuracy. (THIS IS *NOT* ALWAYS TRUE!)
    """
    bottleneck_ports = min(len(entry) for entry in ports)
    entry_count = float(len(ports))
    return entry_count / bottleneck_ports
|
||||
|
||||
|
||||
def latency_guess(ports):
    """
    Guess the latency as the number of entries in ``ports``.

    Each entry in the ports array equates to one cycle on any of the ports.
    So this is about as good as it is going to get.
    """
    cycle_count = len(ports)
    return float(cycle_count)
|
||||
|
||||
|
||||
def extract_model(mapping, arch, template_model, asmbench):
    """
    Fill a MachineModel with the instructions of a PMEvo mapping.

    :param mapping: loaded mapping with the keys "arch" (holding "ports" and
        "insns") and "assignment" (ports per instruction)
    :param arch: architecture name used to look up the ISA
    :param template_model: model to extend, or None to start with an empty one
    :param asmbench: if true, latency and throughput are measured with asmbench
        instead of only being guessed from the port assignment
    :returns: the model, or None if no ISA is known for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except ValueError:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa) if template_model is None else template_model

    arch_info = mapping["arch"]
    for port in arch_info["ports"]:
        mm.add_port(port)

    for insn in arch_info["insns"]:
        try:
            ports = mapping["assignment"][insn]

            # Parse instruction
            insn_split = insn.split("_")
            name = insn_split[1]
            insn_parts = list(("_" + "_".join(insn_split[2:])).split(","))
            state = {}
            parsed_operands = (operand_parse(operand, state) for operand in insn_parts)
            operands = [parsed for parsed in parsed_operands if parsed is not None]

            # Port pressures from mapping
            port_pressure = port_convert(ports)

            # Initial guessed throughput and latency
            throughput = throughput_guess(ports)
            latency = latency_guess(ports)

            # Benchmark with asmbench
            # print(build_bench_instruction(name, operands))
            if asmbench:
                bench_latency, bench_throughput = bench_instruction(name, operands)
                if bench_throughput is None:
                    print("Failed to measure throughput for instruction {}.".format(insn))
                else:
                    throughput = round_cycles(bench_throughput)
                if bench_latency is None:
                    print("Failed to measure latency for instruction {}.".format(insn))
                else:
                    latency = round_cycles(bench_latency)

            # No u-ops data available
            uops = None

            # Insert instruction if not already found (can happen with template)
            if mm.get_instruction(name, operands) is None:
                mm.set_instruction(name, operands, latency, port_pressure, throughput, uops)
        except ValueError as e:
            print("Failed to parse instruction {}: {}.".format(insn, e))

    return mm
|
||||
|
||||
|
||||
def main():
    """
    Command line interface: convert a PMEvo mapping into an OSACA model file.

    Reads the mapping JSON, optionally extends a template model and optionally
    benchmarks the instructions with asmbench, then writes the model to
    ``<arch>.yml`` in the current working directory. Exits with status 1 if no model
    could be built for the architecture named in the mapping.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("json", help="path of mapping.json")
    parser.add_argument("yaml", help="path of template.yml", nargs="?")
    parser.add_argument(
        "--asmbench", help="Benchmark latency and throughput using asmbench.", action="store_true"
    )
    args = parser.parse_args()

    # The context manager closes the file even if the JSON is malformed.
    with open(args.json, "r") as json_file:
        mapping = json.load(json_file)
    arch = mapping["arch"]["name"].lower()

    template_model = None
    if args.yaml is not None:
        template_model = MachineModel(path_to_yaml=args.yaml)

    if args.asmbench:
        bench.setup_llvm()

    model = extract_model(mapping, arch, template_model, args.asmbench)
    if model is None:
        # extract_model returns None when it knows no ISA for the architecture.
        print("No model could be built for architecture {}.".format(arch), file=sys.stderr)
        sys.exit(1)

    with open("{}.yml".format(arch), "w") as f:
        f.write(model.dump())
|
||||
|
||||
|
||||
# Run the importer only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user