Merge pull request #60 from RRZE-HPC/a72

Add support for ARM Cortex-A72
This commit is contained in:
Jan
2021-10-14 18:10:36 +02:00
committed by GitHub
7 changed files with 5714 additions and 2 deletions

View File

@@ -95,7 +95,7 @@ The usage of OSACA can be listed as:
--arch ARCH
needs to be replaced with the target architecture abbreviation.
Possible options are ``SNB``, ``IVB``, ``HSW``, ``BDW``, ``SKX``, ``CSX`` and ``ICL`` for the latest Intel micro architectures starting from Intel Sandy Bridge and ``ZEN1``, ``ZEN2`` for AMD Zen architectures.
Furthermore, ``TX2`` for Marvell`s ARM-based ThunderX2 , ``N1`` for ARM's Neoverse and ``A64FX`` for Fujitsu's HPC ARM architecture are available.
Furthermore, ``TX2`` for Marvell's ARM-based ThunderX2, ``N1`` for ARM's Neoverse, ``A72`` for ARM Cortex-A72 and ``A64FX`` for Fujitsu's HPC ARM architecture are available.
If no micro-architecture is given, OSACA assumes a default architecture for x86/AArch64.
--fixed
Run the throughput analysis with fixed port utilization for all suitable ports per instruction.

4179
osaca/data/a72.yml Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

808
osaca/data/a72/template.yml Normal file
View File

@@ -0,0 +1,808 @@
osaca_version: 0.3.11
micro_architecture: Cortex A-72
arch_code: a72
isa: aarch64
hidden_loads: false
ports: ['0', '1', '2', '3', '4', '5', '6', '7']
port_model_scheme: |
+-------------------------------------------------------------------------------------+
| scheduler |
+-------------------------------------------------------------------------------------+
0 |I 1 |L 2 |M 3 |S 4 |F1 5 |I 6 |F0 7 |B
\/ \/ \/ \/ \/ \/ \/ \/
+-------+ +-------+ +-------+ +-------+ +-----------+ +-------+ +---------+ +-------+
|INT ALU| | LOAD | | MUL | | STORE | | ASIMD | |INT ALU| | ASIMD | | Branch|
+-------+ +-------+ +-------+ +-------+ +-----------+ +-------+ +---------+ +-------+
+-------+ +-------+ +-----------+ +-------+ +---------+
| AGU | | DIV | | FP ALU | | AGU | |ASIMD MUL|
+-------+ +-------+ +-----------+ +-------+ +---------+
+-------+ +-----------+ +---------+
| SHIFT | | FP MUL | | FP ALU |
+-------+ +-----------+ +---------+
+-------+ +-----------+ +---------+
| CRC | | FP DIV | | FP MUL |
+-------+ +-----------+ +---------+
+-------+ +-----------+ +---------+
| USAD | | FP SQRT | | FP DIV |
+-------+ +-----------+ +---------+
+-----------+ +---------+
|ASIMD SHIFT| | FP CONV |
+-----------+ +---------+
+---------+
| CRYPTO |
+---------+
# The port pressures do not always correctly match this schema, because most
# instructions are imported from an experimentally determined mapping, which
# is not always correct.
load_latency: {x: 4.0, s: 5.0, d: 5.0, h: 6.0, q: 6.0}
load_throughput: []
load_throughput_default: [[1, '1']]
store_throughput: []
store_throughput_default: [[2, '3']]
instruction_forms:
# Branch
- name: b
operands:
- class: identifier
latency: 1.0
port_pressure: [[1, '7']]
throughput: 1.0
- name: bne
operands:
- class: identifier
latency: 1.0
port_pressure: [[1, '7']]
throughput: 1.0
- name: b.ne
operands:
- class: identifier
latency: 1.0
port_pressure: [[1, '7']]
throughput: 1.0
- name: br
operands:
- class: register
prefix: x
latency: 1.0
port_pressure: [[1, '7']]
throughput: 1.0
- name: ret
operands:
- class: register
prefix: x
latency: 1.0
port_pressure: [[1, '7']]
throughput: 1.0
- name: bl
operands:
- class: identifier
latency: 1.0
port_pressure: [[1, '05'], [1, '7']]
throughput: 1.0
- name: blr
operands:
- class: register
prefix: x
latency: 1.0
port_pressure: [[1, '05'], [1, '7']]
throughput: 1.0
# Load GPR
- name: ldr
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 4.0
port_pressure: [[1, '1']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 5.0
port_pressure: [[1, '1'], [1, '05']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 5.0
port_pressure: [[1, '3'], [1, '05']]
throughput: 1.0
# Load FP d
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 5.0
port_pressure: [[1, '1']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 5.0
port_pressure: [[1, '1'], [2, '05']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 5.0
port_pressure: [[1, '1'], [2, '05']]
throughput: 1.0
# Load FP q
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
post-indexed: false
pre-indexed: false
latency: 5.0
port_pressure: [[1, '1']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
post-indexed: true
pre-indexed: false
latency: 5.0
port_pressure: [[1, '1'], [1, '05']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
post-indexed: false
pre-indexed: true
latency: 5.0
port_pressure: [[1, '1'], [1, '05']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 6.0
port_pressure: [[1, '1'], [1, '05']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 6.0
port_pressure: [[1, '1'], [2, '05']]
throughput: 1.0
- name: ldr
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 6.0
port_pressure: [[1, '1'], [2, '05']]
throughput: 1.0
# Store GPR
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 1.0
port_pressure: [[1, '3']]
throughput: 1.0
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 1.0
port_pressure: [[1, '3'], [1, '05']]
throughput: 1.0
- name: str
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 1.0
port_pressure: [[1, '3'], [1, '05']]
throughput: 1.0
# Store FP d
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 1.0
port_pressure: [[1, '3'], [1, '05']]
throughput: 1.0
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 1.0
port_pressure: [[1, '3'], [1, '05']]
throughput: 1.0
- name: str
operands:
- class: register
prefix: d
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 1.0
port_pressure: [[1, '3'], [1, '05']]
throughput: 1.0
# Store FP q
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
post-indexed: false
pre-indexed: false
latency: 4.0
port_pressure: [[2, '3']]
throughput: 2.0
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
post-indexed: true
pre-indexed: false
latency: 4.0
port_pressure: [[2, '3'], [1, '05']]
throughput: 2.0
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: 1
post-indexed: false
pre-indexed: true
latency: 2.0
port_pressure: [[2, '3'], [1, '05']]
throughput: 2.0
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 4.0
port_pressure: [[2, '3'], [1, '05']]
throughput: 2.0
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 4.0
port_pressure: [[2, '3'], [2, '05']]
throughput: 2.0
- name: str
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 4.0
port_pressure: [[2, '3'], [2, '05']]
throughput: 2.0
# Load unscaled GPR
- name: ldur
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: '*'
pre-indexed: '*'
latency: 4.0
port_pressure: [[1, '1']]
throughput: 1.0
# Load unscaled FP q
- name: ldur
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: '*'
pre-indexed: '*'
latency: 5.0
port_pressure: [[1, '1']]
throughput: 1.0
# Store unscaled GPR
- name: stur
operands:
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: '*'
pre-indexed: '*'
latency: 1.0
port_pressure: [[1, '3']]
throughput: 1.0
# Store unscaled FP q
- name: stur
operands:
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: '*'
pre-indexed: '*'
latency: 2.0
port_pressure: [[2, '3']]
throughput: 2.0
# Load pair GPR
- name: ldp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 4.0
port_pressure: [[1, '1']]
throughput: 1.0
- name: ldp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 4.0
port_pressure: [[1, '1'], [1, '05']]
throughput: 1.0
- name: ldp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 4.0
port_pressure: [[1, '1'], [1, '05']]
throughput: 1.0
# Load pair FP q
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 6.0
port_pressure: [[2, '1']]
throughput: 2.0
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 6.0
port_pressure: [[2, '1'], [1, '05']]
throughput: 2.0
- name: ldp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 6.0
port_pressure: [[2, '1'], [1, '05']]
throughput: 2.0
# Store pair GPR
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 2.0
port_pressure: [[2, '3']]
throughput: 2.0
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 2.0
port_pressure: [[2, '3'], [1, '05']]
throughput: 2.0
- name: stp
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 2.0
port_pressure: [[2, '3'], [1, '05']]
throughput: 2.0
# Store pair FP q
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: false
latency: 4.0
port_pressure: [[4, '3'], [1, '05']]
throughput: 4.0
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: true
pre-indexed: false
latency: 4.0
port_pressure: [[4, '3'], [1, '05']]
throughput: 4.0
- name: stp
operands:
- class: register
prefix: q
- class: register
prefix: q
- class: memory
base: x
offset: '*'
index: '*'
scale: '*'
post-indexed: false
pre-indexed: true
latency: 4.0
port_pressure: [[4, '3'], [1, '05']]
throughput: 4.0
# Fast-forward (measures 4 cycles, but can be 3)
# Lower bound is used in order to ensure no over-estimates are possible.
# Ports do not match documentation, but "fixing" requires also "fixing" almost
# the entire rest of the model.
- name: fadd
operands:
- class: register
prefix: s
- class: register
prefix: s
- class: register
prefix: s
latency: 3.0
port_pressure: [[1, '45']]
throughput: 0.5
- name: fadd
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: register
prefix: d
latency: 3.0
port_pressure: [[1, '45']]
throughput: 0.5
- name: fadd
operands:
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
latency: 3.0
port_pressure: [[1, '5']]
throughput: 1.0
- name: fadd
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
latency: 3.0
port_pressure: [[1, '5']]
throughput: 1.0
- name: fsub
operands:
- class: register
prefix: s
- class: register
prefix: s
- class: register
prefix: s
latency: 3.0
port_pressure: [[1, '45']]
throughput: 0.5
- name: fsub
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: register
prefix: d
latency: 3.0
port_pressure: [[1, '45']]
throughput: 0.5
- name: fsub
operands:
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
- class: register
prefix: v
shape: s
latency: 3.0
port_pressure: [[1, '5']]
throughput: 1.0
- name: fsub
operands:
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
- class: register
prefix: v
shape: d
latency: 3.0
port_pressure: [[1, '5']]
throughput: 1.0
# Automatically generated instructions

321
osaca/data/pmevo_importer.py Executable file
View File

@@ -0,0 +1,321 @@
#!/usr/bin/env python3
import argparse
import json
import math
import re
import sys
from asmbench import bench, op
from osaca.semantics import MachineModel
def build_bench_instruction(name, operands):
    """Convert an OSACA model instruction into an asmbench instruction string.

    Every operand is rendered as ``{direction:type:constraint}``; the first
    operand is the destination (``dst``) and all following ones are sources
    (``src``).  For immediates the constraint field carries the literal value.

    :param name: instruction mnemonic, e.g. ``"fadd"``
    :param operands: list of OSACA operand dicts (``class``, ``prefix``,
        ``shape``, ``shift_op``, ``shift`` keys as applicable)
    :returns: the asmbench instruction string, or ``None`` if any operand
        cannot be expressed (unsupported class, register prefix or vector shape)
    """
    # Scalar register prefix -> (value type, constraint).
    scalar_registers = {
        "x": ("i64", "r"),
        "s": ("float", "w"),
        "d": ("double", "w"),
    }
    # Vector arrangement specifier -> value type.
    vector_shapes = {
        "b": "<16 x i8>",
        "h": "<8 x i16>",
        "s": "<4 x float>",
        "d": "<2 x double>",
    }
    # Different instructions have different ranges for literals,
    # so we need to pick something "reasonable" for each.
    zero_immediate = {
        "cmeq",
        "cmge",
        "cmgt",
        "cmle",
        "cmlt",
        "fcmeq",
        "fcmge",
        "fcmgt",
        "fcmle",
        "fcmlt",
        "fcmp",
    }
    mask_immediate = {"and", "ands", "eor", "eors", "orr", "orrs"}
    small_immediate = {
        "bfi",
        "extr",
        "sbfiz",
        "sbfx",
        "shl",
        "sshr",
        "ubfiz",
        "ubfx",
        "ushr",
    }

    asmbench_inst = name
    direction = "dst"
    separator = " "
    for operand in operands:
        operand_class = operand["class"]
        if operand_class in ("register", "register_shift"):
            prefix = operand["prefix"]
            if prefix in scalar_registers:
                shape, constraint = scalar_registers[prefix]
            elif prefix == "v":
                constraint = "w"
                shape = vector_shapes.get(operand["shape"])
                if shape is None:
                    return None
            else:
                return None
        elif operand_class in ("immediate", "immediate_shift"):
            shape = "i32"
            if name in zero_immediate:
                constraint = "0"
            elif name in mask_immediate:
                constraint = "255"
            elif name in small_immediate:
                constraint = "7"
            else:
                constraint = "42"
        else:
            return None

        # The shift suffix belongs to this operand only; start from an empty
        # suffix each time so it is not repeated after later operands.
        shift = ""
        if operand_class in ("register_shift", "immediate_shift"):
            shift = ", {}".format(operand["shift_op"])
            # The shift amount is optional and may be missing entirely.
            if operand.get("shift") is not None:
                shift += " {}".format(operand["shift"])

        asmbench_inst += "{}{{{}:{}:{}}}{}".format(separator, direction, shape, constraint, shift)
        direction = "src"
        separator = ", "
    return asmbench_inst
def bench_instruction(name, operands):
    """Benchmark an OSACA model instruction with asmbench.

    The instruction is first translated by ``build_bench_instruction``.

    :param name: instruction mnemonic
    :param operands: list of OSACA operand dicts
    :returns: whatever ``bench.bench_instructions`` returns for the single
        instruction (the caller unpacks it as ``(latency, throughput)``), or
        ``(None, None)`` if the instruction could not be translated
    """
    instruction_text = build_bench_instruction(name, operands)
    if instruction_text is not None:
        parsed_instruction = op.Instruction.from_string(instruction_text)
        return bench.bench_instructions([parsed_instruction])
    # Translation failed, so there is nothing to measure.
    return (None, None)
def round_cycles(value):
    """Round a measured cycle count to the value stored in the model.

    Anything below 0.9 becomes 0.5: half-cycle values are frequently found and
    measurements over-estimate a lot there, hence the high bound.  Everything
    else is rounded down after adding a 0.1 tolerance, because measurements
    usually over-estimate but slightly smaller values should still be allowed.

    :param value: measured latency or throughput in cycles
    :returns: the rounded cycle count as ``float``
    """
    return 0.5 if value < 0.9 else float(math.floor(value + 0.1))
def operand_parse(op, state):
    """Parse one operand of a PMEvo instruction into an OSACA model operand.

    ``state`` is shared between all operands of one instruction and keeps track
    of information needed by later operands:

    * ``state["type"]`` -- ``"float"``/``"double"``, set by floating-point
      registers and used as the type of floating-point literals
    * ``state["memory_base"]`` -- base register prefix, set by a ``MEM`` operand
      and consumed by the following ``MIMM`` operand
    * ``state["previous"]`` -- the operand dict returned most recently

    :param op: operand text, including its leading ``_``
    :param state: mutable dict carrying the per-instruction parsing state
    :returns: the OSACA operand dict, or ``None`` if the operand only updates
        ``state`` (memory base)
    :raises ValueError: if the operand is unknown, unsupported, or depends on
        state that no earlier operand provided
    """
    parameter = {}
    if op.startswith("_((REG:"):
        parts = op.split(".")
        # Strip the leading "_((REG:" and the trailing "))".
        register = parts[0][7:-2]
        _read_write, register_type, bits = register.split(":")
        parameter["class"] = "register"
        if register_type == "G":
            if bits == "32":
                # NOTE(review): AArch64 assembly names 32-bit general purpose
                # registers "w"; "r" is kept unchanged here -- confirm.
                parameter["prefix"] = "r"
            elif bits == "64":
                parameter["prefix"] = "x"
            else:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
        elif register_type == "F":
            if bits == "32":
                parameter["prefix"] = "s"
                state["type"] = "float"
            elif bits == "64":
                parameter["prefix"] = "d"
                state["type"] = "double"
            elif bits == "128":
                parameter["prefix"] = "q"
            elif bits == "VEC":
                # The arrangement specifier follows the register after a ".".
                if len(parts) < 2:
                    raise ValueError("Missing vector shape for {}".format(op))
                vec_shape = parts[1]
                parameter["prefix"] = "v"
                # Arrangement -> (OSACA shape, literal type or None).
                vector_shapes = {
                    "16b": ("b", None),
                    "8h": ("h", None),
                    "4s": ("s", "float"),
                    "2d": ("d", "double"),
                }
                if vec_shape not in vector_shapes:
                    raise ValueError("Invalid vector shape {}".format(vec_shape))
                parameter["shape"], literal_type = vector_shapes[vec_shape]
                if literal_type is not None:
                    state["type"] = literal_type
            else:
                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
        else:
            raise ValueError("Unknown register type {}".format(register_type))
    elif op.startswith("_[((MEM:"):
        bits = op[8:-2].split(":")[0]
        if bits == "64":
            state["memory_base"] = "x"
        else:
            raise ValueError("Invalid register bits for MEM {}".format(bits))
        # The memory operand itself is emitted by the following MIMM operand.
        return None
    elif op.startswith("_((MIMM:"):
        bits = op[8:-3].split(":")[0]
        if bits == "16":
            # Raise ValueError (not KeyError) so the caller can skip just this
            # instruction instead of aborting the whole import.
            if "memory_base" not in state:
                raise ValueError("Memory offset {} without preceding memory base".format(op))
            parameter["class"] = "memory"
            parameter["base"] = state["memory_base"]
            parameter["offset"] = "imd"
            parameter["index"] = "*"
            parameter["scale"] = "*"
            parameter["post-indexed"] = False
            parameter["pre-indexed"] = False
        else:
            raise ValueError("Invalid register bits for MEM {}".format(bits))
    elif re.fullmatch(r"_#?-?(0x)?[0-9a-f]+", op):
        parameter["class"] = "immediate"
        parameter["imd"] = "int"
    elif re.fullmatch(r"_#?-?[0-9]*\.[0-9]*", op):
        # The literal's type is taken from the last floating-point register.
        if "type" not in state:
            raise ValueError("Cannot determine type of floating-point literal {}".format(op))
        parameter["class"] = "immediate"
        parameter["imd"] = state["type"]
    elif re.fullmatch(r"_((sxt|uxt)[bhw]|lsl|lsr|asr|rol|ror)(_[0-9]+)?", op):
        # Shift/extend modifiers are not supported yet.  Supporting them would
        # mean turning the previous operand (state["previous"]) into a
        # "<class>_shift" operand carrying "shift_op" and an optional "shift".
        raise ValueError("Skipping instruction with shift operand: {}".format(op))
    else:
        raise ValueError("Unknown operand {}".format(op))
    state["previous"] = parameter
    return parameter
def port_convert(ports):
    """Convert a PMEvo port assignment into OSACA's port pressure format.

    Consecutive entries offering the same set of ports are merged into a single
    ``[cycles, ports]`` pair, where ``ports`` is the concatenation of the port
    names of one entry.

    FIXME: This does not handle having more than 10 ports.

    :param ports: list of entries, each a list of port name strings
    :returns: list of ``[cycles, port_string]`` pairs in input order
    """
    pressures = []
    for entry in ports:
        port_string = "".join(entry)
        if pressures and pressures[-1][1] == port_string:
            # Same ports as the entry before: extend the current run.
            pressures[-1][0] += 1
        else:
            pressures.append([1, port_string])
    return pressures
def throughput_guess(ports):
    """Guess the reciprocal throughput from a PMEvo port assignment.

    The entry with the fewest possible ports should determine the throughput
    to some degree of accuracy.  (THIS IS *NOT* ALWAYS TRUE!)

    :param ports: list of entries, each a list of possible ports
    :returns: number of entries divided by the smallest entry size, as ``float``
    """
    fewest_choices = min(len(choices) for choices in ports)
    entry_count = float(len(ports))
    return entry_count / fewest_choices
def latency_guess(ports):
    """Guess the latency from a PMEvo port assignment.

    Each entry in ``ports`` equates to one cycle on any of its ports, so the
    number of entries is about as good a guess as it is going to get.

    :param ports: list of entries of the port assignment
    :returns: number of entries as ``float``
    """
    cycle_count = len(ports)
    return float(cycle_count)
def extract_model(mapping, arch, template_model, asmbench):
    """Build an OSACA machine model from a PMEvo mapping.

    :param mapping: parsed PMEvo mapping; ``mapping["arch"]["ports"]``,
        ``mapping["arch"]["insns"]`` and ``mapping["assignment"]`` are read
    :param arch: architecture name used to look up the ISA
    :param template_model: machine model to extend, or ``None`` to start from
        an empty model for the ISA
    :param asmbench: if true, latency and throughput are measured with asmbench
        instead of only being guessed from the port assignment
    :returns: the (possibly extended) machine model, or ``None`` if no ISA is
        known for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except ValueError:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa) if template_model is None else template_model

    arch_description = mapping["arch"]
    for port in arch_description["ports"]:
        mm.add_port(port)

    for insn in arch_description["insns"]:
        try:
            ports = mapping["assignment"][insn]

            # The mnemonic is the second "_"-separated field; everything after
            # it is the comma-separated operand list.
            pieces = insn.split("_")
            name = pieces[1]
            operand_texts = ("_" + "_".join(pieces[2:])).split(",")
            state = {}
            operands = []
            for operand_text in operand_texts:
                parsed = operand_parse(operand_text, state)
                if parsed is None:
                    # Operand only updated the parsing state.
                    continue
                operands.append(parsed)

            # Port pressures come straight from the mapping.
            port_pressure = port_convert(ports)

            # Start with guesses; measurements replace them where available.
            throughput = throughput_guess(ports)
            latency = latency_guess(ports)
            if asmbench:
                bench_latency, bench_throughput = bench_instruction(name, operands)
                if bench_throughput is None:
                    print("Failed to measure throughput for instruction {}.".format(insn))
                else:
                    throughput = round_cycles(bench_throughput)
                if bench_latency is None:
                    print("Failed to measure latency for instruction {}.".format(insn))
                else:
                    latency = round_cycles(bench_latency)

            # Insert instruction if not already found (can happen with template).
            # No u-ops data is available, hence the trailing None.
            if mm.get_instruction(name, operands) is None:
                mm.set_instruction(name, operands, latency, port_pressure, throughput, None)
        except ValueError as e:
            print("Failed to parse instruction {}: {}.".format(insn, e))
    return mm
def main():
    """Import a PMEvo mapping and write it out as an OSACA machine model.

    Command line arguments: the path of the PMEvo ``mapping.json``, an optional
    ``template.yml`` whose model is extended, and ``--asmbench`` to measure
    latency and throughput with asmbench.  The model is written to
    ``<arch>.yml`` in the current directory, where ``<arch>`` is the lower-cased
    architecture name from the mapping.

    Exits with an error message if no model could be built for the
    architecture named in the mapping.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("json", help="path of mapping.json")
    parser.add_argument("yaml", help="path of template.yml", nargs="?")
    parser.add_argument(
        "--asmbench", help="Benchmark latency and throughput using asmbench.", action="store_true"
    )
    args = parser.parse_args()

    # Context manager closes the file even if the JSON is malformed.
    with open(args.json, "r") as json_file:
        mapping = json.load(json_file)
    arch = mapping["arch"]["name"].lower()

    template_model = None
    if args.yaml is not None:
        template_model = MachineModel(path_to_yaml=args.yaml)

    if args.asmbench:
        bench.setup_llvm()

    model = extract_model(mapping, arch, template_model, args.asmbench)
    if model is None:
        # extract_model() returns None for architectures without a known ISA.
        sys.exit("No machine model could be built for architecture '{}'.".format(arch))

    # Serialize first so a failing dump does not leave an empty output file.
    serialized_model = model.dump()
    with open("{}.yml".format(arch), "w") as f:
        f.write(serialized_model)


if __name__ == "__main__":
    main()

View File

@@ -32,6 +32,7 @@ SUPPORTED_ARCHS = [
"TX2",
"N1",
"A64FX",
"A72",
]
DEFAULT_ARCHS = {
"aarch64": "A64FX",
@@ -95,7 +96,7 @@ def create_parser(parser=None):
"--arch",
type=str,
help="Define architecture (SNB, IVB, HSW, BDW, SKX, CSX, ICL, ZEN1, ZEN2, TX2, N1, "
"A64FX). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
"A64FX, A72). If no architecture is given, OSACA assumes a default uarch for x86/AArch64.",
)
parser.add_argument(
"--fixed",

View File

@@ -266,11 +266,13 @@ class MachineModel(object):
"""Return ISA for given micro-arch ``arch``."""
arch_dict = {
"a64fx": "aarch64",
"a72": "aarch64",
"tx2": "aarch64",
"n1": "aarch64",
"zen1": "x86",
"zen+": "x86",
"zen2": "x86",
"icl": "x86",
"con": "x86", # Intel Conroe
"wol": "x86", # Intel Wolfdale
"snb": "x86",