Restore deleted files

2025-12-15 16:40:05 +01:00 · 2024-02-22 13:37:13 +01:00
parent ec798f61b2
commit 6df973d16a
5 changed files with 2855 additions and 0 deletions
--- a/osaca/data/a72/mapping_pmevo.json
+++ b/osaca/data/a72/mapping_pmevo.json
--- a/osaca/data/create_db_entry.py
+++ b/osaca/data/create_db_entry.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+from collections import defaultdict
+from fractions import Fraction
+
+
+class EntryBuilder:
+    """Build YAML-formatted instruction database entries from compact descriptions."""
+
+    @staticmethod
+    def compute_throughput(port_pressure):
+        """
+        Return the occupancy of the busiest port as a float.
+
+        ``port_pressure`` is a list of ``[uops, ports]`` pairs, where ``ports`` is either a
+        string (each character is one port) or a list of port names. The uops of a pair are
+        spread evenly over its ports. Returns ``0.0`` if there is no pressure at all.
+        """
+        port_occupancy = defaultdict(Fraction)
+        for uops, ports in port_pressure:
+            for p in ports:
+                port_occupancy[p] += Fraction(uops, len(ports))
+        # the leading 0 keeps max() defined for an empty port_pressure
+        return float(max([0, *port_occupancy.values()]))
+
+    @staticmethod
+    def classify(operands_types):
+        """
+        Return the tuple ``(load, store, vec)`` for a list of operand type strings.
+
+        A "mem" operand in the last position counts as store, in any other position as load.
+        ``vec`` is set if any operand is one of "mm", "xmm", "ymm" or "zmm".
+
+        Raises AssertionError if the instruction would be both a load and a store.
+        """
+        load = "mem" in operands_types[:-1]
+        store = "mem" in operands_types[-1:]
+        vec = any(vecr in operands_types for vecr in ("mm", "xmm", "ymm", "zmm"))
+        if load and store:
+            # raised explicitly (instead of `assert`) so the check survives `python -O`
+            raise AssertionError("Can not process a combined load-store instruction.")
+        return load, store, vec
+
+    def build_description(
+        self, instruction_name, operand_types, port_pressure=None, latency=0, comment=None
+    ):
+        """
+        Return the YAML text of one instruction entry.
+
+        ``operand_types`` may contain "imd", any string starting with "mem" ("mem_simple" and
+        "mem_complex" select the index field) or a register name, optionally carrying "{k}" to
+        mark a masked register. ``port_pressure`` is a list of ``[uops, ports]`` pairs (no
+        pressure if omitted); throughput and uops are derived from it. ``comment`` is appended
+        to the name line.
+        """
+        # a fresh list per call instead of a shared mutable default
+        if port_pressure is None:
+            port_pressure = []
+        comment = "  # " + comment if comment else ""
+        description = "- name: {}{}\n  operands: {}\n".format(
+            instruction_name, comment, "" if operand_types else "[]"
+        )
+
+        for ot in operand_types:
+            if ot == "imd":
+                description += "  - class: immediate\n    imd: int\n"
+            elif ot.startswith("mem"):
+                description += "  - class: memory\n" '    base: "*"\n' '    offset: "*"\n'
+                if ot == "mem_simple":
+                    description += "    index: ~\n"
+                elif ot == "mem_complex":
+                    description += "    index: gpr\n"
+                else:
+                    description += '    index: "*"\n'
+                description += '    scale: "*"\n'
+            elif "{k}" in ot:
+                description += "  - class: register\n    name: {}\n    mask: True\n".format(
+                    ot.replace("{k}", "")
+                )
+            else:
+                description += "  - class: register\n    name: {}\n".format(ot)
+
+        description += (
+            "  latency: {latency}\n"
+            "  port_pressure: {port_pressure!r}\n"
+            "  throughput: {throughput}\n"
+            "  uops: {uops}\n"
+        ).format(
+            latency=latency,
+            port_pressure=port_pressure,
+            throughput=self.compute_throughput(port_pressure),
+            uops=sum(i for i, p in port_pressure),
+        )
+        return description
+
+    def parse_port_pressure(self, port_pressure_str):
+        """
+        Parse a port pressure string into a list of ``[cycles, ports]`` pairs.
+
+        A single port group stays a string, comma separated groups become a list.
+
+        Example:
+        1*p45+2*p0+2*p10,11 -> [[1, '45'], [2, '0'], [2, ['10', '11']]]
+        """
+        port_pressure = []
+        if port_pressure_str:
+            for p in port_pressure_str.split("+"):
+                cycles, ports = p.split("*p")
+                ports = ports.split(",")
+                if len(ports) == 1:
+                    ports = ports[0]
+                else:
+                    # drop empty names caused by stray commas
+                    ports = [port for port in ports if len(port) > 0]
+
+                port_pressure.append([int(cycles), ports])
+        return port_pressure
+
+    def process_item(self, instruction_form, resources):
+        """
+        Build the entry for an instruction form and its ``(port_pressure, latency)`` pair.
+
+        The instruction form is the name followed by space separated operand types; a name
+        written in square brackets may itself contain spaces.
+
+        Example:
+        ('mov xmm mem', ('1*p45+2*p0', 7)) -> ('mov', ['xmm', 'mem'], [[1, '45'], [2, '0']], 7)
+        which is then handed to build_description().
+        """
+        if instruction_form.startswith("[") and "]" in instruction_form:
+            instr_elements = instruction_form.split("]")
+            instr_elements = [instr_elements[0] + "]"] + instr_elements[1].strip().split(" ")
+        else:
+            instr_elements = instruction_form.split(" ")
+        latency = int(resources[1])
+        port_pressure = self.parse_port_pressure(resources[0])
+        instruction_name = instr_elements[0]
+        operand_types = instr_elements[1:]
+        return self.build_description(instruction_name, operand_types, port_pressure, latency)
+
+
+class ArchEntryBuilder(EntryBuilder):
+    """EntryBuilder that adds load/store port pressure and latency for memory operands."""
+
+    def build_description(self, instruction_name, operand_types, port_pressure=None, latency=0):
+        """
+        Return the YAML entry text, extended by load or store costs where applicable.
+
+        If classify() reports a load or a store, the matching hard-coded pressure list and
+        latency are added and the entry is commented "with load" / "with store". The passed
+        ``port_pressure`` list is never modified.
+        """
+        # Intel ICX
+        # LD_pressure = [[1, "23"], [1, ["2D", "3D"]]]
+        # LD_pressure_vec = LD_pressure
+        # ST_pressure = [[1, "79"], [1, "48"]]
+        # ST_pressure_vec = ST_pressure
+        # LD_lat = 5
+        # ST_lat = 0
+        # Zen3
+        LD_pressure = [[1, ["11", "12", "13"]]]
+        LD_pressure_vec = [[1, ["11", "12"]]]
+        ST_pressure = [[1, ["12", "13"]]]
+        ST_pressure_vec = [[1, ["4"]], [1, ["13"]]]
+        LD_lat = 4
+        ST_lat = 0
+
+        # Work on a copy: extending the argument in place would leak the added pressure into
+        # the caller's list (and into a shared default value) on every call.
+        port_pressure = list(port_pressure) if port_pressure is not None else []
+
+        load, store, vec = self.classify(operand_types)
+
+        if load:
+            port_pressure = port_pressure + (LD_pressure_vec if vec else LD_pressure)
+            latency += LD_lat
+            return EntryBuilder.build_description(
+                self, instruction_name, operand_types, port_pressure, latency, "with load"
+            )
+        if store:
+            port_pressure = port_pressure + (ST_pressure_vec if vec else ST_pressure)
+            latency += ST_lat
+            return EntryBuilder.build_description(
+                self, instruction_name, operand_types, port_pressure, latency, "with store"
+            )
+
+        # Register only:
+        return EntryBuilder.build_description(
+            self, instruction_name, operand_types, port_pressure, latency
+        )
+
+
+def get_description(instruction_form, port_pressure, latency, rhs_comment=None):
+    """
+    Return the database entry text for an instruction form.
+
+    If ``rhs_comment`` is given, every line of the entry (including the empty one after the
+    final newline) is padded to the longest line and suffixed with "  # <rhs_comment>".
+    """
+    description = ArchEntryBuilder().process_item(instruction_form, (port_pressure, latency))
+    if rhs_comment is None:
+        return description
+
+    rows = description.split("\n")
+    width = max(len(row) for row in rows)
+    return "".join("{}  # {}\n".format(row.ljust(width), rhs_comment) for row in rows)
+
+
+if __name__ == "__main__":
+    import sys
+
+    # script name plus three mandatory arguments and one optional comment
+    if len(sys.argv) not in (4, 5):
+        print("Usage: {} <INSTRUCTION> <PORT_PRESSURE> <LATENCY> [COMMENT]".format(sys.argv[0]))
+        sys.exit(0)
+
+    cli_arguments = sys.argv[1:]
+    try:
+        print(get_description(*cli_arguments))
+    except KeyError:
+        print("Unknown architecture.")
+        sys.exit(1)
--- a/osaca/data/generate_mov_entries.py
+++ b/osaca/data/generate_mov_entries.py
--- a/osaca/data/model_importer.py
+++ b/osaca/data/model_importer.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+import argparse
+import os.path
+import sys
+import xml.etree.ElementTree as ET
+from distutils.version import StrictVersion
+
+from osaca.parser import get_parser
+from osaca.semantics import MachineModel
+
+# Intel architecture abbreviations. extract_model() adds the extra 2D/3D ports only for
+# architectures listed here (with the exception of "ICL").
+intel_archs = [
+    "CON",
+    "WOL",
+    "NHM",
+    "WSM",
+    "SNB",
+    "IVB",
+    "HSW",
+    "BDW",
+    "SKL",
+    "SKX",
+    "KBL",
+    "CFL",
+    "CNL",
+    "ICL",
+]
+# AMD architecture abbreviations.
+# NOTE(review): not referenced anywhere else in the visible part of this file.
+amd_archs = ["ZEN1", "ZEN+", "ZEN2"]
+
+
+def port_pressure_from_tag_attributes(attrib):
+    """
+    Convert the "ports" attribute of an XML tag into a list of [cycles, ports] pairs.
+
+    '1*p015+1*p1+1*p23+1*p4+3*p5' ->
+    [[1, '015'], [1, '1'], [1, '23'], [1, '4'], [3, '5']]
+
+    If a "div_cycles" attribute is present, a [div_cycles, ["DIV"]] pair is appended.
+    """
+    port_occupation = []
+    for term in attrib["ports"].split("+"):
+        cycles, port_names = term.split("*")
+        # remove leading "p" characters first, then any leading "F"/"P" characters
+        port_names = port_names.lstrip("p").lstrip("FP")
+        port_occupation.append([int(cycles), port_names])
+
+    # Also consider div on DIV pipeline
+    div_cycles = attrib["div_cycles"] if "div_cycles" in attrib else None
+    if div_cycles is not None:
+        port_occupation.append([int(div_cycles), ["DIV"]])
+
+    return port_occupation
+
+
+def extract_paramters(instruction_tag, parser, isa):
+    """
+    Return the list of operand dicts for the <operand> children of an instruction tag.
+
+    Operands are processed in the order of their "idx" attribute; operands marked with
+    suppressed=1 are left out. ``parser`` is only used for register operands (its
+    parse_register() and is_vector_register() methods are called), ``isa`` selects how
+    register names are normalized ("x86" or "aarch64").
+
+    Raises ValueError for an unknown operand type or an unparsable register.
+    """
+    parameters = []
+    parameter_tags = sorted(instruction_tag.findall("operand"), key=lambda p: int(p.attrib["idx"]))
+    for parameter_tag in parameter_tags:
+        # Ignore parameters with suppressed=1
+        if int(parameter_tag.attrib.get("suppressed", "0")):
+            continue
+
+        p_type = parameter_tag.attrib["type"]
+        if p_type == "imm":
+            parameter = {"class": "immediate", "imd": "int"}
+        elif p_type in ("mem", "agen"):
+            # address generation operands are described exactly like memory operands
+            parameter = {
+                "class": "memory",
+                "base": "*",
+                "offset": "*",
+                "index": "*",
+                "scale": "*",
+            }
+        elif p_type == "reg":
+            parameter = {"class": "register"}
+            possible_regs = [parser.parse_register("%" + r) for r in parameter_tag.text.split(",")]
+            if possible_regs[0] is None:
+                raise ValueError(
+                    "Unknown register type for {} with {}.".format(
+                        parameter_tag.attrib, parameter_tag.text
+                    )
+                )
+            # only the first listed register is used to describe the operand
+            register = possible_regs[0]["register"]
+            if isa == "x86":
+                if parser.is_vector_register(register):
+                    register["name"] = register["name"].lower()[:3]
+                    if "mask" in register:
+                        register["mask"] = True
+                else:
+                    register["name"] = "gpr"
+            elif isa == "aarch64":
+                # possible_regs is a list, so the register dict has to be reached via
+                # its first element (indexing the list with "register" raised TypeError)
+                del register["name"]
+            parameter.update(register)
+        elif p_type == "relbr":
+            parameter = {"class": "identifier"}
+        else:
+            raise ValueError("Unknown paramter type {}".format(parameter_tag.attrib))
+        parameters.append(parameter)
+    return parameters
+
+
+def extract_model(tree, arch, skip_mem=True):
+    """
+    Build a MachineModel for ``arch`` from the parsed instruction XML ``tree``.
+
+    For every <instruction> that has an <architecture> child named like ``arch`` (upper
+    case) with port information, the operands, latency, port pressure and uops are
+    collected and stored via MachineModel.set_instruction(). Instruction forms with memory
+    operands are skipped unless ``skip_mem`` is False. Diagnostics go to stderr.
+
+    Returns the model, or None if no ISA could be determined for ``arch``.
+    """
+    try:
+        isa = MachineModel.get_isa_for_arch(arch)
+    except Exception:
+        print("Skipping...", file=sys.stderr)
+        return None
+    mm = MachineModel(isa=isa)
+    parser = get_parser(isa)
+
+    for instruction_tag in tree.findall(".//instruction"):
+        mnemonic = instruction_tag.attrib["asm"]
+        iform = instruction_tag.attrib["iform"]
+        # reduce to second part if mnemonic contain space (e.g., "REX CRC32")
+        if " " in mnemonic:
+            mnemonic = mnemonic.split(" ", 1)[1]
+
+        # Extract parameter components
+        try:
+            parameters = extract_paramters(instruction_tag, parser, isa)
+        except ValueError as e:
+            # Skip this form: carrying on would reuse the operands of the previous
+            # instruction (or hit an undefined name on the first iteration).
+            print(e, file=sys.stderr)
+            continue
+        if isa == "x86":
+            # NOTE(review): presumably switches to the operand order used by the x86
+            # parser - confirm
+            parameters.reverse()
+
+        # Extract port occupation, throughput and latency
+        port_pressure, throughput, latency, uops = [], None, None, None
+        arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]')
+        if arch_tag is None:
+            continue
+        # skip any instructions without port utilization
+        if not any("ports" in x.attrib for x in arch_tag.findall("measurement")):
+            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
+            continue
+        # warn (without skipping) if the port-based TP of the first measurement is larger
+        # than the measured one
+        if [
+            float(x.attrib["TP_ports"])
+            > min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"]))
+            for x in arch_tag.findall("measurement")
+        ][0]:
+            print(
+                "Calculated TP is greater than measured TP.",
+                iform,
+                file=sys.stderr,
+            )
+        # skip if instruction contains memory operand
+        if skip_mem and any(
+            x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")
+        ):
+            print("Contains memory operand, skip: ", iform, file=sys.stderr)
+            continue
+        # We collect all measurement and IACA information and compare them later
+        for measurement_tag in arch_tag.iter("measurement"):
+            if "TP_ports" in measurement_tag.attrib:
+                throughput = float(measurement_tag.attrib["TP_ports"])
+            else:
+                # Attribute values are strings: convert before comparing, otherwise min()
+                # mixes str and float. "TP_unrolled" is the spelling read further up,
+                # "TP_unroll" is kept for backward compatibility.
+                throughput = min(
+                    float(measurement_tag.attrib.get(key, "inf"))
+                    for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP")
+                )
+                if throughput == float("inf"):
+                    throughput = None
+            uops = (
+                int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None
+            )
+            if "ports" in measurement_tag.attrib:
+                port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib))
+            latencies = [
+                int(l_tag.attrib["cycles"])
+                for l_tag in measurement_tag.iter("latency")
+                if "cycles" in l_tag.attrib
+            ]
+            if len(latencies) == 0:
+                # fall back to the upper bounds if no exact cycle counts are given
+                latencies = [
+                    int(l_tag.attrib["max_cycles"])
+                    for l_tag in measurement_tag.iter("latency")
+                    if "max_cycles" in l_tag.attrib
+                ]
+            # true as soon as two neighbouring entries differ
+            if latencies[1:] != latencies[:-1]:
+                print(
+                    "Contradicting latencies found, using smallest:",
+                    iform,
+                    latencies,
+                    file=sys.stderr,
+                )
+            if latencies:
+                latency = min(latencies)
+
+        # Ordered by IACA version (newest last)
+        for iaca_tag in sorted(
+            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
+        ):
+            if "ports" in iaca_tag.attrib:
+                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))
+
+        # Check if all are equal
+        if port_pressure:
+            if port_pressure[1:] != port_pressure[:-1]:
+                print(
+                    "Contradicting port occupancies, using latest IACA:",
+                    iform,
+                    file=sys.stderr,
+                )
+            # the last collected entry wins (latest IACA if present, else last measurement)
+            port_pressure = port_pressure[-1]
+        else:
+            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
+            continue
+
+        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
+        if arch.upper() in intel_archs and not arch.upper() in ["ICL"]:
+            if any(p["class"] == "memory" for p in parameters):
+                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
+                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
+                port_23 = False
+                port_4 = False
+                for pp in port_pressure:
+                    if "2" in pp[1] and "3" in pp[1]:
+                        port_23 = True
+                    if "4" in pp[1]:
+                        port_4 = True
+                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
+                if port_23 and not port_4:
+                    if (
+                        arch.upper() in ["SNB", "IVB"]
+                        and any(p.get("name", "") == "ymm" for p in parameters)
+                        and not ("128" in mnemonic)
+                    ):
+                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
+                        # instruction name
+                        port2D3D_pressure = 2
+                    else:
+                        # otherwise x = 1
+                        port2D3D_pressure = 1
+                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))
+
+        # Add missing ports:
+        for ports in [pp[1] for pp in port_pressure]:
+            for p in ports:
+                mm.add_port(p)
+
+        # the stored throughput is always derived from the final port pressure
+        throughput = max(mm.average_port_pressure(port_pressure))
+        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
+    # TODO eliminate entries which could be covered by automatic load / store expansion
+    return mm
+
+
+def rhs_comment(uncommented_string, comment):
+    """
+    Append "  # <comment>" to every line of ``uncommented_string``.
+
+    Lines are padded with spaces to the length of the longest line, so all comments start
+    in the same column. Every output line, including the last, ends with a newline.
+    """
+    rows = uncommented_string.split("\n")
+    width = max(len(row) for row in rows)
+    return "".join("{}  # {}\n".format(row.ljust(width), comment) for row in rows)
+
+
+def architectures(tree):
+    """Return the set of "name" attributes of all <architecture> tags in ``tree``."""
+    names = set()
+    for arch_tag in tree.findall(".//architecture"):
+        names.add(arch_tag.attrib["name"])
+    return names
+
+
+def main():
+    """
+    Command line entry point.
+
+    With an architecture argument the extracted model is printed to stdout; without one,
+    a model is extracted for every architecture found in the XML file and written to
+    "<arch>.yml" in the current working directory.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("xml", help="path of instructions.xml from http://uops.info")
+    cli.add_argument(
+        "arch",
+        nargs="?",
+        help="architecture to extract, use IACA abbreviations (e.g., SNB). "
+        "if not given, all will be extracted and saved to file in CWD.",
+    )
+    cli.add_argument(
+        "--mem",
+        dest="skip_mem",
+        action="store_false",
+        help="add instruction forms including memory addressing operands, which are "
+        "skipped by default",
+    )
+    options = cli.parse_args()
+    script_name = os.path.basename(__file__)
+
+    tree = ET.parse(options.xml)
+    print("# Available architectures:", ", ".join(architectures(tree)))
+
+    if not options.arch:
+        # no architecture chosen: export all of them, one file each
+        for arch in architectures(tree):
+            print(arch, end="")
+            model = extract_model(tree, arch.lower(), options.skip_mem)
+            if not model:
+                continue
+            model_string = rhs_comment(model.dump(), script_name + " " + arch)
+
+            with open("{}.yml".format(arch.lower()), "w") as f:
+                f.write(model_string)
+            print(".")
+        return
+
+    print("# Chosen architecture: {}".format(options.arch))
+    model = extract_model(tree, options.arch, options.skip_mem)
+    if model is not None:
+        print(rhs_comment(model.dump(), "uops.info import"))
+
+
+if __name__ == "__main__":
+    main()
--- a/osaca/data/pmevo_importer.py
+++ b/osaca/data/pmevo_importer.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import math
+import re
+import sys
+
+from asmbench import bench, op
+from osaca.semantics import MachineModel
+
+
+def build_bench_instruction(name, operands):
+    """
+    Convert an OSACA model instruction to an asmbench instruction string.
+
+    The first operand is emitted as destination, all following ones as sources.
+    Returns `None` in case something went wrong (unknown operand class, register prefix
+    or vector shape).
+    """
+    # register prefix -> (shape, constraint)
+    scalar_registers = {"x": ("i64", "r"), "s": ("float", "w"), "d": ("double", "w")}
+    # vector register shape -> asmbench shape
+    vector_shapes = {
+        "b": "<16 x i8>",
+        "h": "<8 x i16>",
+        "s": "<4 x float>",
+        "d": "<2 x double>",
+    }
+    # Different instructions have different ranges for literals,
+    # so need to pick something "reasonable" for each.
+    zero_literal = (
+        "cmeq",
+        "cmge",
+        "cmgt",
+        "cmle",
+        "cmlt",
+        "fcmeq",
+        "fcmge",
+        "fcmgt",
+        "fcmle",
+        "fcmlt",
+        "fcmp",
+    )
+    byte_literal = ("and", "ands", "eor", "eors", "orr", "orrs")
+    small_literal = ("bfi", "extr", "sbfiz", "sbfx", "shl", "sshr", "ubfiz", "ubfx", "ushr")
+
+    asmbench_inst = name
+    # once set by a shifted operand, the suffix is also appended to later operands
+    shift = ""
+    for position, operand in enumerate(operands):
+        operand_class = operand["class"]
+        if operand_class in ("register", "register_shift"):
+            prefix = operand["prefix"]
+            if prefix in scalar_registers:
+                shape, constraint = scalar_registers[prefix]
+            elif prefix == "v":
+                constraint = "w"
+                shape = vector_shapes.get(operand["shape"])
+                if shape is None:
+                    return None
+            else:
+                return None
+        elif operand_class in ("immediate", "immediate_shift"):
+            shape = "i32"
+            if name in zero_literal:
+                constraint = "0"
+            elif name in byte_literal:
+                constraint = "255"
+            elif name in small_literal:
+                constraint = "7"
+            else:
+                constraint = "42"
+        else:
+            return None
+
+        if operand_class in ("register_shift", "immediate_shift"):
+            shift = ", {}".format(operand["shift_op"])
+            if operand["shift"] is not None:
+                shift += " {}".format(operand["shift"])
+
+        is_first = position == 0
+        separator = " " if is_first else ", "
+        direction = "dst" if is_first else "src"
+        asmbench_inst += "{}{{{}:{}:{}}}{}".format(separator, direction, shape, constraint, shift)
+    return asmbench_inst
+
+
+def bench_instruction(name, operands):
+    """
+    Convert an OSACA model instruction to an asmbench one and benchmark it.
+
+    Returns whatever bench.bench_instructions() yields for the instruction, or
+    (None, None) if the instruction could not be converted.
+    """
+    instruction_string = build_bench_instruction(name, operands)
+    if instruction_string is not None:
+        return bench.bench_instructions([op.Instruction.from_string(instruction_string)])
+    return (None, None)
+
+
+def round_cycles(value):
+    """Snap a measured cycle count to 0.5 (below 0.9) or to a whole number of cycles."""
+    below_one_cycle = value < 0.9
+    if not below_one_cycle:
+        # Measurements usually over-estimate, so usually round down,
+        # but still allow slightly smaller values.
+        return float(math.floor(value + 0.1))
+    # Frequently found, so we might want to include them.
+    # Measurements over-estimate a lot here, hence the high bound.
+    return 0.5
+
+
+def operand_parse(op, state):
+    """
+    Parse an operand from a PMEvo instruction and emit an OSACA model one.
+
+    ``state`` is a dict shared between the operands of one instruction. It keeps track of
+    information needed by later operands: "type" (float/double, for float literals),
+    "memory_base" (base register prefix, for the memory immediate) and "previous" (the
+    operand dict returned last).
+
+    Returns the operand dict, or None for a memory base operand, which only updates
+    ``state``.
+
+    Raises ValueError for every operand that cannot be parsed. This includes operands which
+    depend on state no earlier operand provided, so callers can skip the instruction
+    instead of failing on a KeyError/IndexError.
+    """
+    parameter = {}
+
+    if op.startswith("_((REG:"):
+        parts = op.split(".")
+        # strip the leading "_((REG:" and the trailing "))"
+        register = parts[0][7:-2]
+        _read_write, register_type, bits = register.split(":")
+
+        parameter["class"] = "register"
+        if register_type == "G":
+            if bits == "32":
+                parameter["prefix"] = "r"
+            elif bits == "64":
+                parameter["prefix"] = "x"
+            else:
+                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
+        elif register_type == "F":
+            if bits == "32":
+                parameter["prefix"] = "s"
+                state["type"] = "float"
+            elif bits == "64":
+                parameter["prefix"] = "d"
+                state["type"] = "double"
+            elif bits == "128":
+                parameter["prefix"] = "q"
+            elif bits == "VEC":
+                # a missing ".<shape>" suffix is reported as an invalid (empty) shape
+                vec_shape = parts[1] if len(parts) > 1 else ""
+                parameter["prefix"] = "v"
+                if vec_shape == "16b":
+                    parameter["shape"] = "b"
+                elif vec_shape == "8h":
+                    parameter["shape"] = "h"
+                elif vec_shape == "4s":
+                    parameter["shape"] = "s"
+                    state["type"] = "float"
+                elif vec_shape == "2d":
+                    parameter["shape"] = "d"
+                    state["type"] = "double"
+                else:
+                    raise ValueError("Invalid vector shape {}".format(vec_shape))
+            else:
+                raise ValueError("Invalid register bits for {} {}".format(register_type, bits))
+        else:
+            raise ValueError("Unknown register type {}".format(register_type))
+    elif op.startswith("_[((MEM:"):
+        bits = op[8:-2].split(":")[0]
+        if bits == "64":
+            state["memory_base"] = "x"
+        else:
+            raise ValueError("Invalid register bits for MEM {}".format(bits))
+        # the memory operand itself is emitted by the following MIMM operand
+        return None
+    elif op.startswith("_((MIMM:"):
+        bits = op[8:-3].split(":")[0]
+        if bits == "16":
+            if "memory_base" not in state:
+                raise ValueError("Memory immediate {} without preceding memory base".format(op))
+            parameter["class"] = "memory"
+            parameter["base"] = state["memory_base"]
+            parameter["offset"] = "imd"
+            parameter["index"] = "*"
+            parameter["scale"] = "*"
+            parameter["post-indexed"] = False
+            parameter["pre-indexed"] = False
+        else:
+            raise ValueError("Invalid register bits for MEM {}".format(bits))
+    elif re.fullmatch("_#?-?(0x)?[0-9a-f]+", op):
+        parameter["class"] = "immediate"
+        parameter["imd"] = "int"
+    elif re.fullmatch("_#?-?[0-9]*\\.[0-9]*", op):
+        if "type" not in state:
+            raise ValueError("Float literal {} without preceding float register".format(op))
+        parameter["class"] = "immediate"
+        parameter["imd"] = state["type"]
+    elif re.fullmatch("_((sxt|uxt)[bhw]|lsl|lsr|asr|rol|ror)(_[0-9]+)?", op):
+        # Shift operands are not supported yet. Supporting them would mean turning
+        # state["previous"] into a "<class>_shift" operand carrying "shift_op" and
+        # (optionally) "shift", and returning None here.
+        raise ValueError("Skipping instruction with shift operand: {}".format(op))
+    else:
+        raise ValueError("Unknown operand {}".format(op))
+
+    state["previous"] = parameter
+    return parameter
+
+
+def port_convert(ports):
+    """
+    Merge consecutive identical entries and emit them as [cycles, ports] pairs.
+
+    Each entry is joined into one port string; a run of equal strings becomes a single
+    pair whose cycle count is the length of the run.
+    FIXME: This does not handle having more than 10 ports.
+    """
+    pressures = []
+    for entry in ports:
+        port_string = "".join(entry)
+        if pressures and pressures[-1][1] == port_string:
+            # same ports as the previous cycle: extend the current run
+            pressures[-1][0] += 1
+        else:
+            pressures.append([1, port_string])
+    return pressures
+
+
+def throughput_guess(ports):
+    """
+    Estimate the throughput as number of entries divided by the smallest port choice.
+
+    The entry with the fewest possible ports is taken as the bottleneck, which should
+    determine throughput to some degree of accuracy. (THIS IS *NOT* ALWAYS TRUE!)
+    """
+    bottleneck_ports = min(len(entry) for entry in ports)
+    entry_count = float(len(ports))
+    return entry_count / bottleneck_ports
+
+
+def latency_guess(ports):
+    """
+    Estimate the latency as the number of entries in ``ports``.
+
+    Each entry in the ports array equates to one cycle on any of the ports,
+    so this is about as good as it is going to get.
+    """
+    cycles = len(ports)
+    return float(cycles)
+
+
+def extract_model(mapping, arch, template_model, asmbench):
+    """
+    Fill a MachineModel with the instructions of a PMEvo ``mapping``.
+
+    ``template_model`` is extended if given, otherwise a new model for the ISA of ``arch``
+    is created. Throughput and latency are guessed from the port assignment and, if
+    ``asmbench`` is true, replaced by rounded benchmark results where available.
+    Instructions that fail to parse are reported and left out.
+
+    Returns the model, or None if the ISA lookup for ``arch`` raises ValueError.
+    """
+    try:
+        isa = MachineModel.get_isa_for_arch(arch)
+    except ValueError:
+        print("Skipping...", file=sys.stderr)
+        return None
+    mm = template_model if template_model is not None else MachineModel(isa=isa)
+
+    for port in mapping["arch"]["ports"]:
+        mm.add_port(port)
+
+    for insn in mapping["arch"]["insns"]:
+        try:
+            ports = mapping["assignment"][insn]
+
+            # Parse instruction: second "_"-separated field is the name, the rest holds the
+            # comma separated operands
+            fields = insn.split("_")
+            name = fields[1]
+            operand_strings = ("_" + "_".join(fields[2:])).split(",")
+            state = {}
+            # parse in order, operands depend on the state left by their predecessors
+            parsed_operands = [operand_parse(text, state) for text in operand_strings]
+            operands = [parsed for parsed in parsed_operands if parsed is not None]
+
+            # Port pressures from mapping
+            port_pressure = port_convert(ports)
+
+            # Initial guessed throughput and latency
+            throughput = throughput_guess(ports)
+            latency = latency_guess(ports)
+
+            # Benchmark with asmbench
+            if asmbench:
+                bench_latency, bench_throughput = bench_instruction(name, operands)
+                if bench_throughput is None:
+                    print("Failed to measure throughput for instruction {}.".format(insn))
+                else:
+                    throughput = round_cycles(bench_throughput)
+                if bench_latency is None:
+                    print("Failed to measure latency for instruction {}.".format(insn))
+                else:
+                    latency = round_cycles(bench_latency)
+
+            # No u-ops data available
+            uops = None
+
+            # Insert instruction if not already found (can happen with template)
+            already_known = mm.get_instruction(name, operands) is not None
+            if not already_known:
+                mm.set_instruction(name, operands, latency, port_pressure, throughput, uops)
+        except ValueError as e:
+            print("Failed to parse instruction {}: {}.".format(insn, e))
+
+    return mm
+
+
+def main():
+    """
+    Command line entry point.
+
+    Reads the PMEvo mapping JSON (and optionally a template YAML model), extracts the
+    machine model and writes it to "<arch>.yml" in the current working directory. Exits
+    with status 1 if no model could be extracted.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("json", help="path of mapping.json")
+    parser.add_argument("yaml", help="path of template.yml", nargs="?")
+    parser.add_argument(
+        "--asmbench", help="Benchmark latency and throughput using asmbench.", action="store_true"
+    )
+    args = parser.parse_args()
+
+    # the context manager closes the file even if the JSON is malformed
+    with open(args.json, "r") as json_file:
+        mapping = json.load(json_file)
+    arch = mapping["arch"]["name"].lower()
+
+    template_model = None
+    if args.yaml is not None:
+        template_model = MachineModel(path_to_yaml=args.yaml)
+
+    if args.asmbench:
+        bench.setup_llvm()
+
+    model = extract_model(mapping, arch, template_model, args.asmbench)
+    if model is None:
+        # extract_model() returns None for an architecture without known ISA
+        print("No model extracted for architecture {}.".format(arch), file=sys.stderr)
+        sys.exit(1)
+
+    # dump first, so a failing dump does not leave a truncated output file behind
+    model_string = model.dump()
+    with open("{}.yml".format(arch), "w") as f:
+        f.write(model_string)
+
+
+if __name__ == "__main__":
+    main()