From 7cd380e7b8c21b56a9610a81c51d93b3593de176 Mon Sep 17 00:00:00 2001
From: Stephen Nicholas Swatman <stephen@v25.nl>
Date: Sat, 12 Aug 2023 19:40:44 +0200
Subject: [PATCH 1/7] Add IMUL instruction for Zen 3 architectures

This commit adds data on the IMUL (r, r) instruction on the AMD Zen 3
microarchitecture.
---
 osaca/data/zen3.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/osaca/data/zen3.yml b/osaca/data/zen3.yml
index f3c2488..75e77a8 100644
--- a/osaca/data/zen3.yml
+++ b/osaca/data/zen3.yml
@@ -5515,3 +5515,13 @@ instruction_forms:
   latency: 3
   port_pressure: [[1, '7']]
   throughput: 1.0
+- name: imul # uops.info
+  operands:
+  - class: register
+    name: gpr
+  - class: register
+    name: gpr
+  latency: 3
+  port_pressure: [[1, '7']]
+  throughput: 1.0
+  uops: 1

From e1ce402133c5f5d1392b40e8df51a7a29c1aeccc Mon Sep 17 00:00:00 2001
From: Stephen Nicholas Swatman <stephen@v25.nl>
Date: Sat, 12 Aug 2023 19:42:11 +0200
Subject: [PATCH 2/7] Add support for structured YAML output

This commit adds a new `--yaml-out` flag to OSACA which allows the user
to dump the results of an analysis to a YAML file, so that the results
can be processed more easily by other tools. I have tried to make the
output as comprehensive as possible.
---
 osaca/frontend.py | 80 +++++++++++++++++++++++++++++++++++++++++++++++
 osaca/osaca.py    | 20 ++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/osaca/frontend.py b/osaca/frontend.py
index 19814c3..06b55e7 100755
--- a/osaca/frontend.py
+++ b/osaca/frontend.py
@@ -201,6 +201,78 @@ class Frontend(object):
             + self.loopcarried_dependencies(kernel_dg.get_loopcarried_dependencies())
         )
 
+    def full_analysis_dict(
+        self,
+        kernel,
+        kernel_dg: KernelDG,
+        arch_warning=False,
+        length_warning=False,
+        lcd_warning=False,
+    ):
+        warnings = []
+
+        if arch_warning:
+            warnings.append("ArchWarning")
+
+        if length_warning:
+            warnings.append("LengthWarning")
+
+        if lcd_warning:
+            warnings.append("LcdWarning")
+
+        if INSTR_FLAGS.TP_UNKWN in [flag for instr in kernel for flag in instr["flags"]]:
+            warnings.append("UnknownInstrWarning")
+
+        tp_sum = ArchSemantics.get_throughput_sum(kernel) or kernel[0]["port_pressure"]
+        cp_kernel = kernel_dg.get_critical_path()
+
+        dep_dict = kernel_dg.get_loopcarried_dependencies()
+        lcd_sum = 0.0
+        if dep_dict:
+            longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
+            lcd_sum = dep_dict[longest_lcd]["latency"]
+
+        return {
+            "Header": self._header_report_dict(),
+            "Warnings": warnings,
+            "Kernel": [
+                {
+                    "Line": re.sub(r"\s+", " ", x["line"].strip()),
+                    "Flags": list(x["flags"]),
+                    "Instruction": x["instruction"],
+                    "Label": x["label"],
+                    "Latency": x["latency"],
+                    "LatencyCP": x["latency_cp"],
+                    "LatencyLCD": x["latency_lcd"],
+                    "Throughput": float(x["throughput"]),
+                    "LatencyWithoutLoad": x["latency_wo_load"],
+                    "PortPressure": {
+                        self._machine_model.get_ports()[i]: v
+                        for i, v in enumerate(x["port_pressure"])
+                    },
+                    "PortUops": [
+                        {
+                            "Ports": list(y[1]),
+                            "Cycles": y[0],
+                        }
+                        for y in x["port_uops"]
+                    ],
+                }
+                for x in kernel
+            ],
+            "Summary": {
+                "PortPressure": {
+                    self._machine_model.get_ports()[i]: v for i, v in enumerate(tp_sum)
+                },
+                "CriticalPath": sum([x["latency_cp"] for x in cp_kernel]),
+                "LCD": lcd_sum,
+            },
+            "Target": {
+                "Name": self._arch.upper(),
+                "Ports": list(self._machine_model.get_ports()),
+            },
+        }
+
     def combined_view(
         self,
         kernel,
@@ -449,6 +521,14 @@ class Frontend(object):
         )
         return header + "\n"
 
+    def _header_report_dict(self):
+        """Return header information in a dictionary format"""
+        return {
+            "Version": _get_version("__init__.py"),
+            "FileName": self._filename,
+            "Timestamp": dt.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
+        }
+
     def _symbol_map(self):
         """Prints instruction flag map."""
         symbol_dict = {
diff --git a/osaca/osaca.py b/osaca/osaca.py
index f5f9bcf..a6e57e9 100755
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -7,6 +7,8 @@ import re
 import sys
 from functools import lru_cache
 
+import ruamel.yaml
+
 from osaca.db_interface import import_benchmark_output, sanity_check
 from osaca.frontend import Frontend
 from osaca.parser import BaseParser, ParserAArch64, ParserX86ATT
@@ -188,6 +190,13 @@ def create_parser(parser=None):
         type=argparse.FileType("w"),
         help="Write analysis to this file (default to stdout).",
     )
+    parser.add_argument(
+        "--yaml-out",
+        default=None,
+        dest="yaml_out",
+        type=argparse.FileType("w"),
+        help="Write YAML analysis to this file",
+    )
     parser.add_argument(
         "file",
         type=argparse.FileType("r"),
@@ -360,6 +369,17 @@ def inspect(args, output_file=sys.stdout):
         ),
         file=output_file,
     )
+    if args.yaml_out is not None:
+        ruamel.yaml.dump(
+            frontend.full_analysis_dict(
+                kernel,
+                kernel_graph,
+                arch_warning=print_arch_warning,
+                length_warning=print_length_warning,
+                lcd_warning=kernel_graph.timed_out,
+            ),
+            args.yaml_out,
+        )
 
 
 def run(args, output_file=sys.stdout):

From ab10febe74151fd431bac7fdae551c17d948f290 Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Tue, 15 Aug 2023 14:01:11 +0200
Subject: [PATCH 3/7] enhanced YAML output to include all kernel objects and no
 ruamel.yaml-specific data types

---
 osaca/frontend.py         | 35 +++++++++++++++++++++++++++++------
 osaca/osaca.py            |  2 +-
 osaca/parser/attr_dict.py | 24 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/osaca/frontend.py b/osaca/frontend.py
index 06b55e7..9a5521d 100755
--- a/osaca/frontend.py
+++ b/osaca/frontend.py
@@ -8,6 +8,7 @@ import re
 from datetime import datetime as dt
 
 from osaca.semantics import INSTR_FLAGS, ArchSemantics, KernelDG, MachineModel
+from osaca.parser import AttrDict
 
 
 def _get_version(*file_paths):
@@ -209,6 +210,23 @@ class Frontend(object):
         length_warning=False,
         lcd_warning=False,
     ):
+        """
+        Create a dictionary of the full analysis for machine-readable output.
+
+        :param kernel: kernel to report on
+        :type kernel: list
+        :param kernel_dg: directed graph containing CP and LCD
+        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
+        :param arch_warning: flag to add the user warning to specify the micro-arch
+        :type arch_warning: boolean, optional
+        :param length_warning: flag to add the user warning to specify the kernel length with
+                               --lines
+        :type length_warning: boolean, optional
+        :param lcd_warning: flag to add the user warning that the LCD analysis timed out
+        :type lcd_warning: boolean, optional
+
+        :returns: dict with the keys "Header", "Warnings", "Kernel", "Summary" and "Target"
+        """
         warnings = []
 
         if arch_warning:
@@ -218,7 +236,7 @@ class Frontend(object):
             warnings.append("LengthWarning")
 
         if lcd_warning:
-            warnings.append("LcdWarning")
+            warnings.append("LCDWarning")
 
         if INSTR_FLAGS.TP_UNKWN in [flag for instr in kernel for flag in instr["flags"]]:
             warnings.append("UnknownInstrWarning")
@@ -231,21 +249,24 @@ class Frontend(object):
         if dep_dict:
             longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]["latency"])
             lcd_sum = dep_dict[longest_lcd]["latency"]
-
         return {
             "Header": self._header_report_dict(),
             "Warnings": warnings,
             "Kernel": [
                 {
                     "Line": re.sub(r"\s+", " ", x["line"].strip()),
+                    "LineNumber": x["line_number"],
                     "Flags": list(x["flags"]),
                     "Instruction": x["instruction"],
+                    "Operands": AttrDict.get_dict(x["operands"]),
+                    "SemanticOperands": AttrDict.get_dict(x["semantic_operands"]),
                     "Label": x["label"],
-                    "Latency": x["latency"],
-                    "LatencyCP": x["latency_cp"],
-                    "LatencyLCD": x["latency_lcd"],
+                    "Directive": AttrDict.get_dict(x["directive"]),  # may be an AttrDict
+                    "Latency": float(x["latency"]),
+                    "LatencyCP": float(x["latency_cp"]),
+                    "LatencyLCD": float(x["latency_lcd"]),
                     "Throughput": float(x["throughput"]),
-                    "LatencyWithoutLoad": x["latency_wo_load"],
+                    "LatencyWithoutLoad": float(x["latency_wo_load"]),
                     "PortPressure": {
                         self._machine_model.get_ports()[i]: v
                         for i, v in enumerate(x["port_pressure"])
@@ -257,6 +278,7 @@ class Frontend(object):
                         }
                         for y in x["port_uops"]
                     ],
+                    "Comment": x["comment"],
                 }
                 for x in kernel
             ],
@@ -526,6 +548,7 @@ class Frontend(object):
         return {
             "Version": _get_version("__init__.py"),
             "FileName": self._filename,
+            "Architecture": self._arch,
             "Timestamp": dt.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
         }
 
diff --git a/osaca/osaca.py b/osaca/osaca.py
index a6e57e9..51d46b6 100755
--- a/osaca/osaca.py
+++ b/osaca/osaca.py
@@ -195,7 +195,7 @@ def create_parser(parser=None):
         default=None,
         dest="yaml_out",
         type=argparse.FileType("w"),
-        help="Write YAML analysis to this file",
+        help="Write analysis as YAML representation to this file",
     )
     parser.add_argument(
         "file",
diff --git a/osaca/parser/attr_dict.py b/osaca/parser/attr_dict.py
index 6348599..35f6c40 100755
--- a/osaca/parser/attr_dict.py
+++ b/osaca/parser/attr_dict.py
@@ -27,3 +27,27 @@ class AttrDict(dict):
                     dictionary[key] = [AttrDict.convert_dict(x) for x in entry]
             return AttrDict(dictionary)
         return dictionary
+
+    @staticmethod
+    def get_dict(attrdict):
+        """
+        Recursively convert an `AttrDict` and the containers nested in it to builtin types.
+
+        Plain ``dict`` objects are traversed as well, so an `AttrDict` stored inside a regular
+        dictionary is converted instead of being passed through untouched.
+
+        :param attrdict: `AttrDict`, ``dict`` or ``list`` to be converted
+        :type attrdict: `AttrDict`, dict or list
+        :returns: new ``dict``/``list`` built from the input; other values are returned as is
+        """
+        if isinstance(attrdict, list):
+            return [AttrDict.get_dict(x) for x in attrdict]
+        if not isinstance(attrdict, dict):
+            # neither list nor dict (e.g. string, number, None): nothing to convert
+            return attrdict
+        # `AttrDict` subclasses ``dict``, so the check above covers both. Every value goes
+        # through get_dict() again, which decides itself whether it is a container or a leaf.
+        newdict = {}
+        for key, entry in attrdict.items():
+            newdict[key] = AttrDict.get_dict(entry)
+        return newdict

From f856c578bf8d9fdd79e59dce40b9f95ae699334c Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Tue, 15 Aug 2023 14:01:15 +0200
Subject: [PATCH 4/7] added tests for dict output of analysis

---
 tests/test_frontend.py | 70 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/tests/test_frontend.py b/tests/test_frontend.py
index 30c7a46..58780da 100755
--- a/tests/test_frontend.py
+++ b/tests/test_frontend.py
@@ -8,7 +8,7 @@ import unittest
 
 from osaca.frontend import Frontend
 from osaca.parser import ParserAArch64, ParserX86ATT
-from osaca.semantics import ArchSemantics, KernelDG, MachineModel
+from osaca.semantics import ArchSemantics, KernelDG, MachineModel, reduce_to_section
 
 
 class TestFrontend(unittest.TestCase):
@@ -81,6 +81,74 @@ class TestFrontend(unittest.TestCase):
         fe.full_analysis(self.kernel_AArch64, dg, verbose=True)
         # TODO compare output with checked string
 
+    def test_dict_output_x86(self):
+        """
+        Check that the dict output for the x86 kernel mirrors the kernel line by line.
+        """
+        graph = KernelDG(
+            self.kernel_x86, self.parser_x86, self.machine_model_csx, self.semantics_csx
+        )
+        frontend = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "csx.yml"))
+        result = frontend.full_analysis_dict(self.kernel_x86, graph)
+        # report-wide properties
+        self.assertEqual(len(self.kernel_x86), len(result["Kernel"]))
+        self.assertEqual("csx", result["Header"]["Architecture"])
+        self.assertEqual(len(result["Warnings"]), 0)
+        # every kernel line must match the entry at the same position
+        for idx, instr_form in enumerate(self.kernel_x86):
+            entry = result["Kernel"][idx]
+            self.assertEqual(instr_form["throughput"], entry["Throughput"])
+            self.assertEqual(instr_form["latency"], entry["Latency"])
+            self.assertEqual(instr_form["latency_wo_load"], entry["LatencyWithoutLoad"])
+            self.assertEqual(instr_form["latency_cp"], entry["LatencyCP"])
+            self.assertEqual(instr_form["instruction"], entry["Instruction"])
+            self.assertEqual(len(instr_form["operands"]), len(entry["Operands"]))
+            # semantic operands are compared by length only
+            for role in ("source", "destination", "src_dst"):
+                self.assertEqual(
+                    len(instr_form["semantic_operands"][role]),
+                    len(entry["SemanticOperands"][role]),
+                )
+            self.assertEqual(instr_form["flags"], entry["Flags"])
+            self.assertEqual(instr_form["line_number"], entry["LineNumber"])
+
+    def test_dict_output_AArch64(self):
+        """
+        Check the dict output for the AArch64 kernel.
+
+        The kernel is reduced with reduce_to_section() first and must be mirrored line by line.
+        """
+        section = reduce_to_section(self.kernel_AArch64, self.semantics_tx2._isa)
+        graph = KernelDG(
+            section,
+            self.parser_AArch64,
+            self.machine_model_tx2,
+            self.semantics_tx2,
+        )
+        frontend = Frontend(path_to_yaml=os.path.join(self.MODULE_DATA_DIR, "tx2.yml"))
+        result = frontend.full_analysis_dict(section, graph)
+        # report-wide properties
+        self.assertEqual(len(section), len(result["Kernel"]))
+        self.assertEqual("tx2", result["Header"]["Architecture"])
+        self.assertEqual(len(result["Warnings"]), 0)
+        # every kernel line must match the entry at the same position
+        for idx, instr_form in enumerate(section):
+            entry = result["Kernel"][idx]
+            self.assertEqual(instr_form["throughput"], entry["Throughput"])
+            self.assertEqual(instr_form["latency"], entry["Latency"])
+            self.assertEqual(instr_form["latency_wo_load"], entry["LatencyWithoutLoad"])
+            self.assertEqual(instr_form["latency_cp"], entry["LatencyCP"])
+            self.assertEqual(instr_form["instruction"], entry["Instruction"])
+            self.assertEqual(len(instr_form["operands"]), len(entry["Operands"]))
+            # semantic operands are compared by length only
+            for role in ("source", "destination", "src_dst"):
+                self.assertEqual(
+                    len(instr_form["semantic_operands"][role]),
+                    len(entry["SemanticOperands"][role]),
+                )
+            self.assertEqual(instr_form["flags"], entry["Flags"])
+            self.assertEqual(instr_form["line_number"], entry["LineNumber"])
+
     ##################
     # Helper functions
     ##################

From c2ee27660955c5be6b19fdd7012082b79ff2e2e3 Mon Sep 17 00:00:00 2001
From: Jan <20126033+JanLJL@users.noreply.github.com>
Date: Tue, 15 Aug 2023 14:33:22 +0200
Subject: [PATCH 5/7] Document --yaml-out flag in README

---
 README.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index eb41174..d0ff0e6 100644
--- a/README.rst
+++ b/README.rst
@@ -91,7 +91,7 @@ The usage of OSACA can be listed as:
     	  [--ignore-unknown] [--lcd-timeout SECONDS]
     	  [--db-check] [--import MICROBENCH] [--insert-marker]
           [--export-graph GRAPHNAME] [--consider-flag-deps]
-          [--out OUT] [--verbose]
+          [--out OUT] [--yaml-out YAML_OUT] [--verbose]
           FILEPATH
 
 -h, --help
@@ -133,6 +133,8 @@ The usage of OSACA can be listed as:
   Increases verbosity level
 -o OUT, --out OUT
   Write analysis to this file (default to stdout)
+--yaml-out YAML_OUT
+  Write analysis as YAML representation to this file
 
 The **FILEPATH** describes the filepath to the file to work with and is always necessary, use "-" to read from stdin.
 

From 6d275a1207d2b3873da0a20dedeebc466c43ba30 Mon Sep 17 00:00:00 2001
From: Jan <20126033+JanLJL@users.noreply.github.com>
Date: Tue, 15 Aug 2023 14:55:10 +0200
Subject: [PATCH 6/7] Update actions and Python versions used in GH workflow

---
 .github/workflows/test-n-publish.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index 9eaaf3c..77783e6 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -7,10 +7,10 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9, "3.10"]
+        python-version: [3.8, 3.9, "3.10", "3.11"]
     steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
+    - uses: actions/checkout@v3
+    - uses: actions/setup-python@v4
       name: Set up Python ${{ matrix.python-version }}
       with:
         python-version: ${{ matrix.python-version }}
@@ -27,7 +27,7 @@ jobs:
     - name: Test
       run: |
         coverage run -p tests/all_tests.py
-    - uses: codecov/codecov-action@v1
+    - uses: codecov/codecov-action@v3
     - name: Build package
       run: |
         python setup.py build sdist bdist_wheel

From 8cc408a307da48b8c8df368304c7c966ec33922e Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Wed, 16 Aug 2023 00:58:23 +0200
Subject: [PATCH 7/7] version bump

---
 osaca/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osaca/__init__.py b/osaca/__init__.py
index e3b8535..f0021ac 100644
--- a/osaca/__init__.py
+++ b/osaca/__init__.py
@@ -1,6 +1,6 @@
 """Open Source Architecture Code Analyzer"""
 name = "osaca"
-__version__ = "0.5.1"
+__version__ = "0.5.2"
 
 # To trigger travis deployment to pypi, do the following:
 # 1. Increment __version___