From 625d814dcea50faea44df946404f9cdc5eafd8e2 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 26 Sep 2019 21:39:56 +0200 Subject: [PATCH] new dynamic tp and lt values for LD instructions --- osaca/api/kerncraft_interface.py | 20 +- osaca/data/csx.yml | 270 +++++++----------------- osaca/data/vulcan.yml | 172 ++++++---------- osaca/data/zen1.yml | 297 ++++----------------------- osaca/frontend.py | 6 +- osaca/semantics/hw_model.py | 19 +- osaca/semantics/kernel_dg.py | 58 ++++-- osaca/semantics/semanticsAppender.py | 112 ++++++++-- tests/test_kerncraftAPI.py | 2 +- 9 files changed, 346 insertions(+), 610 deletions(-) diff --git a/osaca/api/kerncraft_interface.py b/osaca/api/kerncraft_interface.py index d21a32d..686e727 100755 --- a/osaca/api/kerncraft_interface.py +++ b/osaca/api/kerncraft_interface.py @@ -48,10 +48,18 @@ class KerncraftAPI(object): return max(self.semantics.get_throughput_sum(kernel)) def get_latency(self, kernel): + return (self.get_lcd(kernel), self.get_cp(kernel)) + + def get_cp(self, kernel): kernel_graph = KernelDG(kernel, self.parser, self.machine_model) - return sum( - [ - x['latency'] if x['latency'] is not None else 0 - for x in kernel_graph.get_critical_path() - ] - ) + kernel_cp = kernel_graph.get_critical_path() + return sum([x['latency_cp'] for x in kernel_cp]) + + def get_lcd(self, kernel): + kernel_graph = KernelDG(kernel, self.parser, self.machine_model) + lcd_dict = kernel_graph.get_loopcarried_dependencies() + lcd = 0.0 + for dep in lcd_dict: + lcd_tmp = sum([x['latency_lcd'] for x in lcd_dict[dep]['dependencies']]) + lcd = lcd_tmp if lcd_tmp > lcd else lcd + return lcd diff --git a/osaca/data/csx.yml b/osaca/data/csx.yml index 0a916c3..6546c47 100644 --- a/osaca/data/csx.yml +++ b/osaca/data/csx.yml @@ -7,6 +7,15 @@ retired_uOps_per_cycle: 4 scheduler_size: 97 hidden_loads: false load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0, zmm: 4.0} +load_throughput: + - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} + - {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [0,0,0,0.5,0.5,0.5,0.5,0,0,0,0]} ports: ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"] port_model_scheme: | ┌------------------------------------------------------------------------┐ @@ -99,18 +108,6 @@ instruction_forms: throughput: 0.25 latency: ~ # 0 0DV 1 2 2D 3 3D 4 5 6 7 port_pressure: [0.25, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.25, 0.0] - - name: cmpq - operands: - - class: "register" - name: "gpr" - - class: "memory" - base: "gpr" - offset: "imd" - index: ~ - scale: 1 - throughput: 0.5 - latency: ~ # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.25, 0.0, 0.25, 0.5, 0.5, 0.5, 0.5, 0.0, 0.25, 0.25, 0.0] - name: incq operands: - class: "register" @@ -203,60 +200,15 @@ instruction_forms: port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vaddpd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vaddpd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vaddpd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 + name: "xmm" - class: "register" name: "xmm" - class: "register" name: "xmm" throughput: 0.5 latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vaddsd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "xmm" - - class: "register" - name: "xmm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vaddsd operands: - class: "register" @@ -303,88 +255,70 @@ instruction_forms: port_pressure: [1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vfmadd213pd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 + - class: "register" + name: "ymm" - class: "register" name: "ymm" - class: "register" name: "ymm" throughput: 0.5 latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: vfmadd132pd + operands: + - class: "register" + name: "ymm" + - class: "register" + name: "ymm" + - class: "register" + name: "ymm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: vfmadd231pd + operands: + - class: "register" + name: "ymm" + - class: "register" + name: "ymm" + - class: "register" + name: "ymm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + - name: vfmadd132pd + operands: + - class: "register" + name: "xmm" + - class: "register" + name: "xmm" + - class: "register" + name: "xmm" + throughput: 0.5 + latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vfmadd213pd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" - name: "ymm" + name: "xmm" - class: "register" - name: "ymm" + name: "xmm" + - class: "register" + name: "xmm" throughput: 0.5 latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vfmadd231pd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" - name: "ymm" + name: "xmm" - class: "register" - name: "ymm" + name: "xmm" + - class: "register" + name: "xmm" throughput: 0.5 latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vfmadd231pd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vfmadd132pd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vfmadd132pd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmulsd operands: - class: "register" @@ -418,46 +352,15 @@ instruction_forms: throughput: 0.5 latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 port_pressure: [0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - - name: vmulpd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vmulpd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 0.5 - latency: 4.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" name: "xmm" - throughput: 0.5 + - class: "register" + name: "xmm" + throughput: 0.0 latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - class: "register" @@ -472,28 +375,13 @@ instruction_forms: port_pressure: [0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 1.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 0.5 - latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vmovapd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 0.5 + throughput: 0.0 latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - class: "register" @@ -544,40 +432,22 @@ instruction_forms: port_pressure: [0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 1.0, 0.0, 0.0, 0.0] - name: vmovupd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 0.5 - latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] - - name: vmovupd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 0.5 + throughput: 0.0 latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovsd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" name: "xmm" - throughput: 0.5 - latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 - port_pressure: [0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0] + - class: "register" + name: "xmm" + throughput: 0.0 + latency: 0.0 # 0 0DV 1 2 2D 3 3D 4 5 6 7 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovsd operands: - class: "register" diff --git a/osaca/data/vulcan.yml b/osaca/data/vulcan.yml index 259a4eb..d10234b 100644 --- a/osaca/data/vulcan.yml +++ b/osaca/data/vulcan.yml @@ -7,6 +7,39 @@ retired_uOps_per_cycle: 4 scheduler_size: 60 hidden_loads: false load_latency: {w: 4.0, x: 4.0, b: 4.0, h: 4.0, s: 4.0, d: 4.0, q: 4.0, v: 4.0} +load_throughput: + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: ~, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: ~, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 1, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: false, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: true, port_pressure: [0,0,0,0,0.5,0.5,0]} + - {base: x, index: x, offset: imd, scale: 8, pre-indexed: true, post-indexed: false, port_pressure: [0,0,0,0,0.5,0.5,0]} ports: ["0", "0DV", "1", "1DV", "2", "3", "4", "5"] port_model_scheme: | ┌-----------------------------------------------------------┐ @@ -332,6 +365,22 @@ instruction_forms: throughput: 1.0 latency: ~ # 0 0DV 1 1DV 2 3 4 5 port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0] + - name: "ldp" + operands: + - class: "register" + prefix: "q" + - class: "register" + prefix: "q" + - class: "memory" + base: "x" + offset: ~ + index: ~ + scale: 1 + pre-indexed: false + post-indexed: false + throughput: 1.0 + latency: ~ # 0 0DV 1 1DV 2 3 4 5 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0] - name: "ldp" operands: - class: "register" @@ -368,114 +417,29 @@ instruction_forms: operands: - class: "register" prefix: "x" - - class: "memory" - base: "x" - offset: ~ - index: "x" - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 0.5 - latency: 4.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] + - class: "register" + prefix: "x" + throughput: 0.0 + latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: "ldr" operands: - class: "register" prefix: "q" - - class: "memory" - base: "x" - offset: ~ - index: "x" - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 0.5 + - class: "register" + prefix: "q" + throughput: 0.0 latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: "ldr" operands: - class: "register" prefix: "d" - - class: "memory" - base: "x" - offset: "imd" - index: ~ - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 0.5 - latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] - - name: "ldr" - operands: - class: "register" prefix: "d" - - class: "memory" - base: "x" - offset: ~ - index: "x" - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 0.5 + throughput: 0.0 latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] - - name: "ldr" - operands: - - class: "register" - prefix: "d" - - class: "memory" - base: "x" - offset: ~ - index: ~ - scale: 1 - pre-indexed: false - post-indexed: true - throughput: 0.5 - latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] - - name: "ldr" - operands: - - class: "register" - prefix: "d" - - class: "memory" - base: "x" - offset: ~ - index: "x" - scale: 8 - pre-indexed: false - post-indexed: false - throughput: 0.5 - latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] - - name: "ldr" - operands: - - class: "register" - prefix: "d" - - class: "memory" - base: "x" - offset: ~ - index: ~ - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 0.5 - latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] - - name: "ldr" - operands: - - class: "register" - prefix: "x" - - class: "memory" - base: "x" - offset: ~ - index: ~ - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 0.5 - latency: 0.0 # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: "mov" operands: - class: "register" @@ -576,22 +540,6 @@ instruction_forms: throughput: 2.0 latency: ~ # 0 0DV 1 1DV 2 3 4 5 port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0] - - name: "ldp" - operands: - - class: "register" - prefix: "q" - - class: "register" - prefix: "q" - - class: "memory" - base: "x" - offset: ~ - index: ~ - scale: 1 - pre-indexed: false - post-indexed: false - throughput: 1.0 - latency: ~ # 0 0DV 1 1DV 2 3 4 5 - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0] - name: "stp" operands: - class: "register" diff --git a/osaca/data/zen1.yml b/osaca/data/zen1.yml index 199620c..7e1482f 100644 --- a/osaca/data/zen1.yml +++ b/osaca/data/zen1.yml @@ -2,7 +2,17 @@ osaca_version: 0.3.0 micro_architecture: "AMD Zen (family 17h)" arch_code: "ZEN1" isa: "x86" -load_latency: {gpr: 4.0, xmm: 4.0, ymm: '5.0'} +load_latency: {gpr: 4.0, xmm: 4.0, ymm: 4.0} +# TODO ADD YMM latencies [0,0,0,0,0,0,0,0,0,1,1,1,1,0] +load_throughput: + - {base: gpr, index: ~, offset: ~, scale: 1, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: ~, offset: ~, scale: 8, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: ~, offset: imd, scale: 1, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: ~, offset: imd, scale: 8, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: gpr, offset: ~, scale: 1, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: gpr, offset: ~, scale: 8, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: gpr, offset: imd, scale: 1, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} + - {base: gpr, index: gpr, offset: imd, scale: 8, port_pressure: [0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,0.5,0]} hidden_loads: false ports: ["0", "1", "2", "3", "3DV", "4", "5", "6", "7", "8", "9", "8D", "9D", "ST"] port_model_scheme: | @@ -80,18 +90,6 @@ instruction_forms: throughput: 0.25 latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0] - - name: cmpq - operands: - - class: "register" - name: "gpr" - - class: "memory" - base: "gpr" - offset: "imd" - index: ~ - scale: 1 - throughput: 0.25 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.25, 0.25, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0] - name: incq operands: - class: "register" @@ -131,16 +129,13 @@ instruction_forms: port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0] - name: movl operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: ~ - scale: 1 - class: "register" name: "gpr" - throughput: 0.5 + - class: "register" + name: "gpr" + throughput: 0.0 latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8LD 9LD ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: mulsd operands: - class: "register" @@ -215,34 +210,6 @@ instruction_forms: throughput: 1.0 latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - - name: vaddpd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - - name: vaddpd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - name: vaddsd operands: - class: "register" @@ -254,62 +221,6 @@ instruction_forms: throughput: 0.5 latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - - name: vaddsd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: ~ - scale: 1 - - class: "register" - name: "xmm" - - class: "register" - name: "xmm" - throughput: 0.5 - latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - - name: vaddsd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: ~ - scale: 1 - - class: "register" - name: "xmm" - - class: "register" - name: "xmm" - throughput: 0.5 - latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - - name: vaddsd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "xmm" - - class: "register" - name: "xmm" - throughput: 0.5 - latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - - name: vaddsd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 8 - - class: "register" - name: "xmm" - - class: "register" - name: "xmm" - throughput: 0.5 - latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - name: vaddss operands: - class: "register" @@ -345,88 +256,37 @@ instruction_forms: port_pressure: [0.0, 0.0, 0.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vfmadd213pd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 + - class: "register" + name: "ymm" - class: "register" name: "ymm" - class: "register" name: "ymm" throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - - name: vfmadd213pd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] + latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST + port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vfmadd231pd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 + - class: "register" + name: "ymm" - class: "register" name: "ymm" - class: "register" name: "ymm" throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - - name: vfmadd231pd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] + latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST + port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vfmadd132pd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 + - class: "register" + name: "ymm" - class: "register" name: "ymm" - class: "register" name: "ymm" throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - - name: vfmadd132pd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: ~ # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] + latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST + port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmulsd operands: - class: "register" @@ -465,18 +325,15 @@ instruction_forms: port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - name: vmulpd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 + - class: "register" + name: "xmm" - class: "register" name: "xmm" - class: "register" name: "xmm" throughput: 0.5 latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] + port_pressure: [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmulpd operands: - class: "register" @@ -488,46 +345,15 @@ instruction_forms: throughput: 1.0 latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - - name: vmulpd - operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - - name: vmulpd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - - class: "register" - name: "ymm" - - class: "register" - name: "ymm" - throughput: 1.0 - latency: 4.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - name: vmovapd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" name: "xmm" - throughput: 0.5 + - class: "register" + name: "xmm" + throughput: 0.0 latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - class: "register" @@ -542,28 +368,13 @@ instruction_forms: port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1.0] - name: vmovapd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 1.0 - latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] - - name: vmovapd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 1.0 + throughput: 0.0 latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovapd operands: - class: "register" @@ -638,52 +449,34 @@ instruction_forms: port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0] - name: vmovupd operands: + - class: "register" + name: "ymm" - class: "memory" base: "gpr" offset: ~ index: "gpr" scale: 1 - - class: "register" - name: "ymm" throughput: 2.0 - latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST + latency: 3.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0] - name: vmovupd operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" name: "ymm" - throughput: 2.0 + - class: "register" + name: "ymm" + throughput: 0.0 latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovsd operands: - - class: "memory" - base: "gpr" - offset: ~ - index: "gpr" - scale: 8 - class: "register" name: "xmm" - throughput: 0.5 - latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] - - name: vmovsd - operands: - - class: "memory" - base: "gpr" - offset: "imd" - index: "gpr" - scale: 1 - class: "register" name: "xmm" - throughput: 0.5 + throughput: 0.0 latency: 0.0 # 0 1 2 3 3DV 4 5 6 7 8 9 8D 9D ST - port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0] + port_pressure: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - name: vmovsd operands: - class: "register" diff --git a/osaca/frontend.py b/osaca/frontend.py index c1023de..8902e65 100755 --- a/osaca/frontend.py +++ b/osaca/frontend.py @@ -133,7 +133,7 @@ class Frontend(object): '{:4d} {} {:4.1f} {}{}{} {}'.format( instruction_form['line_number'], separator, - instruction_form['latency'], + instruction_form['latency_cp'], separator, 'X' if INSTR_FLAGS.LT_UNKWN in instruction_form['flags'] else ' ', separator, @@ -144,7 +144,7 @@ class Frontend(object): '\n{:4} {} {:4.1f}'.format( ' ' * max([len(str(instr_form['line_number'])) for instr_form in cp_kernel]), ' ' * len(separator), - sum([instr_form['latency'] for instr_form in cp_kernel]), + sum([instr_form['latency_cp'] for instr_form in cp_kernel]), ) ) @@ -161,7 +161,7 @@ class Frontend(object): separator, sum( [ - instr_form['latency'] if instr_form['latency'] is not None else 0 + instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies'] ] ), diff --git a/osaca/semantics/hw_model.py b/osaca/semantics/hw_model.py index 655e3d4..bf71fd3 100755 --- a/osaca/semantics/hw_model.py +++ b/osaca/semantics/hw_model.py @@ -81,6 +81,21 @@ class MachineModel(object): return self._data['hidden_loads'] return False + def get_load_latency(self, reg_type): + return self._data['load_latency'][reg_type] + + def get_load_throughput(self, memory): + ld_tp = [m for m in self._data['load_throughput'] if self._match_mem_entries(memory, m)] + if len(ld_tp) > 0: + return ld_tp[0]['port_pressure'] + return None + + def _match_mem_entries(self, mem, i_mem): + if self._data['isa'].lower() == 'aarch64': + return self._is_AArch64_mem_type(i_mem, mem) + if self._data['isa'].lower() == 'x86': + return self._is_x86_mem_type(i_mem, mem) + def get_data_ports(self): data_port = re.compile(r'^[0-9]+D$') data_ports = [x for x in filter(data_port.match, self._data['ports'])] @@ -248,7 +263,7 @@ class MachineModel(object): and mem['index']['prefix'] == i_mem['index'] ) ) - and mem['scale'] == i_mem['scale'] + and (mem['scale'] == i_mem['scale'] or (mem['scale'] != 1 and i_mem['scale'] != 1)) and (('pre_indexed' in mem) == (i_mem['pre-indexed'])) and (('post_indexed' in mem) == (i_mem['post-indexed'])) ): @@ -285,7 +300,7 @@ class MachineModel(object): and self._is_x86_reg_type(i_mem['index'], mem['index']) ) ) - and mem['scale'] == i_mem['scale'] + and (mem['scale'] == i_mem['scale'] or (mem['scale'] != 1 and i_mem['scale'] != 1)) ): return True return False diff --git a/osaca/semantics/kernel_dg.py b/osaca/semantics/kernel_dg.py index 2b02f44..f8c684e 100755 --- a/osaca/semantics/kernel_dg.py +++ b/osaca/semantics/kernel_dg.py @@ -27,18 +27,6 @@ class KernelDG(nx.DiGraph): dg.nodes[instruction_form['line_number']]['instruction_form'] = instruction_form # add load as separate node if existent if 'performs_load' in instruction_form['flags']: - regs = [ - op for op in instruction_form['operands']['destination'] if 'register' in op - ] - if ( - len(regs) > 1 - and len(set([self.parser.get_reg_type(x['register']) for x in regs])) != 1 - ): - load_lat = max(self.model['load_latency'].values()) - else: - load_lat = self.model['load_latency'][ - self.parser.get_reg_type(regs[0]['register']) - ] # add new node dg.add_node(instruction_form['line_number'] + 0.1) dg.nodes[instruction_form['line_number'] + 0.1][ @@ -48,13 +36,16 @@ class KernelDG(nx.DiGraph): dg.add_edge( instruction_form['line_number'] + 0.1, instruction_form['line_number'], - latency=load_lat, + latency=instruction_form['latency'] - instruction_form['latency_wo_load'], + ) + for dep in self.find_depending(instruction_form, kernel[i + 1 :]): + edge_weight = ( + instruction_form['latency'] + if 'latency_wo_load' not in instruction_form + else instruction_form['latency_wo_load'] ) - for dep in self.find_depending(instruction_form, kernel[i + 1:]): dg.add_edge( - instruction_form['line_number'], - dep['line_number'], - latency=instruction_form['latency'], + instruction_form['line_number'], dep['line_number'], latency=edge_weight ) dg.nodes[dep['line_number']]['instruction_form'] = dep return dg @@ -99,7 +90,16 @@ class KernelDG(nx.DiGraph): tmp_list.append(dep) loopcarried_deps = tmp_list for dep in loopcarried_deps: - nodes = [self._get_node_by_lineno(n) for n in dep[1]] + nodes = [] + for n in dep[1]: + self._get_node_by_lineno(int(n))['latency_lcd'] = 0 + for n in dep[1]: + node = self._get_node_by_lineno(int(n)) + if int(n) != n and int(n) in dep[1]: + node['latency_lcd'] += node['latency'] - node['latency_wo_load'] + else: + node['latency_lcd'] += node['latency_wo_load'] + nodes.append(node) loopcarried_deps_dict[dep[0]] = { 'root': self._get_node_by_lineno(dep[0]), 'dependencies': nodes, @@ -113,12 +113,25 @@ class KernelDG(nx.DiGraph): def get_critical_path(self): if nx.algorithms.dag.is_directed_acyclic_graph(self.dg): longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight='latency') + for line_number in longest_path: + self._get_node_by_lineno(int(line_number))['latency_cp'] = 0 # add LD latency to instruction for line_number in longest_path: + node = self._get_node_by_lineno(int(line_number)) if line_number != int(line_number) and int(line_number) in longest_path: - self._get_node_by_lineno(int(line_number))['latency'] += self.dg.edges[ - (line_number, int(line_number)) - ]['latency'] + node['latency_cp'] += self.dg.edges[(line_number, int(line_number))]['latency'] + elif ( + line_number == int(line_number) + and 'mem_dep' in node + and self.dg.has_edge(node['mem_dep']['line_number'], line_number) + ): + node['latency_cp'] += node['latency'] + else: + node['latency_cp'] += ( + node['latency'] + if 'latency_wo_load' not in node + else node['latency_wo_load'] + ) return [x for x in self.kernel if x['line_number'] in longest_path] else: # split to DAG @@ -155,14 +168,17 @@ class KernelDG(nx.DiGraph): # Check for read of base register until overwrite for instr_form in kernel: if self.is_read(dst.memory.base, instr_form): + instr_form['mem_dep'] = instruction_form yield instr_form if self.is_written(dst.memory.base, instr_form): # operand in src_dst list if include_write: + instr_form['mem_dep'] = instruction_form yield instr_form break elif self.is_written(dst.memory.base, instr_form): if include_write: + instr_form['mem_dep'] = instruction_form yield instr_form break diff --git a/osaca/semantics/semanticsAppender.py b/osaca/semantics/semanticsAppender.py index a0c44d1..1beb384 100755 --- a/osaca/semantics/semanticsAppender.py +++ b/osaca/semantics/semanticsAppender.py @@ -4,7 +4,7 @@ import os import warnings from functools import reduce -from osaca.parser import AttrDict +from osaca.parser import AttrDict, ParserAArch64v81, ParserX86ATT from osaca.semantics import MachineModel @@ -25,11 +25,12 @@ class SemanticsAppender(object): def __init__(self, machine_model: MachineModel, path_to_yaml=None): self._machine_model = machine_model self._isa = machine_model.get_ISA().lower() - if path_to_yaml: - path = path_to_yaml - else: - path = self._find_file(self._isa) + path = self._find_file(self._isa) self._isa_model = MachineModel(path_to_yaml=path) + if self._isa == 'x86': + self._parser = ParserX86ATT() + elif self._isa == 'aarch64': + self._parser = ParserAArch64v81() def _find_file(self, isa): data_dir = os.path.expanduser('~/.osaca/data/isa') @@ -92,6 +93,7 @@ class SemanticsAppender(object): # No instruction (label, comment, ...) --> ignore throughput = 0.0 latency = 0.0 + latency_wo_load = latency instruction_form['port_pressure'] = [0.0 for i in range(port_number)] else: instruction_data = self._machine_model.get_instruction( @@ -120,17 +122,58 @@ class SemanticsAppender(object): throughput = 0.0 flags.append(INSTR_FLAGS.TP_UNKWN) latency = instruction_data['latency'] + latency_wo_load = latency if latency is None: # assume 0 cy and mark as unknown latency = 0.0 + latency_wo_load = latency flags.append(INSTR_FLAGS.LT_UNKWN) else: # instruction could not be found in DB - # --> mark as unknown and assume 0 cy for latency/throughput - throughput = 0.0 - latency = 0.0 - instruction_form['port_pressure'] = [0.0 for i in range(port_number)] - flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN] + assign_unknown = True + # check for equivalent register-operands DB entry if LD + if INSTR_FLAGS.HAS_LD in instruction_form['flags']: + # --> combine LD and reg form of instruction form + operands = self.substitute_mem_address(instruction_form['operands']) + instruction_data_reg = self._machine_model.get_instruction( + instruction_form['instruction'], operands + ) + if instruction_data_reg: + assign_unknown = False + load_port_pressure = self._machine_model.get_load_throughput( + [ + x['memory'] + for x in instruction_form['operands']['source'] + if 'memory' in x + ][0] + ) + if load_port_pressure is None: + import pdb; pdb.set_trace() + throughput = max( + max(load_port_pressure), instruction_data_reg['throughput'] + ) + latency = ( + self._machine_model.get_load_latency( + [ + self._parser.get_reg_type(op['register']) + for op in operands['operand_list'] + if 'register' in op + ][0] + ) + + instruction_data_reg['latency'] + ) + latency_wo_load = instruction_data_reg['latency'] + instruction_form['port_pressure'] = [ + sum(x) + for x in zip(load_port_pressure, instruction_data_reg['port_pressure']) + ] + if assign_unknown: + # --> mark as unknown and assume 0 cy for latency/throughput + throughput = 0.0 + latency = 0.0 + latency_wo_load = latency + instruction_form['port_pressure'] = [0.0 for i in range(port_number)] + flags += [INSTR_FLAGS.TP_UNKWN, INSTR_FLAGS.LT_UNKWN] # flatten flag list flags = list(set(flags)) if 'flags' not in instruction_form: @@ -139,8 +182,51 @@ class SemanticsAppender(object): instruction_form['flags'] += flags instruction_form['throughput'] = throughput instruction_form['latency'] = latency + instruction_form['latency_wo_load'] = latency_wo_load + # for later CP and loop-carried dependency analysis + instruction_form['latency_cp'] = 0 + instruction_form['latency_lcd'] = 0 - # get parser result and assign operands to + def substitute_mem_address(self, operands): + regs = [op for op in operands['operand_list'] if 'register' in op] + if ( + len(regs) > 1 + and len(set([self._parser.get_reg_type(x['register']) for x in regs])) != 1 + ): + warnings.warn('Load type could not be identified clearly.') + reg_type = self._parser.get_reg_type(regs[0]['register']) + + source = [ + operand if 'memory' not in operand else self.convert_mem_to_reg(operand, reg_type) + for operand in operands['source'] + ] + destination = [ + operand if 'memory' not in operand else self.convert_mem_to_reg(operand, reg_type) + for operand in operands['destination'] + ] + src_dst = [ + operand if 'memory' not in operand else self.convert_mem_to_reg(operand, reg_type) + for operand in operands['destination'] + ] + operand_list = [ + operand if 'memory' not in operand else self.convert_mem_to_reg(operand, reg_type) + for operand in operands['operand_list'] + ] + return { + 'source': source, + 'destination': destination, + 'src_dst': src_dst, + 'operand_list': operand_list, + } + + def convert_mem_to_reg(self, memory, reg_type, reg_id='0'): + if self._isa == 'x86': + register = {'register': {'name': reg_type + reg_id}} + elif self._isa == 'aarch64': + register = {'register': {'prefix': reg_type, 'name': reg_id}} + return register + + # get ;parser result and assign operands to # - source # - destination # - source/destination @@ -225,14 +311,14 @@ class SemanticsAppender(object): def _get_regular_source_x86ATT(self, instruction_form): # return all but last operand sources = [ - op for op in instruction_form['operands'][0:len(instruction_form['operands']) - 1] + op for op in instruction_form['operands'][0 : len(instruction_form['operands']) - 1] ] return sources def _get_regular_source_AArch64(self, instruction_form): # return all but first operand sources = [ - op for op in instruction_form['operands'][1:len(instruction_form['operands'])] + op for op in instruction_form['operands'][1 : len(instruction_form['operands'])] ] return sources diff --git a/tests/test_kerncraftAPI.py b/tests/test_kerncraftAPI.py index ef87844..ec7b27b 100755 --- a/tests/test_kerncraftAPI.py +++ b/tests/test_kerncraftAPI.py @@ -50,7 +50,7 @@ class TestKerncraftAPI(unittest.TestCase): ) self.assertEqual(kapi.get_port_occupation_cycles(kernel), port_occupation) self.assertEqual(kapi.get_total_throughput(kernel), 2.0) - self.assertEqual(kapi.get_latency(kernel), 10.0) + self.assertEqual(kapi.get_latency(kernel), (1.0, 13.0)) def test_kerncraft_API_AArch64(self): kapi = KerncraftAPI('vulcan')