mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-13 07:30:06 +01:00
Compare commits
12 Commits
9c97d32512
...
33fd0a0352
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
33fd0a0352 | ||
|
|
a17e79a3a9 | ||
|
|
de0b1fde64 | ||
|
|
d82bc8052b | ||
|
|
b854562a82 | ||
|
|
8c31c6ff77 | ||
|
|
e096cf4704 | ||
|
|
7d900fde38 | ||
|
|
28df996617 | ||
|
|
1eb82a6f0a | ||
|
|
b7e4acc905 | ||
|
|
b989145a36 |
@@ -2621,6 +2621,79 @@ instruction_forms:
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: true
|
||||
- name: ["comisd", "ucomisd"]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: ["comisd", "ucomisd"]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: dec
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -3613,7 +3686,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
destination: true
|
||||
- name: sbb
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -4342,7 +4415,7 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [shl, shr, shlq, shrq]
|
||||
- name: [sal, sar, salq, sarq, shl, shr, shlq, shrq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import copy
|
||||
import time
|
||||
from itertools import chain
|
||||
from itertools import chain, groupby
|
||||
from multiprocessing import Manager, Process, cpu_count
|
||||
|
||||
import networkx as nx
|
||||
@@ -520,16 +520,13 @@ class KernelDG(nx.DiGraph):
|
||||
lcd_line_numbers = {}
|
||||
for dep in lcd:
|
||||
lcd_line_numbers[dep] = [x.line_number for x, lat in lcd[dep]["dependencies"]]
|
||||
# add color scheme
|
||||
graph.graph["node"] = {"colorscheme": "accent8"}
|
||||
graph.graph["edge"] = {"colorscheme": "accent8"}
|
||||
|
||||
# create LCD edges
|
||||
for dep in lcd_line_numbers:
|
||||
min_line_number = min(lcd_line_numbers[dep])
|
||||
max_line_number = max(lcd_line_numbers[dep])
|
||||
graph.add_edge(max_line_number, min_line_number)
|
||||
graph.edges[max_line_number, min_line_number]["latency"] = [
|
||||
graph.add_edge(min_line_number, max_line_number, dir="back")
|
||||
graph.edges[min_line_number, max_line_number]["latency"] = [
|
||||
lat for x, lat in lcd[dep]["dependencies"] if x.line_number == max_line_number
|
||||
]
|
||||
|
||||
@@ -541,21 +538,14 @@ class KernelDG(nx.DiGraph):
|
||||
for n in cp:
|
||||
graph.nodes[n.line_number]["instruction_form"].latency_cp = n.latency_cp
|
||||
|
||||
# color CP and LCD
|
||||
# Make the critical path bold.
|
||||
for n in graph.nodes:
|
||||
if n in cp_line_numbers:
|
||||
# graph.nodes[n]['color'] = 1
|
||||
graph.nodes[n]["style"] = "bold"
|
||||
graph.nodes[n]["penwidth"] = 4
|
||||
for col, dep in enumerate(lcd):
|
||||
if n in lcd_line_numbers[dep]:
|
||||
if "style" not in graph.nodes[n]:
|
||||
graph.nodes[n]["style"] = "filled"
|
||||
else:
|
||||
graph.nodes[n]["style"] += ",filled"
|
||||
graph.nodes[n]["fillcolor"] = 2 + col
|
||||
|
||||
# color edges
|
||||
# Make critical path edges bold.
|
||||
for e in graph.edges:
|
||||
if (
|
||||
graph.nodes[e[0]]["instruction_form"].line_number in cp_line_numbers
|
||||
@@ -569,12 +559,56 @@ class KernelDG(nx.DiGraph):
|
||||
if bold_edge:
|
||||
graph.edges[e]["style"] = "bold"
|
||||
graph.edges[e]["penwidth"] = 3
|
||||
for dep in lcd_line_numbers:
|
||||
if (
|
||||
graph.nodes[e[0]]["instruction_form"].line_number in lcd_line_numbers[dep]
|
||||
and graph.nodes[e[1]]["instruction_form"].line_number in lcd_line_numbers[dep]
|
||||
):
|
||||
graph.edges[e]["color"] = graph.nodes[e[1]]["fillcolor"]
|
||||
|
||||
# Color the cycles created by loop-carried dependencies, longest first, never recoloring
|
||||
# any node or edge, so that the longest LCD and most long chains that are involved in the
|
||||
# loop are legible.
|
||||
lcd_by_latencies = sorted(
|
||||
(
|
||||
(latency, list(deps))
|
||||
for latency, deps in groupby(lcd, lambda dep: lcd[dep]["latency"])
|
||||
),
|
||||
reverse=True
|
||||
)
|
||||
node_colors = {}
|
||||
edge_colors = {}
|
||||
colors_used = 0
|
||||
for i, (latency, deps) in enumerate(lcd_by_latencies):
|
||||
color = None
|
||||
for dep in deps:
|
||||
path = lcd_line_numbers[dep]
|
||||
for n in path:
|
||||
if n not in node_colors:
|
||||
if not color:
|
||||
color = colors_used + 1
|
||||
colors_used += 1
|
||||
node_colors[n] = color
|
||||
for u, v in zip(path, path[1:] + [path[0]]):
|
||||
if (u, v) not in edge_colors:
|
||||
# Don’t introduce a color just for an edge.
|
||||
if not color:
|
||||
color = colors_used
|
||||
edge_colors[u, v] = color
|
||||
max_color = min(11, colors_used)
|
||||
colorscheme = f"spectral{max(3, max_color)}"
|
||||
graph.graph["node"] = {"colorscheme" : colorscheme}
|
||||
graph.graph["edge"] = {"colorscheme" : colorscheme}
|
||||
for n, color in node_colors.items():
|
||||
if "style" not in graph.nodes[n]:
|
||||
graph.nodes[n]["style"] = "filled"
|
||||
else:
|
||||
graph.nodes[n]["style"] += ",filled"
|
||||
graph.nodes[n]["fillcolor"] = color
|
||||
if (
|
||||
(max_color >= 4 and color in (1, max_color)) or
|
||||
(max_color >= 10 and color in (1, 2, max_color - 1 , max_color))
|
||||
):
|
||||
graph.nodes[n]["fontcolor"] = "white"
|
||||
for (u, v), color in edge_colors.items():
|
||||
# The backward edge of the cycle is represented as the corresponding forward
|
||||
# edge with the attribute dir=back.
|
||||
edge = graph.edges[u, v] if (u, v) in graph.edges else graph.edges[v, u]
|
||||
edge["color"] = color
|
||||
|
||||
# rename node from [idx] to [idx mnemonic] and add shape
|
||||
mapping = {}
|
||||
|
||||
@@ -849,6 +849,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
instr_form_r_ymm = self.parser_x86_intel.parse_line("vmovapd ymm0, ymm1")
|
||||
self.semantics_csx_intel.normalize_instruction_form(instr_form_r_ymm)
|
||||
self.semantics_csx_intel.assign_src_dst(instr_form_r_ymm)
|
||||
instr_form_rw_sar = self.parser_x86_intel.parse_line("sar rcx, 43")
|
||||
self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_sar)
|
||||
self.semantics_csx_intel.assign_src_dst(instr_form_rw_sar)
|
||||
self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c))
|
||||
self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c))
|
||||
self.assertFalse(dag.is_read(reg_rcx, instr_form_w_c))
|
||||
@@ -860,6 +863,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1))
|
||||
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2))
|
||||
self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm))
|
||||
self.assertTrue(dag.is_read(reg_rcx, instr_form_rw_sar))
|
||||
self.assertTrue(dag.is_written(reg_rcx, instr_form_rw_sar))
|
||||
|
||||
def test_is_read_is_written_AArch64(self):
|
||||
# independent form HW model
|
||||
|
||||
Reference in New Issue
Block a user