mirror of
https://github.com/RRZE-HPC/OSACA.git
synced 2025-12-16 09:00:05 +01:00
Compare commits
38 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6abea6249a | ||
|
|
187473b72c | ||
|
|
45847e69ff | ||
|
|
94cb3de6a1 | ||
|
|
63cb61b423 | ||
|
|
b68ce9afc1 | ||
|
|
c274a25e1b | ||
|
|
714319e613 | ||
|
|
590f915f85 | ||
|
|
b4978c724a | ||
|
|
88d3f1a7a0 | ||
|
|
5635d2d8df | ||
|
|
faa63ce95e | ||
|
|
4578eb00fa | ||
|
|
3456f6e24a | ||
|
|
df0351d087 | ||
|
|
969500d79f | ||
|
|
685ed1e1e1 | ||
|
|
af9c10f308 | ||
|
|
4255c11010 | ||
|
|
56fbe1d172 | ||
|
|
aeda9b1d33 | ||
|
|
33fd0a0352 | ||
|
|
a17e79a3a9 | ||
|
|
de0b1fde64 | ||
|
|
d82bc8052b | ||
|
|
b854562a82 | ||
|
|
8c31c6ff77 | ||
|
|
e096cf4704 | ||
|
|
7d900fde38 | ||
|
|
28df996617 | ||
|
|
1eb82a6f0a | ||
|
|
b7e4acc905 | ||
|
|
b989145a36 | ||
|
|
9c97d32512 | ||
|
|
9e6373a013 | ||
|
|
e99c3d935d | ||
|
|
edb32b38ca |
3
.github/workflows/test-n-publish.yml
vendored
3
.github/workflows/test-n-publish.yml
vendored
@@ -23,8 +23,7 @@ jobs:
|
||||
python -m pip install bs4
|
||||
sudo apt-get -y install graphviz libgraphviz-dev pkg-config
|
||||
python -m pip install pygraphviz
|
||||
#python -m pip install "kerncraft>=0.8.16"
|
||||
python -m pip install git+https://github.com/RRZE-HPC/kerncraft.git@7caff4e2ecdbef595013041ba0131e37ed33c72c
|
||||
python -m pip install "kerncraft>=0.8.17"
|
||||
python -m pip install -e .
|
||||
- name: Test
|
||||
run: |
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Open Source Architecture Code Analyzer"""
|
||||
|
||||
name = "osaca"
|
||||
__version__ = "0.7.0"
|
||||
__version__ = "0.7.1"
|
||||
|
||||
# To trigger travis deployment to pypi, do the following:
|
||||
# 1. Increment __version__
|
||||
|
||||
@@ -3,7 +3,8 @@ micro_architecture: Ice Lake Server
|
||||
arch_code: ICX
|
||||
isa: x86
|
||||
ROB_size: 352 # from wikichip
|
||||
retired_uOps_per_cycle: 10 # from wikichip
|
||||
dispatched_uOps_per_cycle: 5
|
||||
retired_uOps_per_cycle: 5
|
||||
scheduler_size: 97 # actually MORE than 97, number unknown
|
||||
hidden_loads: false
|
||||
load_latency: {gpr: 5.0, mm: 5.0, xmm: 5.0, ymm: 5.0, zmm: 5.0}
|
||||
|
||||
@@ -530,6 +530,190 @@ instruction_forms:
|
||||
name: "rax"
|
||||
source: false
|
||||
destination: true
|
||||
- name: [ja, jbe, jna, jnbe]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [jae, jb, jc, jnae, jnb, jnc]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [je, jne, jnz, jz]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [jg, jle, jng, jnle]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [jge, jl, jnge, jnl]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [jno, jo]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [jnp, jp, jpe, jpo]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [jns, js]
|
||||
operands:
|
||||
- class: identifier
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [seta, setbe, setna, setnbe]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [setae, setb, setc, setnae, setnb, setnc]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [sete, setne, setnz, setz]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [setg, setle, setng, setnle]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [setge, setl, setnge, setnl]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [setno, seto]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [setnp, setp, setpe, setpo]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: [setns, sets]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: false
|
||||
destination: true
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: true
|
||||
destination: false
|
||||
- name: cmova
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -2437,6 +2621,79 @@ instruction_forms:
|
||||
name: "ZF"
|
||||
source: true
|
||||
destination: true
|
||||
- name: ["comisd", "ucomisd"]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: ["comisd", "ucomisd"]
|
||||
operands:
|
||||
- class: "register"
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: false
|
||||
- class: "memory"
|
||||
base: "*"
|
||||
offset: "*"
|
||||
index: "*"
|
||||
scale: "*"
|
||||
source: true
|
||||
destination: false
|
||||
hidden_operands:
|
||||
- class: "flag"
|
||||
name: "CF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "OF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "SF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "ZF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "AF"
|
||||
source: false
|
||||
destination: true
|
||||
- class: "flag"
|
||||
name: "PF"
|
||||
source: false
|
||||
destination: true
|
||||
- name: dec
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -3429,7 +3686,7 @@ instruction_forms:
|
||||
- class: "register"
|
||||
name: "gpr"
|
||||
source: true
|
||||
destination: true
|
||||
destination: true
|
||||
- name: sbb
|
||||
operands:
|
||||
- class: "register"
|
||||
@@ -4158,7 +4415,7 @@ instruction_forms:
|
||||
name: "xmm"
|
||||
source: true
|
||||
destination: true
|
||||
- name: [shl, shr, shlq, shrq]
|
||||
- name: [sal, sar, salq, sarq, shl, shr, shlq, shrq]
|
||||
operands:
|
||||
- class: "immediate"
|
||||
imd: "int"
|
||||
|
||||
@@ -5004,6 +5004,54 @@ instruction_forms:
|
||||
port_pressure: [[1, '01']] # uops.info import
|
||||
throughput: 0.5 # uops.info import
|
||||
uops: 1 # uops.info import
|
||||
- name: [VCMPEQPD, VCMPLTPD, VCMPLEPD, VCMPUNORDPD, VCMPNEQPD, VCMPNLTPD, VCMPNLEPD, VCMPORDPD, VCMPEQ_UQPD, VCMPNGEPD, VCMPNGTPD, VCMPFALSEPD, VCMPNEQ_OQPD, VCMPGEPD, VCMPGTPD, VCMPTRUEPD, VCMPEQ_OSPD, VCMPLT_OQPD, VCMPLE_OQPD, VCMPUNORD_SPD, VCMPNEQ_USPD, VCMPNLT_UQPD, VCMPNLE_UQPD, VCMPORD_SPD, VCMPEQ_USPD, VCMPNGE_UQPD, VCMPNGT_UQPD, VCMPFALSE_OSPD, VCMPNEQ_OSPD, VCMPGE_OQPD, VCMPGT_OQPD, VCMPTRUE_USPD] # VCMPPD pseudo-op
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 4
|
||||
port_pressure: [[1, '01']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: [VCMPEQPD, VCMPLTPD, VCMPLEPD, VCMPUNORDPD, VCMPNEQPD, VCMPNLTPD, VCMPNLEPD, VCMPORDPD, VCMPEQ_UQPD, VCMPNGEPD, VCMPNGTPD, VCMPFALSEPD, VCMPNEQ_OQPD, VCMPGEPD, VCMPGTPD, VCMPTRUEPD, VCMPEQ_OSPD, VCMPLT_OQPD, VCMPLE_OQPD, VCMPUNORD_SPD, VCMPNEQ_USPD, VCMPNLT_UQPD, VCMPNLE_UQPD, VCMPORD_SPD, VCMPEQ_USPD, VCMPNGE_UQPD, VCMPNGT_UQPD, VCMPFALSE_OSPD, VCMPNEQ_OSPD, VCMPGE_OQPD, VCMPGT_OQPD, VCMPTRUE_USPD] # VCMPPD pseudo-op
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
latency: 4
|
||||
port_pressure: [[1, '01']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: [VCMPEQPS, VCMPLTPS, VCMPLEPS, VCMPUNORDPS, VCMPNEQPS, VCMPNLTPS, VCMPNLEPS, VCMPORDPS, VCMPEQ_UQPS, VCMPNGEPS, VCMPNGTPS, VCMPFALSEPS, VCMPNEQ_OQPS, VCMPGEPS, VCMPGTPS, VCMPTRUEPS, VCMPEQ_OSPS, VCMPLT_OQPS, VCMPLE_OQPS, VCMPUNORD_SPS, VCMPNEQ_USPS, VCMPNLT_UQPS, VCMPNLE_UQPS, VCMPORD_SPS, VCMPEQ_USPS, VCMPNGE_UQPS, VCMPNGT_UQPS, VCMPFALSE_OSPS, VCMPNEQ_OSPS, VCMPGE_OQPS, VCMPGT_OQPS, VCMPTRUE_USPS] # VCMPPS pseudo-op
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 4
|
||||
port_pressure: [[1, '01']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: [VCMPEQPS, VCMPLTPS, VCMPLEPS, VCMPUNORDPS, VCMPNEQPS, VCMPNLTPS, VCMPNLEPS, VCMPORDPS, VCMPEQ_UQPS, VCMPNGEPS, VCMPNGTPS, VCMPFALSEPS, VCMPNEQ_OQPS, VCMPGEPS, VCMPGTPS, VCMPTRUEPS, VCMPEQ_OSPS, VCMPLT_OQPS, VCMPLE_OQPS, VCMPUNORD_SPS, VCMPNEQ_USPS, VCMPNLT_UQPS, VCMPNLE_UQPS, VCMPORD_SPS, VCMPEQ_USPS, VCMPNGE_UQPS, VCMPNGT_UQPS, VCMPFALSE_OSPS, VCMPNEQ_OSPS, VCMPGE_OQPS, VCMPGT_OQPS, VCMPTRUE_USPS] # VCMPPS pseudo-op
|
||||
operands:
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
- class: register
|
||||
name: ymm
|
||||
latency: 4
|
||||
port_pressure: [[1, '01']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: VCMPSD # uops.info import
|
||||
operands: # uops.info import
|
||||
- class: immediate # uops.info import
|
||||
|
||||
@@ -3,7 +3,8 @@ micro_architecture: Sapphire Rapids
|
||||
arch_code: SPR
|
||||
isa: x86
|
||||
ROB_size: ~
|
||||
retired_uOps_per_cycle: ~
|
||||
dispatched_uOps_per_cycle: 6
|
||||
retired_uOps_per_cycle: 8
|
||||
scheduler_size: ~
|
||||
hidden_loads: false
|
||||
load_latency: {gpr: 5.0, mm: 5.0, xmm: 5.0, ymm: 5.0, zmm: 5.0}
|
||||
@@ -2788,7 +2789,17 @@ instruction_forms:
|
||||
port_pressure: [[1, '15']] # ibench
|
||||
throughput: 0.5 # ibench
|
||||
uops: 1 # ibench
|
||||
- name: vdivpd # ibench
|
||||
- name: divpd # ibench
|
||||
operands: # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
latency: 14 # ibench
|
||||
port_pressure: [[1, '0'], [4, ['0DV']]] # ibench
|
||||
throughput: 4.0 # ibench
|
||||
uops: 4 # ibench
|
||||
- name: vdivpd # ibench
|
||||
operands: # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
@@ -3840,6 +3851,16 @@ instruction_forms:
|
||||
port_pressure: [[1, '01']] # ibench
|
||||
throughput: 0.5 # ibench
|
||||
uops: 1 # ibench
|
||||
- name: [paddd, paddq] # ibench
|
||||
operands: # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
latency: 1 # ibench
|
||||
port_pressure: [[1, '015']] # ibench
|
||||
throughput: 0.3333333333333333 # ibench
|
||||
uops: 1 # ibench
|
||||
- name: [vpaddd, vpaddq] # ibench
|
||||
operands: # ibench
|
||||
- class: register # ibench
|
||||
@@ -4313,6 +4334,16 @@ instruction_forms:
|
||||
port_pressure: [[1, '05']] # ibench
|
||||
throughput: 0.5 # ibench
|
||||
uops: 1 # ibench
|
||||
- name: subsd # ibench
|
||||
operands: # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
- class: register # ibench
|
||||
name: xmm # ibench
|
||||
latency: 2 # ibench
|
||||
port_pressure: [[1, '15']] # ibench
|
||||
throughput: 0.5 # ibench
|
||||
uops: 1 # ibench
|
||||
- name: vsubsd # ibench
|
||||
operands: # ibench
|
||||
- class: register # ibench
|
||||
@@ -4562,6 +4593,16 @@ instruction_forms:
|
||||
port_pressure: [[1, '01'], [1, '5']]
|
||||
throughput: 1.0
|
||||
uops: 3
|
||||
- name: [cvtdq2pd, vcvtdq2pd]
|
||||
operands:
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 5
|
||||
port_pressure: [[1, '01'], [1, '5']]
|
||||
throughput: 1.0
|
||||
uops: 2
|
||||
- name: vcvtdq2pd
|
||||
operands:
|
||||
- class: register
|
||||
@@ -5866,6 +5907,18 @@ instruction_forms:
|
||||
port_pressure: [[1, '05']]
|
||||
throughput: 0.5
|
||||
uops: 1
|
||||
- name: [vpshufd, pshufd] # uops.info
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
- class: register
|
||||
name: xmm
|
||||
- class: register
|
||||
name: xmm
|
||||
latency: 1
|
||||
port_pressure: [[1, '15']]
|
||||
throughput: 1.0
|
||||
uops: 1
|
||||
- name: vshuff64x2
|
||||
operands:
|
||||
- class: immediate
|
||||
@@ -5992,7 +6045,7 @@ instruction_forms:
|
||||
port_pressure: [[1, '5']] # uops.info
|
||||
throughput: 1.0 # ibench
|
||||
uops: 1
|
||||
- name: [cltq, cdq, cdqe]
|
||||
- name: [cltq, cdq, cdqe]
|
||||
operands: []
|
||||
latency: 1 # uops.info
|
||||
port_pressure: [[1, '06']] # uops.info
|
||||
|
||||
@@ -35,9 +35,9 @@ port_model_scheme: |
|
||||
| BR | | BR | | ALU | | ALU | | ALU | | ALU | | ALU | | DV | | ALU | | DV | |SIMD/FP| |FPDV| |SIMD/FP| |SIMD/FP| |FPDV| |SIMD/FP| | LD | | LD | | LD | | ST | | ST |
|
||||
+----+ +----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+ +----+ | ALU | +----+ | ALU | | ALU | +----+ | ALU | +-----+ +-----+ +-----+ +-----+ +-----+
|
||||
silly silly +------+ +------+ +-------+ +-------+ +-------+ +-------+ +-----+ +-----+
|
||||
| MUL | | MUL | +-------+ +-------+ +-------+ +-------+ | AGU | | AGU |
|
||||
+------+ +------+ |SIMD/FP| |SIMD/FP| |SIMD/FP| |SIMD/FP| +-----+ +-----+
|
||||
+------+ +------+ | MISC | | MISC | | MISC | | MISC |
|
||||
| MUL | | MUL | +-------+ +-------+ +-------+ +-------+ | ST | | ST |
|
||||
+------+ +------+ |SIMD/FP| |SIMD/FP| |SIMD/FP| |SIMD/FP| | AGU | | AGU |
|
||||
+------+ +------+ | MISC | | MISC | | MISC | | MISC | +-----+ +-----+
|
||||
| CRC | | CRC | +-------+ +-------+ +-------+ +-------+
|
||||
+------+ +------+ +-------+ +-------+ +-------+ +-------+
|
||||
+------+ +------+ | SIMD | | SIMD | | SIMD | | SIMD |
|
||||
@@ -119,6 +119,17 @@ instruction_forms:
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p2367
|
||||
port_pressure: [[1, '2367']]
|
||||
- name: addvl
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: immediate
|
||||
imd: int
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: adds
|
||||
operands:
|
||||
- class: register
|
||||
@@ -259,13 +270,13 @@ instruction_forms:
|
||||
throughput: 0.16666666
|
||||
latency: 1.0 # 1*p234567
|
||||
port_pressure: [[1, '234567']]
|
||||
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq]
|
||||
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq, bmi]
|
||||
operands:
|
||||
- class: identifier
|
||||
throughput: 0.5
|
||||
latency: 0.0
|
||||
port_pressure: [[1, '01']]
|
||||
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq]
|
||||
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq, bmi]
|
||||
operands:
|
||||
- class: immediate
|
||||
imd: int
|
||||
@@ -1410,7 +1421,7 @@ instruction_forms:
|
||||
throughput: 0.25
|
||||
latency: 1.0 # 1*p2367
|
||||
port_pressure: [[1, '2367']]
|
||||
- name: [incw, incd, inch]
|
||||
- name: [incw, incd, inch, incb]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: z
|
||||
@@ -1420,20 +1431,20 @@ instruction_forms:
|
||||
throughput: 0.25
|
||||
latency: 2.0 # 1*p8,9,10,11
|
||||
port_pressure: [[1, ['8', '9', '10', '11']]]
|
||||
- name: [incw, incd, inch]
|
||||
- name: [incw, incd, inch, incb]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
- class: identifier
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p67
|
||||
latency: 1.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: [incw, incd, inch]
|
||||
- name: [incw, incd, inch, incb]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: x
|
||||
throughput: 0.5
|
||||
latency: 2.0 # 1*p67
|
||||
latency: 1.0 # 1*p67
|
||||
port_pressure: [[1, '67']]
|
||||
- name: [madd, msub] # NOTE: if the dependency is via the addend (fourth operand), the latency is only 1cy !!!
|
||||
operands:
|
||||
@@ -2939,6 +2950,15 @@ instruction_forms:
|
||||
throughput: 1.0
|
||||
latency: 1.0 # 1*p3
|
||||
port_pressure: [[1, '3']]
|
||||
- name: [fcmp, fcmpe]
|
||||
operands:
|
||||
- class: register
|
||||
prefix: '*'
|
||||
- class: immediate
|
||||
imd: '*'
|
||||
throughput: 1.0
|
||||
latency: 1.0 # 1*p3
|
||||
port_pressure: [[1, '3']]
|
||||
- name: [fccmp, fccmpe] # LT assumed from fcmp
|
||||
operands:
|
||||
- class: register
|
||||
|
||||
@@ -80,6 +80,8 @@ class Frontend(object):
|
||||
s += lineno_filler + self._get_port_number_line(port_len) + "\n"
|
||||
s += separator + "\n"
|
||||
for instruction_form in kernel:
|
||||
if KernelDG.is_load_line_number(instruction_form.line_number):
|
||||
continue
|
||||
line = "{:4d} {} {} {}".format(
|
||||
instruction_form.line_number,
|
||||
self._get_port_pressure(
|
||||
@@ -112,6 +114,8 @@ class Frontend(object):
|
||||
"""
|
||||
s = "\n\nLatency Analysis Report\n-----------------------\n"
|
||||
for instruction_form in cp_kernel:
|
||||
if KernelDG.is_load_line_number(instruction_form.line_number):
|
||||
continue
|
||||
s += (
|
||||
"{:4d} {} {:4.1f} {}{}{} {}".format(
|
||||
instruction_form.line_number,
|
||||
@@ -147,8 +151,11 @@ class Frontend(object):
|
||||
)
|
||||
# TODO find a way to overcome padding for different tab-lengths
|
||||
for dep in sorted(dep_dict.keys()):
|
||||
s += "{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
|
||||
int(dep.split("-")[0]),
|
||||
dep0 = float(dep.split("-")[0])
|
||||
if KernelDG.is_load_line_number(dep0):
|
||||
continue
|
||||
s += "{:4.0f} {} {:4.1f} {} {:36}{} {}\n".format(
|
||||
dep0,
|
||||
separator,
|
||||
dep_dict[dep]["latency"],
|
||||
separator,
|
||||
@@ -356,6 +363,8 @@ class Frontend(object):
|
||||
if show_cmnts is False and self._is_comment(instruction_form):
|
||||
continue
|
||||
line_number = instruction_form.line_number
|
||||
if KernelDG.is_load_line_number(line_number):
|
||||
continue
|
||||
used_ports = [list(uops[1]) for uops in instruction_form.port_uops]
|
||||
used_ports = list(set([p for uops_ports in used_ports for p in uops_ports]))
|
||||
s += "{:4d} {}{} {} {}\n".format(
|
||||
|
||||
@@ -75,10 +75,19 @@ class ParserX86ATT(ParserX86):
|
||||
if not model:
|
||||
# Check for instruction without GAS suffix.
|
||||
if mnemonic[-1] in self.GAS_SUFFIXES:
|
||||
mnemonic = mnemonic[:-1]
|
||||
model = arch_model.get_instruction(mnemonic, instruction_form.operands)
|
||||
if model:
|
||||
instruction_form.mnemonic = mnemonic
|
||||
nongas_mnemonic = mnemonic[:-1]
|
||||
if arch_model.get_instruction(nongas_mnemonic, instruction_form.operands):
|
||||
mnemonic = nongas_mnemonic
|
||||
# Check for non-VEX version and vice-versa
|
||||
elif mnemonic[0] == "v":
|
||||
unvexed_mnemonic = mnemonic[1:]
|
||||
if arch_model.get_instruction(unvexed_mnemonic, len(instruction_form.operands)):
|
||||
mnemonic = unvexed_mnemonic
|
||||
else:
|
||||
vexed_mnemonic = "v" + mnemonic
|
||||
if arch_model.get_instruction(vexed_mnemonic, len(instruction_form.operands)):
|
||||
mnemonic = vexed_mnemonic
|
||||
instruction_form.mnemonic = mnemonic
|
||||
|
||||
def construct_parser(self):
|
||||
"""Create parser for x86 AT&T ISA."""
|
||||
@@ -365,10 +374,8 @@ class ParserX86ATT(ParserX86):
|
||||
return RegisterOperand(
|
||||
prefix=operand["prefix"].lower() if "prefix" in operand else None,
|
||||
name=operand["name"],
|
||||
shape=operand["shape"].lower() if "shape" in operand else None,
|
||||
lanes=operand["lanes"] if "lanes" in operand else None,
|
||||
index=operand["index"] if "index" in operand else None,
|
||||
predication=operand["predication"].lower() if "predication" in operand else None,
|
||||
mask=RegisterOperand(name=operand["mask"]) if "mask" in operand else None,
|
||||
)
|
||||
|
||||
def process_directive(self, directive):
|
||||
|
||||
@@ -160,12 +160,18 @@ class ParserX86Intel(ParserX86):
|
||||
binary_number = pp.Combine(pp.Word("01") + pp.CaselessLiteral("B"))
|
||||
octal_number = pp.Combine(pp.Word("01234567") + pp.CaselessLiteral("O"))
|
||||
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums))
|
||||
hex_number = pp.Combine(pp.Word(pp.hexnums) + pp.CaselessLiteral("H"))
|
||||
hex_number_suffix = pp.Combine(
|
||||
pp.Word(pp.hexnums) + (pp.CaselessLiteral("H") ^ pp.CaselessLiteral("R"))
|
||||
)
|
||||
hex_number_0x = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
|
||||
)
|
||||
hex_number = hex_number_0x ^ hex_number_suffix
|
||||
float_number = pp.Combine(
|
||||
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + pp.Word(".", pp.nums)
|
||||
).setResultsName("value")
|
||||
integer_number = (
|
||||
binary_number ^ octal_number ^ decimal_number ^ hex_number
|
||||
hex_number ^ binary_number ^ octal_number ^ decimal_number
|
||||
).setResultsName("value")
|
||||
|
||||
# Comment.
|
||||
@@ -192,6 +198,7 @@ class ParserX86Intel(ParserX86):
|
||||
| pp.CaselessKeyword("WORD")
|
||||
| pp.CaselessKeyword("XMMWORD")
|
||||
| pp.CaselessKeyword("YMMWORD")
|
||||
| pp.CaselessKeyword("ZMMWORD")
|
||||
).setResultsName("data_type")
|
||||
|
||||
# Identifier. Note that $ is not mentioned in the ASM386 Assembly Language Reference,
|
||||
@@ -286,15 +293,14 @@ class ParserX86Intel(ParserX86):
|
||||
pp.CaselessKeyword("ST")
|
||||
+ pp.Optional(pp.Literal("(") + pp.Word("01234567") + pp.Literal(")"))
|
||||
).setResultsName("name")
|
||||
xmm_register = pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums)) | pp.Combine(
|
||||
pp.CaselessLiteral("XMM1") + pp.Word("012345")
|
||||
)
|
||||
simd_register = (
|
||||
pp.Combine(pp.CaselessLiteral("MM") + pp.Word("01234567"))
|
||||
| xmm_register
|
||||
pp.Combine(pp.CaselessLiteral("MM") + pp.Word(pp.nums))
|
||||
| pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums))
|
||||
| pp.Combine(pp.CaselessLiteral("YMM") + pp.Word(pp.nums))
|
||||
| pp.Combine(pp.CaselessLiteral("YMM1") + pp.Word("012345"))
|
||||
).setResultsName("name")
|
||||
| pp.Combine(pp.CaselessLiteral("ZMM") + pp.Word(pp.nums))
|
||||
).setResultsName("name") + pp.Optional(
|
||||
pp.Literal("{") + pp.Word(pp.alphanums).setResultsName("mask") + pp.Literal("}")
|
||||
)
|
||||
segment_register = (
|
||||
pp.CaselessKeyword("CS")
|
||||
| pp.CaselessKeyword("DS")
|
||||
@@ -395,7 +401,9 @@ class ParserX86Intel(ParserX86):
|
||||
+ pp.Optional(pp.Literal("+") + immediate.setResultsName("displacement"))
|
||||
).setResultsName("offset_expression")
|
||||
ptr_expression = pp.Group(
|
||||
data_type + pp.CaselessKeyword("PTR") + address_expression
|
||||
data_type
|
||||
+ (pp.CaselessKeyword("PTR") | pp.CaselessKeyword("BCST"))
|
||||
+ address_expression
|
||||
).setResultsName("ptr_expression")
|
||||
short_expression = pp.Group(pp.CaselessKeyword("SHORT") + identifier).setResultsName(
|
||||
"short_expression"
|
||||
@@ -659,7 +667,10 @@ class ParserX86Intel(ParserX86):
|
||||
return directive_new, directive.get("comment")
|
||||
|
||||
def process_register(self, operand):
|
||||
return RegisterOperand(name=operand.name)
|
||||
return RegisterOperand(
|
||||
name=operand.name,
|
||||
mask=RegisterOperand(name=operand.mask) if "mask" in operand else None,
|
||||
)
|
||||
|
||||
def process_register_expression(self, register_expression):
|
||||
base = register_expression.get("base")
|
||||
@@ -793,8 +804,10 @@ class ParserX86Intel(ParserX86):
|
||||
if isinstance(imd.value, str):
|
||||
if "." in imd.value:
|
||||
return float(imd.value)
|
||||
if imd.value.startswith("0x"):
|
||||
return int(imd.value, 0)
|
||||
# Now parse depending on the base.
|
||||
base = {"B": 2, "O": 8, "H": 16}.get(imd.value[-1], 10)
|
||||
base = {"B": 2, "O": 8, "H": 16, "R": 16}.get(imd.value[-1], 10)
|
||||
value = 0
|
||||
negative = imd.value[0] == "-"
|
||||
positive = imd.value[0] == "+"
|
||||
|
||||
@@ -13,7 +13,7 @@ class RegisterOperand(Operand):
|
||||
lanes=None,
|
||||
shape=None,
|
||||
index=None,
|
||||
mask=False,
|
||||
mask=None,
|
||||
zeroing=False,
|
||||
predication=None,
|
||||
source=False,
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import copy
|
||||
from enum import Enum
|
||||
import time
|
||||
from itertools import chain
|
||||
from itertools import chain, groupby
|
||||
from multiprocessing import Manager, Process, cpu_count
|
||||
|
||||
import networkx as nx
|
||||
from osaca.semantics import INSTR_FLAGS, ArchSemantics, MachineModel
|
||||
from osaca.parser.instruction_form import InstructionForm
|
||||
from osaca.parser.memory import MemoryOperand
|
||||
from osaca.parser.register import RegisterOperand
|
||||
from osaca.parser.immediate import ImmediateOperand
|
||||
@@ -17,6 +19,11 @@ class KernelDG(nx.DiGraph):
|
||||
# threshold for checking dependency graph sequential or in parallel
|
||||
INSTRUCTION_THRESHOLD = 50
|
||||
|
||||
class ReadKind(Enum):
    """Enumeration of the read classifications used by the kernel dependency graph."""

    # NOTE(review): the member meanings below are inferred from their names only;
    # the code consuming them is not visible here -- confirm before relying on it.
    NOT_A_READ = 0  # presumably: the operand is not read at all
    READ_FOR_LOAD = 1  # presumably: the operand is read to perform a memory load
    OTHER_READ = 2  # presumably: any other kind of read
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parsed_kernel,
|
||||
@@ -46,6 +53,23 @@ class KernelDG(nx.DiGraph):
|
||||
dst_list.extend(tmp_list)
|
||||
# print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
|
||||
|
||||
@staticmethod
|
||||
def get_load_line_number(line_number):
|
||||
# The line number of the load must be less than the line number of the instruction. The
|
||||
# offset is irrelevant, but it must be a machine number with trailing zeroes to avoid silly
|
||||
# rounding issues.
|
||||
return line_number - 0.125
|
||||
|
||||
@staticmethod
|
||||
def is_load_line_number(line_number):
|
||||
return line_number != int(line_number)
|
||||
|
||||
@staticmethod
def get_real_line_number(line_number):
    """Map a load line number back to an integer line number.

    :param line_number: line number, possibly a fractional load line number
    :returns: ``line_number`` unchanged if it is not a load line number,
        otherwise ``int(line_number + 0.125)``
    """
    # Guard clause: ordinary line numbers pass through untouched.
    if not KernelDG.is_load_line_number(line_number):
        return line_number
    return int(line_number + 0.125)
|
||||
|
||||
def create_DG(self, kernel, flag_dependencies=False):
|
||||
"""
|
||||
Create directed graph from given kernel
|
||||
@@ -57,10 +81,10 @@ class KernelDG(nx.DiGraph):
|
||||
:type flag_dependencies: boolean, optional
|
||||
:returns: :class:`~nx.DiGraph` -- directed graph object
|
||||
"""
|
||||
# 1. go through kernel instruction forms and add them as node attribute
|
||||
# 2. find edges (to further dependent instructions)
|
||||
# 3. get LT value and set as edge weight
|
||||
# Go through kernel instruction forms and add them as nodes of the graph. Create a LOAD
|
||||
# node for instructions that include a memory reference.
|
||||
dg = nx.DiGraph()
|
||||
loads = {}
|
||||
for i, instruction_form in enumerate(kernel):
|
||||
dg.add_node(instruction_form.line_number)
|
||||
dg.nodes[instruction_form.line_number]["instruction_form"] = instruction_form
|
||||
@@ -70,14 +94,22 @@ class KernelDG(nx.DiGraph):
|
||||
and INSTR_FLAGS.LD not in instruction_form.flags
|
||||
):
|
||||
# add new node
|
||||
dg.add_node(instruction_form.line_number + 0.1)
|
||||
dg.nodes[instruction_form.line_number + 0.1]["instruction_form"] = instruction_form
|
||||
load_line_number = KernelDG.get_load_line_number(instruction_form.line_number)
|
||||
loads[instruction_form.line_number] = load_line_number
|
||||
dg.add_node(load_line_number)
|
||||
dg.nodes[load_line_number]["instruction_form"] = InstructionForm(
|
||||
mnemonic="_LOAD_", line=instruction_form.line, line_number=load_line_number
|
||||
)
|
||||
# and set LD latency as edge weight
|
||||
dg.add_edge(
|
||||
instruction_form.line_number + 0.1,
|
||||
load_line_number,
|
||||
instruction_form.line_number,
|
||||
latency=instruction_form.latency - instruction_form.latency_wo_load,
|
||||
)
|
||||
|
||||
# 1. find edges (to further dependent instructions)
|
||||
# 2. get LT value and set as edge weight
|
||||
for i, instruction_form in enumerate(kernel):
|
||||
for dep, dep_flags in self.find_depending(
|
||||
instruction_form, kernel[i + 1 :], flag_dependencies
|
||||
):
|
||||
@@ -91,11 +123,18 @@ class KernelDG(nx.DiGraph):
|
||||
edge_weight += self.model.get("store_to_load_forward_latency", 0)
|
||||
if "p_indexed" in dep_flags and self.model is not None:
|
||||
edge_weight = self.model.get("p_index_latency", 1)
|
||||
dg.add_edge(
|
||||
instruction_form.line_number,
|
||||
dep.line_number,
|
||||
latency=edge_weight,
|
||||
)
|
||||
if "for_load" in dep_flags and self.model is not None and dep.line_number in loads:
|
||||
dg.add_edge(
|
||||
instruction_form.line_number,
|
||||
loads[dep.line_number],
|
||||
latency=edge_weight,
|
||||
)
|
||||
else:
|
||||
dg.add_edge(
|
||||
instruction_form.line_number,
|
||||
dep.line_number,
|
||||
latency=edge_weight,
|
||||
)
|
||||
|
||||
dg.nodes[dep.line_number]["instruction_form"] = dep
|
||||
return dg
|
||||
@@ -204,23 +243,17 @@ class KernelDG(nx.DiGraph):
|
||||
for lat_sum, involved_lines in loopcarried_deps:
|
||||
dict_key = "-".join([str(il[0]) for il in involved_lines])
|
||||
loopcarried_deps_dict[dict_key] = {
|
||||
"root": self._get_node_by_lineno(involved_lines[0][0]),
|
||||
"root": self._get_node_by_lineno(dg, involved_lines[0][0]),
|
||||
"dependencies": [
|
||||
(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
|
||||
(self._get_node_by_lineno(dg, ln), lat) for ln, lat in involved_lines
|
||||
],
|
||||
"latency": lat_sum,
|
||||
}
|
||||
return loopcarried_deps_dict
|
||||
|
||||
def _get_node_by_lineno(self, lineno, kernel=None, all=False):
|
||||
"""Return instruction form with line number ``lineno`` from kernel"""
|
||||
if kernel is None:
|
||||
kernel = self.kernel
|
||||
result = [instr for instr in kernel if instr.line_number == lineno]
|
||||
if not all:
|
||||
return result[0]
|
||||
else:
|
||||
return result
|
||||
def _get_node_by_lineno(self, dg, lineno):
|
||||
"""Return instruction form with line number ``lineno`` from dg"""
|
||||
return dg.nodes[lineno]["instruction_form"]
|
||||
|
||||
def get_critical_path(self):
|
||||
"""Find and return critical path after the creation of a directed graph."""
|
||||
@@ -229,21 +262,21 @@ class KernelDG(nx.DiGraph):
|
||||
longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight="latency")
|
||||
# TODO verify that we can remove the next two lines due to earlier initialization
|
||||
for line_number in longest_path:
|
||||
self._get_node_by_lineno(int(line_number)).latency_cp = 0
|
||||
self._get_node_by_lineno(self.dg, line_number).latency_cp = 0
|
||||
# set cp latency to instruction
|
||||
path_latency = 0.0
|
||||
for s, d in nx.utils.pairwise(longest_path):
|
||||
node = self._get_node_by_lineno(int(s))
|
||||
node = self._get_node_by_lineno(self.dg, s)
|
||||
node.latency_cp = self.dg.edges[(s, d)]["latency"]
|
||||
path_latency += node.latency_cp
|
||||
# add latency for last instruction
|
||||
node = self._get_node_by_lineno(int(longest_path[-1]))
|
||||
node = self._get_node_by_lineno(self.dg, longest_path[-1])
|
||||
node.latency_cp = node.latency
|
||||
if max_latency_instr.latency > path_latency:
|
||||
max_latency_instr.latency_cp = float(max_latency_instr.latency)
|
||||
return [max_latency_instr]
|
||||
else:
|
||||
return [x for x in self.kernel if x.line_number in longest_path]
|
||||
return [self._get_node_by_lineno(self.dg, x) for x in longest_path]
|
||||
else:
|
||||
# split to DAG
|
||||
raise NotImplementedError("Kernel is cyclic.")
|
||||
@@ -284,15 +317,18 @@ class KernelDG(nx.DiGraph):
|
||||
# print(" TO", instr_form.line, register_changes)
|
||||
if isinstance(dst, RegisterOperand):
|
||||
# read of register
|
||||
if self.is_read(dst, instr_form):
|
||||
read_kind = self._read_kind(dst, instr_form)
|
||||
if read_kind != KernelDG.ReadKind.NOT_A_READ:
|
||||
dep_flags = []
|
||||
if (
|
||||
dst.pre_indexed
|
||||
or dst.post_indexed
|
||||
or (isinstance(dst.post_indexed, dict))
|
||||
):
|
||||
yield instr_form, ["p_indexed"]
|
||||
else:
|
||||
yield instr_form, []
|
||||
dep_flags = ["p_indexed"]
|
||||
if read_kind == KernelDG.ReadKind.READ_FOR_LOAD:
|
||||
dep_flags += ["for_load"]
|
||||
yield instr_form, dep_flags
|
||||
# write to register -> abort
|
||||
if self.is_written(dst, instr_form):
|
||||
break
|
||||
@@ -363,11 +399,12 @@ class KernelDG(nx.DiGraph):
|
||||
return self.dg.successors(line_number)
|
||||
return iter([])
|
||||
|
||||
def is_read(self, register, instruction_form):
|
||||
"""Check if instruction form reads from given register"""
|
||||
def _read_kind(self, register, instruction_form):
|
||||
"""Check if instruction form reads from given register. Returns a ReadKind."""
|
||||
is_read = False
|
||||
for_load = False
|
||||
if instruction_form.semantic_operands is None:
|
||||
return is_read
|
||||
return KernelDG.ReadKind.NOT_A_READ
|
||||
for src in chain(
|
||||
instruction_form.semantic_operands["source"],
|
||||
instruction_form.semantic_operands["src_dst"],
|
||||
@@ -377,10 +414,15 @@ class KernelDG(nx.DiGraph):
|
||||
if isinstance(src, FlagOperand):
|
||||
is_read = self.parser.is_flag_dependend_of(register, src) or is_read
|
||||
if isinstance(src, MemoryOperand):
|
||||
is_memory_read = False
|
||||
if src.base is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.base) or is_read
|
||||
is_memory_read = self.parser.is_reg_dependend_of(register, src.base)
|
||||
if src.index is not None and isinstance(src.index, RegisterOperand):
|
||||
is_read = self.parser.is_reg_dependend_of(register, src.index) or is_read
|
||||
is_memory_read = (
|
||||
self.parser.is_reg_dependend_of(register, src.index) or is_memory_read
|
||||
)
|
||||
for_load = is_memory_read
|
||||
is_read = is_read or is_memory_read
|
||||
# Check also if read in destination memory address
|
||||
for dst in chain(
|
||||
instruction_form.semantic_operands["destination"],
|
||||
@@ -391,7 +433,16 @@ class KernelDG(nx.DiGraph):
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.base) or is_read
|
||||
if dst.index is not None:
|
||||
is_read = self.parser.is_reg_dependend_of(register, dst.index) or is_read
|
||||
return is_read
|
||||
if is_read:
|
||||
if for_load:
|
||||
return KernelDG.ReadKind.READ_FOR_LOAD
|
||||
else:
|
||||
return KernelDG.ReadKind.OTHER_READ
|
||||
else:
|
||||
return KernelDG.ReadKind.NOT_A_READ
|
||||
|
||||
def is_read(self, register, instruction_form):
|
||||
return self._read_kind(register, instruction_form) != KernelDG.ReadKind.NOT_A_READ
|
||||
|
||||
def is_memload(self, mem, instruction_form, register_changes={}):
|
||||
"""Check if instruction form loads from given location, assuming register_changes"""
|
||||
@@ -520,16 +571,13 @@ class KernelDG(nx.DiGraph):
|
||||
lcd_line_numbers = {}
|
||||
for dep in lcd:
|
||||
lcd_line_numbers[dep] = [x.line_number for x, lat in lcd[dep]["dependencies"]]
|
||||
# add color scheme
|
||||
graph.graph["node"] = {"colorscheme": "accent8"}
|
||||
graph.graph["edge"] = {"colorscheme": "accent8"}
|
||||
|
||||
# create LCD edges
|
||||
for dep in lcd_line_numbers:
|
||||
min_line_number = min(lcd_line_numbers[dep])
|
||||
max_line_number = max(lcd_line_numbers[dep])
|
||||
graph.add_edge(max_line_number, min_line_number)
|
||||
graph.edges[max_line_number, min_line_number]["latency"] = [
|
||||
graph.add_edge(min_line_number, max_line_number, dir="back")
|
||||
graph.edges[min_line_number, max_line_number]["latency"] = [
|
||||
lat for x, lat in lcd[dep]["dependencies"] if x.line_number == max_line_number
|
||||
]
|
||||
|
||||
@@ -541,59 +589,81 @@ class KernelDG(nx.DiGraph):
|
||||
for n in cp:
|
||||
graph.nodes[n.line_number]["instruction_form"].latency_cp = n.latency_cp
|
||||
|
||||
# color CP and LCD
|
||||
# Make the critical path bold.
|
||||
for n in graph.nodes:
|
||||
if n in cp_line_numbers:
|
||||
# graph.nodes[n]['color'] = 1
|
||||
graph.nodes[n]["style"] = "bold"
|
||||
graph.nodes[n]["penwidth"] = 4
|
||||
for col, dep in enumerate(lcd):
|
||||
if n in lcd_line_numbers[dep]:
|
||||
if "style" not in graph.nodes[n]:
|
||||
graph.nodes[n]["style"] = "filled"
|
||||
else:
|
||||
graph.nodes[n]["style"] += ",filled"
|
||||
graph.nodes[n]["fillcolor"] = 2 + col
|
||||
|
||||
# color edges
|
||||
for e in graph.edges:
|
||||
if (
|
||||
graph.nodes[e[0]]["instruction_form"].line_number in cp_line_numbers
|
||||
and graph.nodes[e[1]]["instruction_form"].line_number in cp_line_numbers
|
||||
and e[0] < e[1]
|
||||
# Make critical path edges bold.
|
||||
for u, v in zip(cp_line_numbers[:-1], cp_line_numbers[1:]):
|
||||
graph.edges[u, v]["style"] = "bold"
|
||||
graph.edges[u, v]["penwidth"] = 3
|
||||
|
||||
# Color the cycles created by loop-carried dependencies, longest first, never recoloring
|
||||
# any node or edge, so that the longest LCD and most long chains that are involved in the
|
||||
# loop are legible.
|
||||
lcd_by_latencies = sorted(
|
||||
(
|
||||
(latency, list(deps))
|
||||
for latency, deps in groupby(lcd, lambda dep: lcd[dep]["latency"])
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
node_colors = {}
|
||||
edge_colors = {}
|
||||
colors_used = 0
|
||||
for i, (latency, deps) in enumerate(lcd_by_latencies):
|
||||
color = None
|
||||
for dep in deps:
|
||||
path = lcd_line_numbers[dep]
|
||||
for n in path:
|
||||
if n not in node_colors:
|
||||
if not color:
|
||||
color = colors_used + 1
|
||||
colors_used += 1
|
||||
node_colors[n] = color
|
||||
for u, v in zip(path, path[1:] + [path[0]]):
|
||||
if (u, v) not in edge_colors:
|
||||
# Don’t introduce a color just for an edge.
|
||||
if not color:
|
||||
color = colors_used
|
||||
edge_colors[u, v] = color
|
||||
max_color = min(11, colors_used)
|
||||
colorscheme = f"spectral{max(3, max_color)}"
|
||||
graph.graph["node"] = {"colorscheme": colorscheme}
|
||||
graph.graph["edge"] = {"colorscheme": colorscheme}
|
||||
for n, color in node_colors.items():
|
||||
if "style" not in graph.nodes[n]:
|
||||
graph.nodes[n]["style"] = "filled"
|
||||
else:
|
||||
graph.nodes[n]["style"] += ",filled"
|
||||
graph.nodes[n]["fillcolor"] = color
|
||||
if (max_color >= 4 and color in (1, max_color)) or (
|
||||
max_color >= 10 and color in (1, 2, max_color - 1, max_color)
|
||||
):
|
||||
bold_edge = True
|
||||
for i in range(e[0] + 1, e[1]):
|
||||
if i in cp_line_numbers:
|
||||
bold_edge = False
|
||||
if bold_edge:
|
||||
graph.edges[e]["style"] = "bold"
|
||||
graph.edges[e]["penwidth"] = 3
|
||||
for dep in lcd_line_numbers:
|
||||
if (
|
||||
graph.nodes[e[0]]["instruction_form"].line_number in lcd_line_numbers[dep]
|
||||
and graph.nodes[e[1]]["instruction_form"].line_number in lcd_line_numbers[dep]
|
||||
):
|
||||
graph.edges[e]["color"] = graph.nodes[e[1]]["fillcolor"]
|
||||
graph.nodes[n]["fontcolor"] = "white"
|
||||
for (u, v), color in edge_colors.items():
|
||||
# The backward edge of the cycle is represented as the corresponding forward
|
||||
# edge with the attribute dir=back.
|
||||
edge = graph.edges[u, v] if (u, v) in graph.edges else graph.edges[v, u]
|
||||
edge["color"] = color
|
||||
|
||||
# rename node from [idx] to [idx mnemonic] and add shape
|
||||
mapping = {}
|
||||
for n in graph.nodes:
|
||||
if int(n) != n:
|
||||
mapping[n] = "{}: LOAD".format(int(n))
|
||||
node = graph.nodes[n]["instruction_form"]
|
||||
if node.mnemonic is not None:
|
||||
mapping[n] = "{}: {}".format(KernelDG.get_real_line_number(n), node.mnemonic)
|
||||
else:
|
||||
label = "label" if node.label is not None else None
|
||||
label = "directive" if node.directive is not None else label
|
||||
label = "comment" if node.comment is not None and label is None else label
|
||||
mapping[n] = "{}: {}".format(n, label)
|
||||
graph.nodes[n]["fontname"] = "italic"
|
||||
graph.nodes[n]["fontsize"] = 11.0
|
||||
else:
|
||||
node = graph.nodes[n]["instruction_form"]
|
||||
if node.mnemonic is not None:
|
||||
mapping[n] = "{}: {}".format(n, node.mnemonic)
|
||||
else:
|
||||
label = "label" if node.label is not None else None
|
||||
label = "directive" if node.directive is not None else label
|
||||
label = "comment" if node.comment is not None and label is None else label
|
||||
mapping[n] = "{}: {}".format(n, label)
|
||||
graph.nodes[n]["fontname"] = "italic"
|
||||
graph.nodes[n]["fontsize"] = 11.0
|
||||
if not KernelDG.is_load_line_number(n):
|
||||
graph.nodes[n]["shape"] = "rectangle"
|
||||
|
||||
nx.relabel.relabel_nodes(graph, mapping, copy=False)
|
||||
|
||||
@@ -103,6 +103,8 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555"
|
||||
instr13 = "\tjmp\t$LN18@operator"
|
||||
instr14 = "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]"
|
||||
instr15 = "vextractf128 xmm1, ymm2, 0x2"
|
||||
instr16 = "vmovupd xmm0, [rax+123R]"
|
||||
|
||||
parsed_1 = self.parser.parse_instruction(instr1)
|
||||
parsed_2 = self.parser.parse_instruction(instr2)
|
||||
@@ -118,6 +120,8 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
parsed_12 = self.parser.parse_instruction(instr12)
|
||||
parsed_13 = self.parser.parse_instruction(instr13)
|
||||
parsed_14 = self.parser.parse_instruction(instr14)
|
||||
parsed_15 = self.parser.parse_instruction(instr15)
|
||||
parsed_16 = self.parser.parse_instruction(instr16)
|
||||
|
||||
self.assertEqual(parsed_1.mnemonic, "sub")
|
||||
self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP"))
|
||||
@@ -221,6 +225,18 @@ class TestParserX86Intel(unittest.TestCase):
|
||||
),
|
||||
)
|
||||
|
||||
self.assertEqual(parsed_15.mnemonic, "vextractf128")
|
||||
self.assertEqual(parsed_15.operands[0], RegisterOperand(name="XMM1"))
|
||||
self.assertEqual(parsed_15.operands[1], RegisterOperand(name="YMM2"))
|
||||
self.assertEqual(parsed_15.operands[2], ImmediateOperand(value=2))
|
||||
|
||||
self.assertEqual(parsed_16.mnemonic, "vmovupd")
|
||||
self.assertEqual(parsed_16.operands[0], RegisterOperand(name="XMM0"))
|
||||
self.assertEqual(
|
||||
parsed_16.operands[1],
|
||||
MemoryOperand(base=RegisterOperand(name="RAX"), offset=ImmediateOperand(value=291)),
|
||||
)
|
||||
|
||||
def test_parse_line(self):
|
||||
line_comment = "; -- Begin main"
|
||||
line_instruction = "\tret\t0"
|
||||
|
||||
@@ -89,6 +89,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_csx = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "csx.yml")
|
||||
)
|
||||
cls.machine_model_skx = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "skx.yml")
|
||||
)
|
||||
cls.machine_model_tx2 = MachineModel(
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
|
||||
)
|
||||
@@ -107,6 +110,11 @@ class TestSemanticTools(unittest.TestCase):
|
||||
cls.machine_model_csx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||
)
|
||||
cls.semantics_skx_intel = ArchSemantics(
|
||||
cls.parser_x86_intel,
|
||||
cls.machine_model_skx,
|
||||
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
|
||||
)
|
||||
cls.semantics_aarch64 = ISASemantics(cls.parser_AArch64)
|
||||
cls.semantics_tx2 = ArchSemantics(
|
||||
cls.parser_AArch64,
|
||||
@@ -136,10 +144,10 @@ class TestSemanticTools(unittest.TestCase):
|
||||
for i in range(len(cls.kernel_x86_intel)):
|
||||
cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel[i])
|
||||
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel[i])
|
||||
cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep)
|
||||
cls.semantics_skx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep)
|
||||
for i in range(len(cls.kernel_x86_intel_memdep)):
|
||||
cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i])
|
||||
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i])
|
||||
cls.semantics_skx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i])
|
||||
cls.semantics_skx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i])
|
||||
cls.semantics_tx2.normalize_instruction_forms(cls.kernel_AArch64)
|
||||
for i in range(len(cls.kernel_AArch64)):
|
||||
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
|
||||
@@ -458,7 +466,7 @@ class TestSemanticTools(unittest.TestCase):
|
||||
# / /
|
||||
# 4 /
|
||||
# /
|
||||
# 5.1
|
||||
# 4.875
|
||||
#
|
||||
dg = KernelDG(
|
||||
self.kernel_x86_intel,
|
||||
@@ -473,8 +481,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=4)), 5)
|
||||
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5))), 1)
|
||||
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5)), 6)
|
||||
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5.1))), 1)
|
||||
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5.1)), 5)
|
||||
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=4.875))), 1)
|
||||
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=4.875)), 5)
|
||||
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=6)), [])
|
||||
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=7)), [])
|
||||
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=8)), [])
|
||||
@@ -502,12 +510,15 @@ class TestSemanticTools(unittest.TestCase):
|
||||
dg = KernelDG(
|
||||
self.kernel_x86_intel_memdep,
|
||||
self.parser_x86_intel,
|
||||
self.machine_model_csx,
|
||||
self.semantics_csx_intel,
|
||||
self.machine_model_skx,
|
||||
self.semantics_skx_intel,
|
||||
)
|
||||
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=18)), {18.875})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=18.875)), {19})
|
||||
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=19)), set())
|
||||
with self.assertRaises(ValueError):
|
||||
dg.get_dependent_instruction_forms()
|
||||
# test dot creation
|
||||
@@ -849,6 +860,9 @@ class TestSemanticTools(unittest.TestCase):
|
||||
instr_form_r_ymm = self.parser_x86_intel.parse_line("vmovapd ymm0, ymm1")
|
||||
self.semantics_csx_intel.normalize_instruction_form(instr_form_r_ymm)
|
||||
self.semantics_csx_intel.assign_src_dst(instr_form_r_ymm)
|
||||
instr_form_rw_sar = self.parser_x86_intel.parse_line("sar rcx, 43")
|
||||
self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_sar)
|
||||
self.semantics_csx_intel.assign_src_dst(instr_form_rw_sar)
|
||||
self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c))
|
||||
self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c))
|
||||
self.assertFalse(dag.is_read(reg_rcx, instr_form_w_c))
|
||||
@@ -860,6 +874,8 @@ class TestSemanticTools(unittest.TestCase):
|
||||
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1))
|
||||
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2))
|
||||
self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm))
|
||||
self.assertTrue(dag.is_read(reg_rcx, instr_form_rw_sar))
|
||||
self.assertTrue(dag.is_written(reg_rcx, instr_form_rw_sar))
|
||||
|
||||
def test_is_read_is_written_AArch64(self):
|
||||
# independent from HW model
|
||||
|
||||
Reference in New Issue
Block a user