Compare commits

...

38 Commits

Author SHA1 Message Date
JanLJL
6abea6249a version bump 2025-09-08 16:36:20 +02:00
JanLJL
187473b72c fixed bugs in x86intel parser (ZMM and masking support) 2025-09-08 16:35:36 +02:00
JanLJL
45847e69ff formatting for black 2025-08-16 14:13:29 +02:00
JanLJL
94cb3de6a1 fix bug to support 0x.. and ..R hex values for intel syntax 2025-08-16 14:08:43 +02:00
JanLJL
63cb61b423 add pseudo-ops for vcmpps/vcmppd 2025-08-14 13:34:45 +02:00
JanLJL
b68ce9afc1 new instructions 2025-08-13 14:43:17 +02:00
JanLJL
c274a25e1b updated retired uops per cy 2025-08-13 14:42:45 +02:00
JanLJL
714319e613 new instructions 2025-08-13 14:42:30 +02:00
JanLJL
590f915f85 add fallback check w/ and w/o VEX prefix to AT&T to match intel syntax 2025-08-13 14:39:15 +02:00
Jan
b4978c724a Merge pull request #117 from pleroy/Load2
Properly track the dependencies of the LOAD phase of instructions
2025-08-12 16:14:34 +02:00
pleroy
88d3f1a7a0 Fix a Flake8 diagnostic. 2025-07-29 18:55:51 +02:00
pleroy
5635d2d8df Skip non-integer line numbers in frontend 2025-03-31 22:44:57 +02:00
pleroy
faa63ce95e Support non-integer line numbers in frontend 2025-03-31 22:35:09 +02:00
pleroy
4578eb00fa Flake8 2025-03-31 21:37:11 +02:00
pleroy
3456f6e24a After egg’s review. 2025-03-31 20:48:52 +02:00
pleroy
df0351d087 Readying. 2025-03-31 20:48:39 +02:00
pleroy
969500d79f Merge test 2025-03-31 20:47:46 +02:00
pleroy
685ed1e1e1 Graphing. 2025-03-31 20:45:20 +02:00
pleroy
af9c10f308 Cleanup. 2025-03-31 20:45:01 +02:00
pleroy
4255c11010 The tests are passing. 2025-03-31 20:44:36 +02:00
pleroy
56fbe1d172 Some more stuff. 2025-03-31 20:44:19 +02:00
pleroy
aeda9b1d33 Merge imports 2025-03-31 20:43:52 +02:00
Jan
33fd0a0352 Merge pull request #116 from eggrobin/graph-colouring
Improvements to graph layout and colouring
2025-03-31 11:38:23 +02:00
Jan
a17e79a3a9 Merge pull request #115 from pleroy/Comisd
Support for arithmetic shift and comparison instructions for x86
2025-03-31 11:18:09 +02:00
Robin Leroy
de0b1fde64 white on blue 2025-03-27 23:12:25 +01:00
Robin Leroy
d82bc8052b Less clever and more useful colouring 2025-03-27 23:12:19 +01:00
Robin Leroy
b854562a82 Improve dependency graph colouring 2025-03-27 23:11:57 +01:00
Robin Leroy
8c31c6ff77 Mark backward edges as backward so the graph is ordered like the code 2025-03-27 23:11:46 +01:00
Robin Leroy
e096cf4704 Don’t spam filled until dot breaks 2025-03-27 23:11:35 +01:00
Robin Leroy
7d900fde38 Don’t run out of colours 2025-03-27 23:11:21 +01:00
pleroy
28df996617 Moar colors. 2025-03-27 23:11:13 +01:00
pleroy
1eb82a6f0a Fix the x86 ISA description to indicate that the register of SAR and SAL is read/write. 2025-03-27 22:47:32 +01:00
Robin Leroy
b7e4acc905 ucomisd is like comisd 2025-03-27 22:46:48 +01:00
pleroy
b989145a36 Define comisd sources. 2025-03-27 22:46:38 +01:00
Jan
9c97d32512 Merge pull request #114 from eggrobin/setmeow-jmeow
Add the x86 SET* and J* instructions
2025-03-26 09:05:23 +01:00
Robin Leroy
9e6373a013 Configure the dependencies of the jmeow instructions on flags 2025-03-20 22:30:00 +01:00
Robin Leroy
e99c3d935d Add the setmeow instructions 2025-03-20 22:29:51 +01:00
JanLJL
edb32b38ca use pypi version of kerncraft for GH Actions 2025-03-19 14:36:49 +01:00
14 changed files with 640 additions and 131 deletions

View File

@@ -23,8 +23,7 @@ jobs:
python -m pip install bs4
sudo apt-get -y install graphviz libgraphviz-dev pkg-config
python -m pip install pygraphviz
#python -m pip install "kerncraft>=0.8.16"
python -m pip install git+https://github.com/RRZE-HPC/kerncraft.git@7caff4e2ecdbef595013041ba0131e37ed33c72c
python -m pip install "kerncraft>=0.8.17"
python -m pip install -e .
- name: Test
run: |

View File

@@ -1,7 +1,7 @@
"""Open Source Architecture Code Analyzer"""
name = "osaca"
__version__ = "0.7.0"
__version__ = "0.7.1"
# To trigger travis deployment to pypi, do the following:
# 1. Increment __version__

View File

@@ -3,7 +3,8 @@ micro_architecture: Ice Lake Server
arch_code: ICX
isa: x86
ROB_size: 352 # from wikichip
retired_uOps_per_cycle: 10 # from wikichip
dispatched_uOps_per_cycle: 5
retired_uOps_per_cycle: 5
scheduler_size: 97 # actually MORE than 97, number unknown
hidden_loads: false
load_latency: {gpr: 5.0, mm: 5.0, xmm: 5.0, ymm: 5.0, zmm: 5.0}

View File

@@ -530,6 +530,190 @@ instruction_forms:
name: "rax"
source: false
destination: true
- name: [ja, jbe, jna, jnbe]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "CF"
source: true
destination: false
- class: "flag"
name: "ZF"
source: true
destination: false
- name: [jae, jb, jc, jnae, jnb, jnc]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "CF"
source: true
destination: false
- name: [je, jne, jnz, jz]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "ZF"
source: true
destination: false
- name: [jg, jle, jng, jnle]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "ZF"
source: true
destination: false
- class: "flag"
name: "SF"
source: true
destination: false
- class: "flag"
name: "OF"
source: true
destination: false
- name: [jge, jl, jnge, jnl]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "SF"
source: true
destination: false
- class: "flag"
name: "OF"
source: true
destination: false
- name: [jno, jo]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "OF"
source: true
destination: false
- name: [jnp, jp, jpe, jpo]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "PF"
source: true
destination: false
- name: [jns, js]
operands:
- class: identifier
hidden_operands:
- class: "flag"
name: "SF"
source: true
destination: false
- name: [seta, setbe, setna, setnbe]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "CF"
source: true
destination: false
- class: "flag"
name: "ZF"
source: true
destination: false
- name: [setae, setb, setc, setnae, setnb, setnc]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "CF"
source: true
destination: false
- name: [sete, setne, setnz, setz]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "ZF"
source: true
destination: false
- name: [setg, setle, setng, setnle]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "ZF"
source: true
destination: false
- class: "flag"
name: "SF"
source: true
destination: false
- class: "flag"
name: "OF"
source: true
destination: false
- name: [setge, setl, setnge, setnl]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "SF"
source: true
destination: false
- class: "flag"
name: "OF"
source: true
destination: false
- name: [setno, seto]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "OF"
source: true
destination: false
- name: [setnp, setp, setpe, setpo]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "PF"
source: true
destination: false
- name: [setns, sets]
operands:
- class: "register"
name: "gpr"
source: false
destination: true
hidden_operands:
- class: "flag"
name: "SF"
source: true
destination: false
- name: cmova
operands:
- class: "register"
@@ -2437,6 +2621,79 @@ instruction_forms:
name: "ZF"
source: true
destination: true
- name: ["comisd", "ucomisd"]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "register"
name: "xmm"
source: true
destination: false
hidden_operands:
- class: "flag"
name: "CF"
source: false
destination: true
- class: "flag"
name: "OF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
destination: true
- class: "flag"
name: "ZF"
source: false
destination: true
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "PF"
source: false
destination: true
- name: ["comisd", "ucomisd"]
operands:
- class: "register"
name: "xmm"
source: true
destination: false
- class: "memory"
base: "*"
offset: "*"
index: "*"
scale: "*"
source: true
destination: false
hidden_operands:
- class: "flag"
name: "CF"
source: false
destination: true
- class: "flag"
name: "OF"
source: false
destination: true
- class: "flag"
name: "SF"
source: false
destination: true
- class: "flag"
name: "ZF"
source: false
destination: true
- class: "flag"
name: "AF"
source: false
destination: true
- class: "flag"
name: "PF"
source: false
destination: true
- name: dec
operands:
- class: "register"
@@ -3429,7 +3686,7 @@ instruction_forms:
- class: "register"
name: "gpr"
source: true
destination: true
destination: true
- name: sbb
operands:
- class: "register"
@@ -4158,7 +4415,7 @@ instruction_forms:
name: "xmm"
source: true
destination: true
- name: [shl, shr, shlq, shrq]
- name: [sal, sar, salq, sarq, shl, shr, shlq, shrq]
operands:
- class: "immediate"
imd: "int"

View File

@@ -5004,6 +5004,54 @@ instruction_forms:
port_pressure: [[1, '01']] # uops.info import
throughput: 0.5 # uops.info import
uops: 1 # uops.info import
- name: [VCMPEQPD, VCMPLTPD, VCMPLEPD, VCMPUNORDPD, VCMPNEQPD, VCMPNLTPD, VCMPNLEPD, VCMPORDPD, VCMPEQ_UQPD, VCMPNGEPD, VCMPNGTPD, VCMPFALSEPD, VCMPNEQ_OQPD, VCMPGEPD, VCMPGTPD, VCMPTRUEPD, VCMPEQ_OSPD, VCMPLT_OQPD, VCMPLE_OQPD, VCMPUNORD_SPD, VCMPNEQ_USPD, VCMPNLT_UQPD, VCMPNLE_UQPD, VCMPORD_SPD, VCMPEQ_USPD, VCMPNGE_UQPD, VCMPNGT_UQPD, VCMPFALSE_OSPD, VCMPNEQ_OSPD, VCMPGE_OQPD, VCMPGT_OQPD, VCMPTRUE_USPD] # VCMPPD pseudo-op
operands:
- class: register
name: xmm
- class: register
name: xmm
- class: register
name: xmm
latency: 4
port_pressure: [[1, '01']]
throughput: 0.5
uops: 1
- name: [VCMPEQPD, VCMPLTPD, VCMPLEPD, VCMPUNORDPD, VCMPNEQPD, VCMPNLTPD, VCMPNLEPD, VCMPORDPD, VCMPEQ_UQPD, VCMPNGEPD, VCMPNGTPD, VCMPFALSEPD, VCMPNEQ_OQPD, VCMPGEPD, VCMPGTPD, VCMPTRUEPD, VCMPEQ_OSPD, VCMPLT_OQPD, VCMPLE_OQPD, VCMPUNORD_SPD, VCMPNEQ_USPD, VCMPNLT_UQPD, VCMPNLE_UQPD, VCMPORD_SPD, VCMPEQ_USPD, VCMPNGE_UQPD, VCMPNGT_UQPD, VCMPFALSE_OSPD, VCMPNEQ_OSPD, VCMPGE_OQPD, VCMPGT_OQPD, VCMPTRUE_USPD] # VCMPPD pseudo-op
operands:
- class: register
name: ymm
- class: register
name: ymm
- class: register
name: ymm
latency: 4
port_pressure: [[1, '01']]
throughput: 0.5
uops: 1
- name: [VCMPEQPS, VCMPLTPS, VCMPLEPS, VCMPUNORDPS, VCMPNEQPS, VCMPNLTPS, VCMPNLEPS, VCMPORDPS, VCMPEQ_UQPS, VCMPNGEPS, VCMPNGTPS, VCMPFALSEPS, VCMPNEQ_OQPS, VCMPGEPS, VCMPGTPS, VCMPTRUEPS, VCMPEQ_OSPS, VCMPLT_OQPS, VCMPLE_OQPS, VCMPUNORD_SPS, VCMPNEQ_USPS, VCMPNLT_UQPS, VCMPNLE_UQPS, VCMPORD_SPS, VCMPEQ_USPS, VCMPNGE_UQPS, VCMPNGT_UQPS, VCMPFALSE_OSPS, VCMPNEQ_OSPS, VCMPGE_OQPS, VCMPGT_OQPS, VCMPTRUE_USPS] # VCMPPS pseudo-op
operands:
- class: register
name: xmm
- class: register
name: xmm
- class: register
name: xmm
latency: 4
port_pressure: [[1, '01']]
throughput: 0.5
uops: 1
- name: [VCMPEQPS, VCMPLTPS, VCMPLEPS, VCMPUNORDPS, VCMPNEQPS, VCMPNLTPS, VCMPNLEPS, VCMPORDPS, VCMPEQ_UQPS, VCMPNGEPS, VCMPNGTPS, VCMPFALSEPS, VCMPNEQ_OQPS, VCMPGEPS, VCMPGTPS, VCMPTRUEPS, VCMPEQ_OSPS, VCMPLT_OQPS, VCMPLE_OQPS, VCMPUNORD_SPS, VCMPNEQ_USPS, VCMPNLT_UQPS, VCMPNLE_UQPS, VCMPORD_SPS, VCMPEQ_USPS, VCMPNGE_UQPS, VCMPNGT_UQPS, VCMPFALSE_OSPS, VCMPNEQ_OSPS, VCMPGE_OQPS, VCMPGT_OQPS, VCMPTRUE_USPS] # VCMPPS pseudo-op
operands:
- class: register
name: ymm
- class: register
name: ymm
- class: register
name: ymm
latency: 4
port_pressure: [[1, '01']]
throughput: 0.5
uops: 1
- name: VCMPSD # uops.info import
operands: # uops.info import
- class: immediate # uops.info import

View File

@@ -3,7 +3,8 @@ micro_architecture: Sapphire Rapids
arch_code: SPR
isa: x86
ROB_size: ~
retired_uOps_per_cycle: ~
dispatched_uOps_per_cycle: 6
retired_uOps_per_cycle: 8
scheduler_size: ~
hidden_loads: false
load_latency: {gpr: 5.0, mm: 5.0, xmm: 5.0, ymm: 5.0, zmm: 5.0}
@@ -2788,7 +2789,17 @@ instruction_forms:
port_pressure: [[1, '15']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
- name: vdivpd # ibench
- name: divpd # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
- class: register # ibench
name: xmm # ibench
latency: 14 # ibench
port_pressure: [[1, '0'], [4, ['0DV']]] # ibench
throughput: 4.0 # ibench
uops: 4 # ibench
- name: vdivpd # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
@@ -3840,6 +3851,16 @@ instruction_forms:
port_pressure: [[1, '01']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
- name: [paddd, paddq] # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
- class: register # ibench
name: xmm # ibench
latency: 1 # ibench
port_pressure: [[1, '015']] # ibench
throughput: 0.3333333333333333 # ibench
uops: 1 # ibench
- name: [vpaddd, vpaddq] # ibench
operands: # ibench
- class: register # ibench
@@ -4313,6 +4334,16 @@ instruction_forms:
port_pressure: [[1, '05']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
- name: subsd # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
- class: register # ibench
name: xmm # ibench
latency: 2 # ibench
port_pressure: [[1, '15']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
- name: vsubsd # ibench
operands: # ibench
- class: register # ibench
@@ -4562,6 +4593,16 @@ instruction_forms:
port_pressure: [[1, '01'], [1, '5']]
throughput: 1.0
uops: 3
- name: [cvtdq2pd, vcvtdq2pd]
operands:
- class: register
name: xmm
- class: register
name: xmm
latency: 5
port_pressure: [[1, '01'], [1, '5']]
throughput: 1.0
uops: 2
- name: vcvtdq2pd
operands:
- class: register
@@ -5866,6 +5907,18 @@ instruction_forms:
port_pressure: [[1, '05']]
throughput: 0.5
uops: 1
- name: [vpshufd, pshufd] # uops.info
operands:
- class: immediate
imd: int
- class: register
name: xmm
- class: register
name: xmm
latency: 1
port_pressure: [[1, '15']]
throughput: 1.0
uops: 1
- name: vshuff64x2
operands:
- class: immediate
@@ -5992,7 +6045,7 @@ instruction_forms:
port_pressure: [[1, '5']] # uops.info
throughput: 1.0 # ibench
uops: 1
- name: [cltq, cdq, cdqe]
- name: [cltq, cdq, cdqe]
operands: []
latency: 1 # uops.info
port_pressure: [[1, '06']] # uops.info

View File

@@ -35,9 +35,9 @@ port_model_scheme: |
| BR | | BR | | ALU | | ALU | | ALU | | ALU | | ALU | | DV | | ALU | | DV | |SIMD/FP| |FPDV| |SIMD/FP| |SIMD/FP| |FPDV| |SIMD/FP| | LD | | LD | | LD | | ST | | ST |
+----+ +----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+ +----+ | ALU | +----+ | ALU | | ALU | +----+ | ALU | +-----+ +-----+ +-----+ +-----+ +-----+
silly silly +------+ +------+ +-------+ +-------+ +-------+ +-------+ +-----+ +-----+
| MUL | | MUL | +-------+ +-------+ +-------+ +-------+ | AGU | | AGU |
+------+ +------+ |SIMD/FP| |SIMD/FP| |SIMD/FP| |SIMD/FP| +-----+ +-----+
+------+ +------+ | MISC | | MISC | | MISC | | MISC |
| MUL | | MUL | +-------+ +-------+ +-------+ +-------+ | ST | | ST |
+------+ +------+ |SIMD/FP| |SIMD/FP| |SIMD/FP| |SIMD/FP| | AGU | | AGU |
+------+ +------+ | MISC | | MISC | | MISC | | MISC | +-----+ +-----+
| CRC | | CRC | +-------+ +-------+ +-------+ +-------+
+------+ +------+ +-------+ +-------+ +-------+ +-------+
+------+ +------+ | SIMD | | SIMD | | SIMD | | SIMD |
@@ -119,6 +119,17 @@ instruction_forms:
throughput: 0.25
latency: 1.0 # 1*p2367
port_pressure: [[1, '2367']]
- name: addvl
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.5
latency: 2.0 # 1*p67
port_pressure: [[1, '67']]
- name: adds
operands:
- class: register
@@ -259,13 +270,13 @@ instruction_forms:
throughput: 0.16666666
latency: 1.0 # 1*p234567
port_pressure: [[1, '234567']]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq, bmi]
operands:
- class: identifier
throughput: 0.5
latency: 0.0
port_pressure: [[1, '01']]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq, bmi]
operands:
- class: immediate
imd: int
@@ -1410,7 +1421,7 @@ instruction_forms:
throughput: 0.25
latency: 1.0 # 1*p2367
port_pressure: [[1, '2367']]
- name: [incw, incd, inch]
- name: [incw, incd, inch, incb]
operands:
- class: register
prefix: z
@@ -1420,20 +1431,20 @@ instruction_forms:
throughput: 0.25
latency: 2.0 # 1*p8,9,10,11
port_pressure: [[1, ['8', '9', '10', '11']]]
- name: [incw, incd, inch]
- name: [incw, incd, inch, incb]
operands:
- class: register
prefix: x
- class: identifier
throughput: 0.5
latency: 2.0 # 1*p67
latency: 1.0 # 1*p67
port_pressure: [[1, '67']]
- name: [incw, incd, inch]
- name: [incw, incd, inch, incb]
operands:
- class: register
prefix: x
throughput: 0.5
latency: 2.0 # 1*p67
latency: 1.0 # 1*p67
port_pressure: [[1, '67']]
- name: [madd, msub] # NOTE: if the dependency is via the addend (fourth operand), the latency is only 1cy !!!
operands:
@@ -2939,6 +2950,15 @@ instruction_forms:
throughput: 1.0
latency: 1.0 # 1*p3
port_pressure: [[1, '3']]
- name: [fcmp, fcmpe]
operands:
- class: register
prefix: '*'
- class: immediate
imd: '*'
throughput: 1.0
latency: 1.0 # 1*p3
port_pressure: [[1, '3']]
- name: [fccmp, fccmpe] # LT assumed from fcmp
operands:
- class: register

View File

@@ -80,6 +80,8 @@ class Frontend(object):
s += lineno_filler + self._get_port_number_line(port_len) + "\n"
s += separator + "\n"
for instruction_form in kernel:
if KernelDG.is_load_line_number(instruction_form.line_number):
continue
line = "{:4d} {} {} {}".format(
instruction_form.line_number,
self._get_port_pressure(
@@ -112,6 +114,8 @@ class Frontend(object):
"""
s = "\n\nLatency Analysis Report\n-----------------------\n"
for instruction_form in cp_kernel:
if KernelDG.is_load_line_number(instruction_form.line_number):
continue
s += (
"{:4d} {} {:4.1f} {}{}{} {}".format(
instruction_form.line_number,
@@ -147,8 +151,11 @@ class Frontend(object):
)
# TODO find a way to overcome padding for different tab-lengths
for dep in sorted(dep_dict.keys()):
s += "{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
int(dep.split("-")[0]),
dep0 = float(dep.split("-")[0])
if KernelDG.is_load_line_number(dep0):
continue
s += "{:4.0f} {} {:4.1f} {} {:36}{} {}\n".format(
dep0,
separator,
dep_dict[dep]["latency"],
separator,
@@ -356,6 +363,8 @@ class Frontend(object):
if show_cmnts is False and self._is_comment(instruction_form):
continue
line_number = instruction_form.line_number
if KernelDG.is_load_line_number(line_number):
continue
used_ports = [list(uops[1]) for uops in instruction_form.port_uops]
used_ports = list(set([p for uops_ports in used_ports for p in uops_ports]))
s += "{:4d} {}{} {} {}\n".format(

View File

@@ -75,10 +75,19 @@ class ParserX86ATT(ParserX86):
if not model:
# Check for instruction without GAS suffix.
if mnemonic[-1] in self.GAS_SUFFIXES:
mnemonic = mnemonic[:-1]
model = arch_model.get_instruction(mnemonic, instruction_form.operands)
if model:
instruction_form.mnemonic = mnemonic
nongas_mnemonic = mnemonic[:-1]
if arch_model.get_instruction(nongas_mnemonic, instruction_form.operands):
mnemonic = nongas_mnemonic
# Check for non-VEX version and vice-versa
elif mnemonic[0] == "v":
unvexed_mnemonic = mnemonic[1:]
if arch_model.get_instruction(unvexed_mnemonic, len(instruction_form.operands)):
mnemonic = unvexed_mnemonic
else:
vexed_mnemonic = "v" + mnemonic
if arch_model.get_instruction(vexed_mnemonic, len(instruction_form.operands)):
mnemonic = vexed_mnemonic
instruction_form.mnemonic = mnemonic
def construct_parser(self):
"""Create parser for x86 AT&T ISA."""
@@ -365,10 +374,8 @@ class ParserX86ATT(ParserX86):
return RegisterOperand(
prefix=operand["prefix"].lower() if "prefix" in operand else None,
name=operand["name"],
shape=operand["shape"].lower() if "shape" in operand else None,
lanes=operand["lanes"] if "lanes" in operand else None,
index=operand["index"] if "index" in operand else None,
predication=operand["predication"].lower() if "predication" in operand else None,
mask=RegisterOperand(name=operand["mask"]) if "mask" in operand else None,
)
def process_directive(self, directive):

View File

@@ -160,12 +160,18 @@ class ParserX86Intel(ParserX86):
binary_number = pp.Combine(pp.Word("01") + pp.CaselessLiteral("B"))
octal_number = pp.Combine(pp.Word("01234567") + pp.CaselessLiteral("O"))
decimal_number = pp.Combine(pp.Optional(pp.Literal("-")) + pp.Word(pp.nums))
hex_number = pp.Combine(pp.Word(pp.hexnums) + pp.CaselessLiteral("H"))
hex_number_suffix = pp.Combine(
pp.Word(pp.hexnums) + (pp.CaselessLiteral("H") ^ pp.CaselessLiteral("R"))
)
hex_number_0x = pp.Combine(
pp.Optional(pp.Literal("-")) + pp.Literal("0x") + pp.Word(pp.hexnums)
)
hex_number = hex_number_0x ^ hex_number_suffix
float_number = pp.Combine(
pp.Optional(pp.Literal("-")) + pp.Word(pp.nums) + pp.Word(".", pp.nums)
).setResultsName("value")
integer_number = (
binary_number ^ octal_number ^ decimal_number ^ hex_number
hex_number ^ binary_number ^ octal_number ^ decimal_number
).setResultsName("value")
# Comment.
@@ -192,6 +198,7 @@ class ParserX86Intel(ParserX86):
| pp.CaselessKeyword("WORD")
| pp.CaselessKeyword("XMMWORD")
| pp.CaselessKeyword("YMMWORD")
| pp.CaselessKeyword("ZMMWORD")
).setResultsName("data_type")
# Identifier. Note that $ is not mentioned in the ASM386 Assembly Language Reference,
@@ -286,15 +293,14 @@ class ParserX86Intel(ParserX86):
pp.CaselessKeyword("ST")
+ pp.Optional(pp.Literal("(") + pp.Word("01234567") + pp.Literal(")"))
).setResultsName("name")
xmm_register = pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums)) | pp.Combine(
pp.CaselessLiteral("XMM1") + pp.Word("012345")
)
simd_register = (
pp.Combine(pp.CaselessLiteral("MM") + pp.Word("01234567"))
| xmm_register
pp.Combine(pp.CaselessLiteral("MM") + pp.Word(pp.nums))
| pp.Combine(pp.CaselessLiteral("XMM") + pp.Word(pp.nums))
| pp.Combine(pp.CaselessLiteral("YMM") + pp.Word(pp.nums))
| pp.Combine(pp.CaselessLiteral("YMM1") + pp.Word("012345"))
).setResultsName("name")
| pp.Combine(pp.CaselessLiteral("ZMM") + pp.Word(pp.nums))
).setResultsName("name") + pp.Optional(
pp.Literal("{") + pp.Word(pp.alphanums).setResultsName("mask") + pp.Literal("}")
)
segment_register = (
pp.CaselessKeyword("CS")
| pp.CaselessKeyword("DS")
@@ -395,7 +401,9 @@ class ParserX86Intel(ParserX86):
+ pp.Optional(pp.Literal("+") + immediate.setResultsName("displacement"))
).setResultsName("offset_expression")
ptr_expression = pp.Group(
data_type + pp.CaselessKeyword("PTR") + address_expression
data_type
+ (pp.CaselessKeyword("PTR") | pp.CaselessKeyword("BCST"))
+ address_expression
).setResultsName("ptr_expression")
short_expression = pp.Group(pp.CaselessKeyword("SHORT") + identifier).setResultsName(
"short_expression"
@@ -659,7 +667,10 @@ class ParserX86Intel(ParserX86):
return directive_new, directive.get("comment")
def process_register(self, operand):
return RegisterOperand(name=operand.name)
return RegisterOperand(
name=operand.name,
mask=RegisterOperand(name=operand.mask) if "mask" in operand else None,
)
def process_register_expression(self, register_expression):
base = register_expression.get("base")
@@ -793,8 +804,10 @@ class ParserX86Intel(ParserX86):
if isinstance(imd.value, str):
if "." in imd.value:
return float(imd.value)
if imd.value.startswith("0x"):
return int(imd.value, 0)
# Now parse depending on the base.
base = {"B": 2, "O": 8, "H": 16}.get(imd.value[-1], 10)
base = {"B": 2, "O": 8, "H": 16, "R": 16}.get(imd.value[-1], 10)
value = 0
negative = imd.value[0] == "-"
positive = imd.value[0] == "+"

View File

@@ -13,7 +13,7 @@ class RegisterOperand(Operand):
lanes=None,
shape=None,
index=None,
mask=False,
mask=None,
zeroing=False,
predication=None,
source=False,

View File

@@ -1,12 +1,14 @@
#!/usr/bin/env python3
import copy
from enum import Enum
import time
from itertools import chain
from itertools import chain, groupby
from multiprocessing import Manager, Process, cpu_count
import networkx as nx
from osaca.semantics import INSTR_FLAGS, ArchSemantics, MachineModel
from osaca.parser.instruction_form import InstructionForm
from osaca.parser.memory import MemoryOperand
from osaca.parser.register import RegisterOperand
from osaca.parser.immediate import ImmediateOperand
@@ -17,6 +19,11 @@ class KernelDG(nx.DiGraph):
# threshold for checking dependency graph sequential or in parallel
INSTRUCTION_THRESHOLD = 50
class ReadKind(Enum):
NOT_A_READ = 0
READ_FOR_LOAD = 1
OTHER_READ = 2
def __init__(
self,
parsed_kernel,
@@ -46,6 +53,23 @@ class KernelDG(nx.DiGraph):
dst_list.extend(tmp_list)
# print('Thread [{}-{}] done'.format(kernel[0]['line_number'], kernel[-1]['line_number']))
@staticmethod
def get_load_line_number(line_number):
# The line number of the load must be less than the line number of the instruction. The
# offset is irrelevant, but it must be a machine number with trailing zeroes to avoid silly
# rounding issues.
return line_number - 0.125
@staticmethod
def is_load_line_number(line_number):
return line_number != int(line_number)
@staticmethod
def get_real_line_number(line_number):
return (
int(line_number + 0.125) if KernelDG.is_load_line_number(line_number) else line_number
)
def create_DG(self, kernel, flag_dependencies=False):
"""
Create directed graph from given kernel
@@ -57,10 +81,10 @@ class KernelDG(nx.DiGraph):
:type flag_dependencies: boolean, optional
:returns: :class:`~nx.DiGraph` -- directed graph object
"""
# 1. go through kernel instruction forms and add them as node attribute
# 2. find edges (to dependend further instruction)
# 3. get LT value and set as edge weight
# Go through kernel instruction forms and add them as nodes of the graph. Create a LOAD
# node for instructions that include a memory reference.
dg = nx.DiGraph()
loads = {}
for i, instruction_form in enumerate(kernel):
dg.add_node(instruction_form.line_number)
dg.nodes[instruction_form.line_number]["instruction_form"] = instruction_form
@@ -70,14 +94,22 @@ class KernelDG(nx.DiGraph):
and INSTR_FLAGS.LD not in instruction_form.flags
):
# add new node
dg.add_node(instruction_form.line_number + 0.1)
dg.nodes[instruction_form.line_number + 0.1]["instruction_form"] = instruction_form
load_line_number = KernelDG.get_load_line_number(instruction_form.line_number)
loads[instruction_form.line_number] = load_line_number
dg.add_node(load_line_number)
dg.nodes[load_line_number]["instruction_form"] = InstructionForm(
mnemonic="_LOAD_", line=instruction_form.line, line_number=load_line_number
)
# and set LD latency as edge weight
dg.add_edge(
instruction_form.line_number + 0.1,
load_line_number,
instruction_form.line_number,
latency=instruction_form.latency - instruction_form.latency_wo_load,
)
# 1. find edges (to further dependent instructions)
# 2. get LT value and set as edge weight
for i, instruction_form in enumerate(kernel):
for dep, dep_flags in self.find_depending(
instruction_form, kernel[i + 1 :], flag_dependencies
):
@@ -91,11 +123,18 @@ class KernelDG(nx.DiGraph):
edge_weight += self.model.get("store_to_load_forward_latency", 0)
if "p_indexed" in dep_flags and self.model is not None:
edge_weight = self.model.get("p_index_latency", 1)
dg.add_edge(
instruction_form.line_number,
dep.line_number,
latency=edge_weight,
)
if "for_load" in dep_flags and self.model is not None and dep.line_number in loads:
dg.add_edge(
instruction_form.line_number,
loads[dep.line_number],
latency=edge_weight,
)
else:
dg.add_edge(
instruction_form.line_number,
dep.line_number,
latency=edge_weight,
)
dg.nodes[dep.line_number]["instruction_form"] = dep
return dg
@@ -204,23 +243,17 @@ class KernelDG(nx.DiGraph):
for lat_sum, involved_lines in loopcarried_deps:
dict_key = "-".join([str(il[0]) for il in involved_lines])
loopcarried_deps_dict[dict_key] = {
"root": self._get_node_by_lineno(involved_lines[0][0]),
"root": self._get_node_by_lineno(dg, involved_lines[0][0]),
"dependencies": [
(self._get_node_by_lineno(ln), lat) for ln, lat in involved_lines
(self._get_node_by_lineno(dg, ln), lat) for ln, lat in involved_lines
],
"latency": lat_sum,
}
return loopcarried_deps_dict
def _get_node_by_lineno(self, lineno, kernel=None, all=False):
"""Return instruction form with line number ``lineno`` from kernel"""
if kernel is None:
kernel = self.kernel
result = [instr for instr in kernel if instr.line_number == lineno]
if not all:
return result[0]
else:
return result
def _get_node_by_lineno(self, dg, lineno):
"""Return instruction form with line number ``lineno`` from dg"""
return dg.nodes[lineno]["instruction_form"]
def get_critical_path(self):
"""Find and return critical path after the creation of a directed graph."""
@@ -229,21 +262,21 @@ class KernelDG(nx.DiGraph):
longest_path = nx.algorithms.dag.dag_longest_path(self.dg, weight="latency")
# TODO verify that we can remove the next two lines due to earlier initialization
for line_number in longest_path:
self._get_node_by_lineno(int(line_number)).latency_cp = 0
self._get_node_by_lineno(self.dg, line_number).latency_cp = 0
# set cp latency to instruction
path_latency = 0.0
for s, d in nx.utils.pairwise(longest_path):
node = self._get_node_by_lineno(int(s))
node = self._get_node_by_lineno(self.dg, s)
node.latency_cp = self.dg.edges[(s, d)]["latency"]
path_latency += node.latency_cp
# add latency for last instruction
node = self._get_node_by_lineno(int(longest_path[-1]))
node = self._get_node_by_lineno(self.dg, longest_path[-1])
node.latency_cp = node.latency
if max_latency_instr.latency > path_latency:
max_latency_instr.latency_cp = float(max_latency_instr.latency)
return [max_latency_instr]
else:
return [x for x in self.kernel if x.line_number in longest_path]
return [self._get_node_by_lineno(self.dg, x) for x in longest_path]
else:
# split to DAG
raise NotImplementedError("Kernel is cyclic.")
@@ -284,15 +317,18 @@ class KernelDG(nx.DiGraph):
# print(" TO", instr_form.line, register_changes)
if isinstance(dst, RegisterOperand):
# read of register
if self.is_read(dst, instr_form):
read_kind = self._read_kind(dst, instr_form)
if read_kind != KernelDG.ReadKind.NOT_A_READ:
dep_flags = []
if (
dst.pre_indexed
or dst.post_indexed
or (isinstance(dst.post_indexed, dict))
):
yield instr_form, ["p_indexed"]
else:
yield instr_form, []
dep_flags = ["p_indexed"]
if read_kind == KernelDG.ReadKind.READ_FOR_LOAD:
dep_flags += ["for_load"]
yield instr_form, dep_flags
# write to register -> abort
if self.is_written(dst, instr_form):
break
@@ -363,11 +399,12 @@ class KernelDG(nx.DiGraph):
return self.dg.successors(line_number)
return iter([])
def is_read(self, register, instruction_form):
"""Check if instruction form reads from given register"""
def _read_kind(self, register, instruction_form):
"""Check if instruction form reads from given register. Returns a ReadKind."""
is_read = False
for_load = False
if instruction_form.semantic_operands is None:
return is_read
return KernelDG.ReadKind.NOT_A_READ
for src in chain(
instruction_form.semantic_operands["source"],
instruction_form.semantic_operands["src_dst"],
@@ -377,10 +414,15 @@ class KernelDG(nx.DiGraph):
if isinstance(src, FlagOperand):
is_read = self.parser.is_flag_dependend_of(register, src) or is_read
if isinstance(src, MemoryOperand):
is_memory_read = False
if src.base is not None:
is_read = self.parser.is_reg_dependend_of(register, src.base) or is_read
is_memory_read = self.parser.is_reg_dependend_of(register, src.base)
if src.index is not None and isinstance(src.index, RegisterOperand):
is_read = self.parser.is_reg_dependend_of(register, src.index) or is_read
is_memory_read = (
self.parser.is_reg_dependend_of(register, src.index) or is_memory_read
)
for_load = is_memory_read
is_read = is_read or is_memory_read
# Check also if read in destination memory address
for dst in chain(
instruction_form.semantic_operands["destination"],
@@ -391,7 +433,16 @@ class KernelDG(nx.DiGraph):
is_read = self.parser.is_reg_dependend_of(register, dst.base) or is_read
if dst.index is not None:
is_read = self.parser.is_reg_dependend_of(register, dst.index) or is_read
return is_read
if is_read:
if for_load:
return KernelDG.ReadKind.READ_FOR_LOAD
else:
return KernelDG.ReadKind.OTHER_READ
else:
return KernelDG.ReadKind.NOT_A_READ
def is_read(self, register, instruction_form):
    """
    Check if instruction form reads from given register.

    Boolean convenience wrapper around ``_read_kind``: returns True for every
    result other than ``KernelDG.ReadKind.NOT_A_READ``.
    """
    return self._read_kind(register, instruction_form) != KernelDG.ReadKind.NOT_A_READ
def is_memload(self, mem, instruction_form, register_changes={}):
"""Check if instruction form loads from given location, assuming register_changes"""
@@ -520,16 +571,13 @@ class KernelDG(nx.DiGraph):
lcd_line_numbers = {}
for dep in lcd:
lcd_line_numbers[dep] = [x.line_number for x, lat in lcd[dep]["dependencies"]]
# add color scheme
graph.graph["node"] = {"colorscheme": "accent8"}
graph.graph["edge"] = {"colorscheme": "accent8"}
# create LCD edges
for dep in lcd_line_numbers:
min_line_number = min(lcd_line_numbers[dep])
max_line_number = max(lcd_line_numbers[dep])
graph.add_edge(max_line_number, min_line_number)
graph.edges[max_line_number, min_line_number]["latency"] = [
graph.add_edge(min_line_number, max_line_number, dir="back")
graph.edges[min_line_number, max_line_number]["latency"] = [
lat for x, lat in lcd[dep]["dependencies"] if x.line_number == max_line_number
]
@@ -541,59 +589,81 @@ class KernelDG(nx.DiGraph):
for n in cp:
graph.nodes[n.line_number]["instruction_form"].latency_cp = n.latency_cp
# color CP and LCD
# Make the critical path bold.
for n in graph.nodes:
if n in cp_line_numbers:
# graph.nodes[n]['color'] = 1
graph.nodes[n]["style"] = "bold"
graph.nodes[n]["penwidth"] = 4
for col, dep in enumerate(lcd):
if n in lcd_line_numbers[dep]:
if "style" not in graph.nodes[n]:
graph.nodes[n]["style"] = "filled"
else:
graph.nodes[n]["style"] += ",filled"
graph.nodes[n]["fillcolor"] = 2 + col
# color edges
for e in graph.edges:
if (
graph.nodes[e[0]]["instruction_form"].line_number in cp_line_numbers
and graph.nodes[e[1]]["instruction_form"].line_number in cp_line_numbers
and e[0] < e[1]
# Make critical path edges bold.
for u, v in zip(cp_line_numbers[:-1], cp_line_numbers[1:]):
graph.edges[u, v]["style"] = "bold"
graph.edges[u, v]["penwidth"] = 3
# Color the cycles created by loop-carried dependencies, longest first, never recoloring
# any node or edge, so that the longest LCD and most long chains that are involved in the
# loop are legible.
lcd_by_latencies = sorted(
(
(latency, list(deps))
for latency, deps in groupby(lcd, lambda dep: lcd[dep]["latency"])
),
reverse=True,
)
node_colors = {}
edge_colors = {}
colors_used = 0
for i, (latency, deps) in enumerate(lcd_by_latencies):
color = None
for dep in deps:
path = lcd_line_numbers[dep]
for n in path:
if n not in node_colors:
if not color:
color = colors_used + 1
colors_used += 1
node_colors[n] = color
for u, v in zip(path, path[1:] + [path[0]]):
if (u, v) not in edge_colors:
# Don't introduce a color just for an edge.
if not color:
color = colors_used
edge_colors[u, v] = color
max_color = min(11, colors_used)
colorscheme = f"spectral{max(3, max_color)}"
graph.graph["node"] = {"colorscheme": colorscheme}
graph.graph["edge"] = {"colorscheme": colorscheme}
for n, color in node_colors.items():
if "style" not in graph.nodes[n]:
graph.nodes[n]["style"] = "filled"
else:
graph.nodes[n]["style"] += ",filled"
graph.nodes[n]["fillcolor"] = color
if (max_color >= 4 and color in (1, max_color)) or (
max_color >= 10 and color in (1, 2, max_color - 1, max_color)
):
bold_edge = True
for i in range(e[0] + 1, e[1]):
if i in cp_line_numbers:
bold_edge = False
if bold_edge:
graph.edges[e]["style"] = "bold"
graph.edges[e]["penwidth"] = 3
for dep in lcd_line_numbers:
if (
graph.nodes[e[0]]["instruction_form"].line_number in lcd_line_numbers[dep]
and graph.nodes[e[1]]["instruction_form"].line_number in lcd_line_numbers[dep]
):
graph.edges[e]["color"] = graph.nodes[e[1]]["fillcolor"]
graph.nodes[n]["fontcolor"] = "white"
for (u, v), color in edge_colors.items():
# The backward edge of the cycle is represented as the corresponding forward
# edge with the attribute dir=back.
edge = graph.edges[u, v] if (u, v) in graph.edges else graph.edges[v, u]
edge["color"] = color
# rename node from [idx] to [idx mnemonic] and add shape
mapping = {}
for n in graph.nodes:
if int(n) != n:
mapping[n] = "{}: LOAD".format(int(n))
node = graph.nodes[n]["instruction_form"]
if node.mnemonic is not None:
mapping[n] = "{}: {}".format(KernelDG.get_real_line_number(n), node.mnemonic)
else:
label = "label" if node.label is not None else None
label = "directive" if node.directive is not None else label
label = "comment" if node.comment is not None and label is None else label
mapping[n] = "{}: {}".format(n, label)
graph.nodes[n]["fontname"] = "italic"
graph.nodes[n]["fontsize"] = 11.0
else:
node = graph.nodes[n]["instruction_form"]
if node.mnemonic is not None:
mapping[n] = "{}: {}".format(n, node.mnemonic)
else:
label = "label" if node.label is not None else None
label = "directive" if node.directive is not None else label
label = "comment" if node.comment is not None and label is None else label
mapping[n] = "{}: {}".format(n, label)
graph.nodes[n]["fontname"] = "italic"
graph.nodes[n]["fontsize"] = 11.0
if not KernelDG.is_load_line_number(n):
graph.nodes[n]["shape"] = "rectangle"
nx.relabel.relabel_nodes(graph, mapping, copy=False)

View File

@@ -103,6 +103,8 @@ class TestParserX86Intel(unittest.TestCase):
instr12 = "\tvfmadd213sd xmm0, xmm1, QWORD PTR __real@bfc5555555555555"
instr13 = "\tjmp\t$LN18@operator"
instr14 = "vaddsd xmm0, xmm0, QWORD PTR [rdx+8+rax*8]"
instr15 = "vextractf128 xmm1, ymm2, 0x2"
instr16 = "vmovupd xmm0, [rax+123R]"
parsed_1 = self.parser.parse_instruction(instr1)
parsed_2 = self.parser.parse_instruction(instr2)
@@ -118,6 +120,8 @@ class TestParserX86Intel(unittest.TestCase):
parsed_12 = self.parser.parse_instruction(instr12)
parsed_13 = self.parser.parse_instruction(instr13)
parsed_14 = self.parser.parse_instruction(instr14)
parsed_15 = self.parser.parse_instruction(instr15)
parsed_16 = self.parser.parse_instruction(instr16)
self.assertEqual(parsed_1.mnemonic, "sub")
self.assertEqual(parsed_1.operands[0], RegisterOperand(name="RSP"))
@@ -221,6 +225,18 @@ class TestParserX86Intel(unittest.TestCase):
),
)
self.assertEqual(parsed_15.mnemonic, "vextractf128")
self.assertEqual(parsed_15.operands[0], RegisterOperand(name="XMM1"))
self.assertEqual(parsed_15.operands[1], RegisterOperand(name="YMM2"))
self.assertEqual(parsed_15.operands[2], ImmediateOperand(value=2))
self.assertEqual(parsed_16.mnemonic, "vmovupd")
self.assertEqual(parsed_16.operands[0], RegisterOperand(name="XMM0"))
self.assertEqual(
parsed_16.operands[1],
MemoryOperand(base=RegisterOperand(name="RAX"), offset=ImmediateOperand(value=291)),
)
def test_parse_line(self):
line_comment = "; -- Begin main"
line_instruction = "\tret\t0"

View File

@@ -89,6 +89,9 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_csx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "csx.yml")
)
cls.machine_model_skx = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "skx.yml")
)
cls.machine_model_tx2 = MachineModel(
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "tx2.yml")
)
@@ -107,6 +110,11 @@ class TestSemanticTools(unittest.TestCase):
cls.machine_model_csx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
)
cls.semantics_skx_intel = ArchSemantics(
cls.parser_x86_intel,
cls.machine_model_skx,
path_to_yaml=os.path.join(cls.MODULE_DATA_DIR, "isa/x86.yml"),
)
cls.semantics_aarch64 = ISASemantics(cls.parser_AArch64)
cls.semantics_tx2 = ArchSemantics(
cls.parser_AArch64,
@@ -136,10 +144,10 @@ class TestSemanticTools(unittest.TestCase):
for i in range(len(cls.kernel_x86_intel)):
cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel[i])
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel[i])
cls.semantics_csx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep)
cls.semantics_skx_intel.normalize_instruction_forms(cls.kernel_x86_intel_memdep)
for i in range(len(cls.kernel_x86_intel_memdep)):
cls.semantics_csx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i])
cls.semantics_csx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i])
cls.semantics_skx_intel.assign_src_dst(cls.kernel_x86_intel_memdep[i])
cls.semantics_skx_intel.assign_tp_lt(cls.kernel_x86_intel_memdep[i])
cls.semantics_tx2.normalize_instruction_forms(cls.kernel_AArch64)
for i in range(len(cls.kernel_AArch64)):
cls.semantics_tx2.assign_src_dst(cls.kernel_AArch64[i])
@@ -458,7 +466,7 @@ class TestSemanticTools(unittest.TestCase):
# / /
# 4 /
# /
# 5.1
# 4.875
#
dg = KernelDG(
self.kernel_x86_intel,
@@ -473,8 +481,8 @@ class TestSemanticTools(unittest.TestCase):
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=4)), 5)
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5))), 1)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5)), 6)
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=5.1))), 1)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=5.1)), 5)
self.assertEqual(len(list(dg.get_dependent_instruction_forms(line_number=4.875))), 1)
self.assertEqual(next(dg.get_dependent_instruction_forms(line_number=4.875)), 5)
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=6)), [])
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=7)), [])
self.assertEqual(list(dg.get_dependent_instruction_forms(line_number=8)), [])
@@ -502,12 +510,15 @@ class TestSemanticTools(unittest.TestCase):
dg = KernelDG(
self.kernel_x86_intel_memdep,
self.parser_x86_intel,
self.machine_model_csx,
self.semantics_csx_intel,
self.machine_model_skx,
self.semantics_skx_intel,
)
self.assertTrue(nx.algorithms.dag.is_directed_acyclic_graph(dg.dg))
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=3)), {6, 8})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=5)), {10, 12})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=18)), {18.875})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=18.875)), {19})
self.assertEqual(set(dg.get_dependent_instruction_forms(line_number=19)), set())
with self.assertRaises(ValueError):
dg.get_dependent_instruction_forms()
# test dot creation
@@ -849,6 +860,9 @@ class TestSemanticTools(unittest.TestCase):
instr_form_r_ymm = self.parser_x86_intel.parse_line("vmovapd ymm0, ymm1")
self.semantics_csx_intel.normalize_instruction_form(instr_form_r_ymm)
self.semantics_csx_intel.assign_src_dst(instr_form_r_ymm)
instr_form_rw_sar = self.parser_x86_intel.parse_line("sar rcx, 43")
self.semantics_csx_intel.normalize_instruction_form(instr_form_rw_sar)
self.semantics_csx_intel.assign_src_dst(instr_form_rw_sar)
self.assertTrue(dag.is_read(reg_rcx, instr_form_r_c))
self.assertFalse(dag.is_read(reg_rcx, instr_form_non_r_c))
self.assertFalse(dag.is_read(reg_rcx, instr_form_w_c))
@@ -860,6 +874,8 @@ class TestSemanticTools(unittest.TestCase):
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_1))
self.assertTrue(dag.is_written(reg_ymm1, instr_form_rw_ymm_2))
self.assertFalse(dag.is_written(reg_ymm1, instr_form_r_ymm))
self.assertTrue(dag.is_read(reg_rcx, instr_form_rw_sar))
self.assertTrue(dag.is_written(reg_rcx, instr_form_rw_sar))
def test_is_read_is_written_AArch64(self):
# independent from HW model