new instructions

This commit is contained in:
JanLJL
2025-08-13 14:42:30 +02:00
parent 590f915f85
commit 714319e613
2 changed files with 97 additions and 13 deletions

View File

@@ -3,7 +3,8 @@ micro_architecture: Sapphire Rapids
arch_code: SPR
isa: x86
ROB_size: ~
retired_uOps_per_cycle: ~
dispatched_uOps_per_cycle: 6
retired_uOps_per_cycle: 8
scheduler_size: ~
hidden_loads: false
load_latency: {gpr: 5.0, mm: 5.0, xmm: 5.0, ymm: 5.0, zmm: 5.0}
@@ -2788,7 +2789,17 @@ instruction_forms:
port_pressure: [[1, '15']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
- name: vdivpd # ibench
# Non-VEX divpd, register-register form with two xmm operands (values tagged as ibench measurements).
# NOTE(review): port_pressure lists 1 on port '0' plus 4 on '0DV' (presumably the divider
# pipe attached to port 0), while uops is 4 — confirm whether uops should count only the
# issued µops rather than mirror the divider occupancy.
- name: divpd # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
- class: register # ibench
name: xmm # ibench
latency: 14 # ibench
port_pressure: [[1, '0'], [4, ['0DV']]] # ibench
throughput: 4.0 # ibench
uops: 4 # ibench
- name: vdivpd # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
@@ -3840,6 +3851,16 @@ instruction_forms:
port_pressure: [[1, '01']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
# Non-VEX paddd/paddq, register-register form with two xmm operands.
# One µop that may issue on any of ports 0, 1 or 5, hence the 1/3-cycle throughput value.
- name: [paddd, paddq] # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
- class: register # ibench
name: xmm # ibench
latency: 1 # ibench
port_pressure: [[1, '015']] # ibench
throughput: 0.3333333333333333 # ibench
uops: 1 # ibench
- name: [vpaddd, vpaddq] # ibench
operands: # ibench
- class: register # ibench
@@ -4313,6 +4334,16 @@ instruction_forms:
port_pressure: [[1, '05']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
# Non-VEX subsd, register-register form with two xmm operands.
# One µop that may issue on port 1 or port 5, hence the 0.5-cycle throughput value.
- name: subsd # ibench
operands: # ibench
- class: register # ibench
name: xmm # ibench
- class: register # ibench
name: xmm # ibench
latency: 2 # ibench
port_pressure: [[1, '15']] # ibench
throughput: 0.5 # ibench
uops: 1 # ibench
- name: vsubsd # ibench
operands: # ibench
- class: register # ibench
@@ -4562,6 +4593,16 @@ instruction_forms:
port_pressure: [[1, '01'], [1, '5']]
throughput: 1.0
uops: 3
# cvtdq2pd / vcvtdq2pd, register-register form with two xmm operands:
# two µops, one on port 0 or 1 and one on port 5.
# NOTE(review): a standalone `vcvtdq2pd` entry follows directly after this one — confirm the
# two entries do not describe the same operand form (only one of them would be matched).
- name: [cvtdq2pd, vcvtdq2pd]
operands:
- class: register
name: xmm
- class: register
name: xmm
latency: 5
port_pressure: [[1, '01'], [1, '5']]
throughput: 1.0
uops: 2
- name: vcvtdq2pd
operands:
- class: register
@@ -5866,6 +5907,18 @@ instruction_forms:
port_pressure: [[1, '05']]
throughput: 0.5
uops: 1
# pshufd / vpshufd with an integer immediate and two xmm register operands.
# The single µop may issue on port 1 or port 5, so the throughput value is 0.5 cycles,
# matching the other single-µop '15' entries in this file.
- name: [vpshufd, pshufd] # uops.info
operands:
- class: immediate
imd: int
- class: register
name: xmm
- class: register
name: xmm
latency: 1
port_pressure: [[1, '15']]
throughput: 0.5
uops: 1
- name: vshuff64x2
operands:
- class: immediate
@@ -5992,7 +6045,7 @@ instruction_forms:
port_pressure: [[1, '5']] # uops.info
throughput: 1.0 # ibench
uops: 1
- name: [cltq, cdq, cdqe]
- name: [cltq, cdq, cdqe]
operands: []
latency: 1 # uops.info
port_pressure: [[1, '06']] # uops.info

View File

@@ -35,9 +35,9 @@ port_model_scheme: |
| BR | | BR | | ALU | | ALU | | ALU | | ALU | | ALU | | DV | | ALU | | DV | |SIMD/FP| |FPDV| |SIMD/FP| |SIMD/FP| |FPDV| |SIMD/FP| | LD | | LD | | LD | | ST | | ST |
+----+ +----+ +------+ +------+ +------+ +------+ +------+ +----+ +------+ +----+ | ALU | +----+ | ALU | | ALU | +----+ | ALU | +-----+ +-----+ +-----+ +-----+ +-----+
+------+ +------+ +-------+ +-------+ +-------+ +-------+ +-----+ +-----+
| MUL | | MUL | +-------+ +-------+ +-------+ +-------+ | AGU | | AGU |
+------+ +------+ |SIMD/FP| |SIMD/FP| |SIMD/FP| |SIMD/FP| +-----+ +-----+
+------+ +------+ | MISC | | MISC | | MISC | | MISC |
| MUL | | MUL | +-------+ +-------+ +-------+ +-------+ | ST | | ST |
+------+ +------+ |SIMD/FP| |SIMD/FP| |SIMD/FP| |SIMD/FP| | AGU | | AGU |
+------+ +------+ | MISC | | MISC | | MISC | | MISC | +-----+ +-----+
| CRC | | CRC | +-------+ +-------+ +-------+ +-------+
+------+ +------+ +-------+ +-------+ +-------+ +-------+
+------+ +------+ | SIMD | | SIMD | | SIMD | | SIMD |
@@ -119,6 +119,17 @@ instruction_forms:
throughput: 0.25
latency: 1.0 # 1*p2367
port_pressure: [[1, '2367']]
# addvl with two x-prefixed register operands followed by an integer immediate.
# One µop that may issue on port 6 or port 7.
- name: addvl
operands:
- class: register
prefix: x
- class: register
prefix: x
- class: immediate
imd: int
throughput: 0.5
latency: 2.0 # 1*p67
port_pressure: [[1, '67']]
- name: adds
operands:
- class: register
@@ -259,13 +270,13 @@ instruction_forms:
throughput: 0.16666666
latency: 1.0 # 1*p234567
port_pressure: [[1, '234567']]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq, bmi]
operands:
- class: identifier
throughput: 0.5
latency: 0.0
port_pressure: [[1, '01']]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq]
- name: [b, bl, bcc, bcs, bgt, bhi, b.lo, b.ne, b.any, b.none, bal, b.al, b.lt, b.eq, b.hs, b.gt, b.hi, bne, beq, bmi]
operands:
- class: immediate
imd: int
@@ -1410,7 +1421,7 @@ instruction_forms:
throughput: 0.25
latency: 1.0 # 1*p2367
port_pressure: [[1, '2367']]
- name: [incw, incd, inch]
- name: [incw, incd, inch, incb]
operands:
- class: register
prefix: z
@@ -1420,20 +1431,20 @@ instruction_forms:
throughput: 0.25
latency: 2.0 # 1*p8,9,10,11
port_pressure: [[1, ['8', '9', '10', '11']]]
- name: [incw, incd, inch]
- name: [incw, incd, inch, incb]
operands:
- class: register
prefix: x
- class: identifier
throughput: 0.5
latency: 2.0 # 1*p67
latency: 1.0 # 1*p67
port_pressure: [[1, '67']]
- name: [incw, incd, inch]
- name: [incw, incd, inch, incb]
operands:
- class: register
prefix: x
throughput: 0.5
latency: 2.0 # 1*p67
latency: 1.0 # 1*p67
port_pressure: [[1, '67']]
- name: [madd, msub] # NOTE: if the dependency is via the addend (fourth operand), the latency is only 1cy !!!
operands:
@@ -2939,6 +2950,15 @@ instruction_forms:
throughput: 1.0
latency: 1.0 # 1*p3
port_pressure: [[1, '3']]
# fcmp/fcmpe comparing one register (any prefix, '*') against an immediate.
# One µop on port 3.
# NOTE(review): other immediate operands in this file are declared as `imd: int`;
# confirm that '*' is accepted as a wildcard for `imd` by the consumer of this file.
- name: [fcmp, fcmpe]
operands:
- class: register
prefix: '*'
- class: immediate
imd: '*'
throughput: 1.0
latency: 1.0 # 1*p3
port_pressure: [[1, '3']]
- name: [fccmp, fccmpe] # LT assumed from fcmp
operands:
- class: register
@@ -3726,6 +3746,17 @@ instruction_forms:
throughput: 0.25
latency: 3.0 # 1*p89,10,11
port_pressure: [[1, ['8','9','10','11']]]
# NOTE(review): "UNKNONWfmul" does not look like a real mnemonic — it reads like a misspelled
# "UNKNOWN" placeholder prefixed to fmul. Confirm whether this entry is meant to be committed.
# Its values also disagree with each other: the trailing comment says 1*p89,10,11, but
# port_pressure lists 20 over those four ports and latency is 30.0, while throughput 0.25
# corresponds to a single µop spread over four ports.
# Operand form: three d-prefixed registers.
- name: [UNKNONWfmul]
operands:
- class: register
prefix: d
- class: register
prefix: d
- class: register
prefix: d
throughput: 0.25
latency: 30.0 # 1*p89,10,11
port_pressure: [[20, ['8','9','10','11']]]
- name: [fmul, fmulx]
operands:
- class: register