From 6df973d16a72077711c2648af32d5bae4e148c86 Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Thu, 22 Feb 2024 13:37:13 +0100 Subject: [PATCH] Restore deleted files --- osaca/data/a72/mapping_pmevo.json | 401 +++++++ osaca/data/create_db_entry.py | 180 +++ osaca/data/generate_mov_entries.py | 1644 ++++++++++++++++++++++++++++ osaca/data/model_importer.py | 309 ++++++ osaca/data/pmevo_importer.py | 321 ++++++ 5 files changed, 2855 insertions(+) create mode 100644 osaca/data/a72/mapping_pmevo.json create mode 100644 osaca/data/create_db_entry.py create mode 100644 osaca/data/generate_mov_entries.py create mode 100644 osaca/data/model_importer.py create mode 100644 osaca/data/pmevo_importer.py diff --git a/osaca/data/a72/mapping_pmevo.json b/osaca/data/a72/mapping_pmevo.json new file mode 100644 index 0000000..30cf194 --- /dev/null +++ b/osaca/data/a72/mapping_pmevo.json @@ -0,0 +1,401 @@ +{ + "kind": "Mapping3", + "arch": { + "kind": "Architecture", + "ports": ["0", "1", "2", "3", "4", "5", "6"], + "name": "A72", + "insns": ["_abs_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_abs_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_add_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_add_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_add_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_add_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtb", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxth", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxth_3", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_2", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtb", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtb_3", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxth", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxth_3", 
"_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw_3", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_asr_3", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_2", "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_4", "_add_((REG:W:G:64)),_((REG:R:G:64)),_8", "_addp_((REG:W:F:64)),_((REG:R:F:VEC)).2d", "_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw", "_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_2", "_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_3", "_adds_((REG:W:G:64)),_((REG:R:G:64)),_40", "_addv_((REG:W:F:32)),_((REG:R:F:VEC)).4s", "_addv_((REG:W:F:8)),_((REG:R:F:VEC)).16b", "_and_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_and_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_and_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_7", "_and_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_8", "_and_((REG:W:G:64)),_((REG:R:G:64)),_2147483648", "_ands_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_ands_((REG:W:G:64)),_((REG:R:G:64)),_7", "_asr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_asr_((REG:W:G:64)),_((REG:R:G:64)),_2", "_bfi_((REG:W:G:64)),_((REG:R:G:64)),_16,_16", "_bic_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_bic_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_bic_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_8", "_bic_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_8", "_bics_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_bif_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_bit_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_bsl_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_clz_((REG:W:G:64)),_((REG:R:G:64))", "_cmeq_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b,_#0", 
"_cmeq_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_cmeq_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s,_#0", "_cmeq_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_cmeq_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_cmge_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_cmge_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_#0", "_cmge_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_cmgt_((REG:R:F:64)),_((REG:R:F:64)),_#0", "_cmgt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_cmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_#0", "_cmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_cmgt_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h,_#0", "_cmhi_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_cmhi_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_cmhi_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_cmhs_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_cmn_((REG:R:G:64)),_#1", "_cmn_((REG:R:G:64)),_((REG:R:G:64))", "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_sxth", "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_sxtw", "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_3", "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_uxtb", "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_uxth", "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_uxtw", "_cmp_((REG:R:G:64)),_((REG:R:G:64))", "_cmp_((REG:R:G:64)),_((REG:R:G:64)),_asr_2", "_cmp_((REG:R:G:64)),_((REG:R:G:64)),_lsl_3", "_cmp_((REG:R:G:64)),_((REG:R:G:64)),_lsr_3", "_cmp_((REG:R:G:64)),_624", "_dup_((REG:W:F:32)),_((REG:R:F:VEC)).s[0]", "_dup_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).d[0]", "_dup_((REG:W:F:VEC)).2d,_((REG:W:G:64))", "_dup_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).s[0]", "_eor_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_asr_63", "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_11", 
"_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_30", "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_ror_18", "_eor_((REG:W:G:64)),_((REG:R:G:64)),_4", "_extr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_49", "_fabd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fabd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fabd_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fabd_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fabs_((REG:W:F:32)),_((REG:R:F:32))", "_fabs_((REG:W:F:64)),_((REG:R:F:64))", "_fabs_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fabs_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fadd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fadd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fadd_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fadd_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fcmeq_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fcmeq_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_0", "_fcmeq_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0", "_fcmge_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fcmge_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fcmgt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fcmgt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_0", "_fcmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fcmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0", "_fcmle_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0", "_fcmlt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_0", "_fcmlt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0", "_fcmp_((REG:R:F:32)),_#0.0", "_fcmp_((REG:R:F:32)),_((REG:R:F:32))", "_fcmp_((REG:R:F:64)),_#0.0", "_fcmp_((REG:R:F:64)),_((REG:R:F:64))", "_fcmpe_((REG:R:F:32)),_#0.0", "_fcmpe_((REG:R:F:32)),_((REG:R:F:32))", "_fcmpe_((REG:R:F:64)),_#0.0", "_fcmpe_((REG:R:F:64)),_((REG:R:F:64))", "_fcvt_((REG:W:F:32)),_((REG:R:F:64))", "_fcvt_((REG:W:F:64)),_((REG:R:F:32))", 
"_fcvtas_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fcvtl2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s", "_fcvtl_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s", "_fcvtms_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fcvtms_((REG:W:G:64)),_((REG:W:F:32))", "_fcvtms_((REG:W:G:64)),_((REG:W:F:64))", "_fcvtmu_((REG:W:G:64)),_((REG:W:F:64))", "_fcvtn2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).2d", "_fcvtn_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2d", "_fcvtps_((REG:W:G:64)),_((REG:W:F:64))", "_fcvtpu_((REG:W:G:64)),_((REG:W:F:64))", "_fcvtzs_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fcvtzs_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s", "_fcvtzs_((REG:W:G:64)),_((REG:W:F:32))", "_fcvtzs_((REG:W:G:64)),_((REG:W:F:64))", "_fcvtzu_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fcvtzu_((REG:W:G:64)),_((REG:W:F:32))", "_fcvtzu_((REG:W:G:64)),_((REG:W:F:64))", "_fdiv_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fdiv_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fdiv_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fdiv_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fmadd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fmadd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fmla_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fmla_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).d[0]", "_fmla_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fmls_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fmls_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fmov_((REG:W:F:32)),_((REG:R:F:32))", "_fmov_((REG:W:F:32)),_2.0e+1", "_fmov_((REG:W:F:64)),_((REG:R:F:64))", "_fmov_((REG:W:F:64)),_((REG:W:G:64))", "_fmov_((REG:W:F:64)),_1.0e+1", "_fmov_((REG:W:F:VEC)).2d,_1.0e+0", "_fmov_((REG:W:F:VEC)).4s,_1.0e+0", "_fmov_((REG:W:F:VEC)).d[1],_((REG:W:G:64))", "_fmov_((REG:W:G:64)),_((REG:W:F:64))", "_fmov_((REG:W:G:64)),_((REG:W:F:VEC)).d[1]", 
"_fmsub_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fmsub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fmul_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fmul_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fmul_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fmul_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).d[0]", "_fmul_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fmul_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).s[1]", "_fneg_((REG:W:F:32)),_((REG:R:F:32))", "_fneg_((REG:W:F:64)),_((REG:R:F:64))", "_fneg_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fneg_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fnmadd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fnmadd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fnmsub_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fnmsub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fnmul_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fnmul_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_frinta_((REG:W:F:64)),_((REG:R:F:64))", "_frintm_((REG:W:F:32)),_((REG:R:F:32))", "_frintm_((REG:W:F:64)),_((REG:R:F:64))", "_frintm_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s", "_frintp_((REG:W:F:32)),_((REG:R:F:32))", "_frintp_((REG:W:F:64)),_((REG:R:F:64))", "_frintp_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_frintx_((REG:W:F:64)),_((REG:R:F:64))", "_frintz_((REG:W:F:32)),_((REG:R:F:32))", "_frintz_((REG:W:F:64)),_((REG:R:F:64))", "_frintz_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fsqrt_((REG:W:F:32)),_((REG:R:F:32))", "_fsqrt_((REG:W:F:64)),_((REG:R:F:64))", "_fsqrt_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_fsqrt_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_fsub_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))", "_fsub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_fsub_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", 
"_fsub_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_ins_((REG:W:F:VEC)).d[1],_((REG:R:F:VEC)).d[0]", "_ins_((REG:W:F:VEC)).d[1],_((REG:W:G:64))", "_ldr_((REG:W:F:128)),_[((MEM:64)),_((MIMM:16))]", "_ldr_((REG:W:F:16)),_[((MEM:64)),_((MIMM:16))]", "_ldr_((REG:W:F:32)),_[((MEM:64)),_((MIMM:16))]", "_ldr_((REG:W:F:64)),_[((MEM:64)),_((MIMM:16))]", "_ldr_((REG:W:F:8)),_[((MEM:64)),_((MIMM:16))]", "_ldr_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]", "_ldrsb_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]", "_ldrsh_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]", "_ldrsw_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]", "_lsl_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_lsl_((REG:W:G:64)),_((REG:R:G:64)),_4", "_lsr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_lsr_((REG:W:G:64)),_((REG:R:G:64)),_32", "_madd_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_mla_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_mla_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_mla_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_mneg_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_mov_((REG:W:F:VEC)).8b,_((REG:R:F:VEC)).8b", "_mov_((REG:W:G:64)),_((REG:R:G:64))", "_mov_((REG:W:G:64)),_2147483647", "_movi_((REG:W:F:64)),_-256", "_movi_((REG:W:F:VEC)).16b,_0xdf", "_movi_((REG:W:F:VEC)).4s,_0", "_movi_((REG:W:F:VEC)).4s,_0x4,_lsl_8", "_movi_((REG:W:F:VEC)).4s,_0xff,_msl_8", "_movi_((REG:W:F:VEC)).8h,_0x4,_lsl_8", "_movi_((REG:W:F:VEC)).8h,_0x53", "_movk_((REG:W:G:64)),_0x6c07,_lsl_16", "_msub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_mul_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_mul_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_mul_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_mul_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_mvn_((REG:W:G:64)),_((REG:R:G:64))", "_mvn_((REG:W:G:64)),_((REG:R:G:64)),_lsl_2", 
"_mvn_((REG:W:G:64)),_((REG:R:G:64)),_lsr_6", "_mvni_((REG:W:F:VEC)).4h,_0xfe,_lsl_8", "_mvni_((REG:W:F:VEC)).4s,_0", "_mvni_((REG:W:F:VEC)).4s,_0x7c,_msl_8", "_mvni_((REG:W:F:VEC)).4s,_0x80,_lsl_24", "_mvni_((REG:W:F:VEC)).8h,_0x40", "_neg_((REG:W:F:64)),_((REG:R:F:64))", "_neg_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_neg_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_neg_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_neg_((REG:W:G:64)),_((REG:R:G:64))", "_neg_((REG:W:G:64)),_((REG:R:G:64)),_asr_2", "_neg_((REG:W:G:64)),_((REG:R:G:64)),_lsl_3", "_neg_((REG:W:G:64)),_((REG:R:G:64)),_lsr_2", "_negs_((REG:W:G:64)),_((REG:R:G:64))", "_not_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_orn_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_orr_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_orr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_orr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_7", "_orr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_9", "_orr_((REG:W:G:64)),_((REG:R:G:64)),_-4294967296", "_rev_((REG:W:G:64)),_((REG:R:G:64))", "_ror_((REG:W:G:64)),_((REG:R:G:64)),_14", "_sabd_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_saddl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_saddl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h", "_sbfiz_((REG:W:G:64)),_((REG:R:G:64)),_6,_32", "_sbfx_((REG:W:G:64)),_((REG:R:G:64)),_32,_32", "_scvtf_((REG:W:F:32)),_((REG:W:G:64))", "_scvtf_((REG:W:F:64)),_((REG:W:G:64))", "_scvtf_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_scvtf_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_sdiv_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_shl_((REG:W:F:64)),_((REG:R:F:64)),_3", "_shl_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_56", "_shl_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_1", "_shl_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_8", "_smax_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_smax_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", 
"_smax_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_smaxv_((REG:W:F:16)),_((REG:R:F:VEC)).8h", "_smaxv_((REG:W:F:32)),_((REG:R:F:VEC)).4s", "_smaxv_((REG:W:F:8)),_((REG:R:F:VEC)).16b", "_smin_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_smin_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_smin_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_sminv_((REG:W:F:16)),_((REG:R:F:VEC)).8h", "_sminv_((REG:W:F:32)),_((REG:R:F:VEC)).4s", "_sminv_((REG:W:F:8)),_((REG:R:F:VEC)).16b", "_smulh_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_smull2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_smull_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s", "_sshl_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_sshl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_sshll2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_0", "_sshll2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_0", "_sshll2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_0", "_sshll_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_0", "_sshll_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_0", "_sshll_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_0", "_sshr_((REG:W:F:64)),_((REG:R:F:64)),_3", "_sshr_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_56", "_sshr_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_10", "_sshr_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_8", "_ssubl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_ssubl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h", "_ssubw2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).4s", "_str_((REG:W:F:128)),_[((MEM:64)),_((MIMM:16))]", "_str_((REG:W:F:16)),_[((MEM:64)),_((MIMM:16))]", "_str_((REG:W:F:32)),_[((MEM:64)),_((MIMM:16))]", "_str_((REG:W:F:64)),_[((MEM:64)),_((MIMM:16))]", "_str_((REG:W:F:8)),_[((MEM:64)),_((MIMM:16))]", "_str_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]", "_sub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", 
"_sub_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_sub_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_sub_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_#1824", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_3", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtb", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw_2", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_asr_63", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_3", "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_8", "_subs_((REG:W:G:64)),_((REG:R:G:64)),_#1", "_subs_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw", "_subs_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_subs_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_5", "_tbl_((REG:W:F:VEC)).16b,_{((REG:R:F:VEC)).16b},_((REG:R:F:VEC)).16b", "_tst_((REG:W:G:64)),_((REG:R:G:64))", "_tst_((REG:W:G:64)),_-3", "_uaddl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_uaddl2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_uaddl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h", "_uaddl_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_((REG:R:F:VEC)).8b", "_ubfiz_((REG:W:G:64)),_((REG:R:G:64)),_2,_6", "_ubfx_((REG:W:G:64)),_((REG:R:G:64)),_5,_2", "_ucvtf_((REG:W:F:32)),_((REG:W:G:64))", "_ucvtf_((REG:W:F:64)),_((REG:W:G:64))", "_ucvtf_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d", "_ucvtf_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_udiv_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_umax_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_umax_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_umaxv_((REG:W:F:32)),_((REG:R:F:VEC)).4s", "_umaxv_((REG:W:F:8)),_((REG:R:F:VEC)).16b", 
"_umin_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_uminv_((REG:W:F:8)),_((REG:R:F:VEC)).16b", "_umlal2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_umlal_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s", "_umov_((REG:W:G:64)),_((REG:W:F:VEC)).d[1]", "_umulh_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))", "_umull2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_umull_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_((REG:R:F:VEC)).8b", "_ushl_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))", "_ushl_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s", "_ushll2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_0", "_ushll2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_0", "_ushll2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_0", "_ushll_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_0", "_ushll_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_0", "_ushll_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_0", "_ushr_((REG:W:F:64)),_((REG:R:F:64)),_63", "_ushr_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_19", "_ushr_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_1", "_ushr_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_8", "_usubl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_usubl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h", "_usubw2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).8h", "_uzp1_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_uzp1_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_uzp1_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_uzp2_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_uzp2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_uzp2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_xtn2_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).8h", "_xtn2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).2d", "_xtn2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).4s", "_xtn_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2d", "_xtn_((REG:W:F:VEC)).4h,_((REG:R:F:VEC)).4s", 
"_xtn_((REG:W:F:VEC)).8b,_((REG:R:F:VEC)).8h", "_zip1_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_zip1_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_zip1_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h", "_zip2_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b", "_zip2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s", "_zip2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h"] + }, + "assignment": { + "_mvn_((REG:W:G:64)),_((REG:R:G:64)),_lsl_2": [["2"]], + "_saddl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h": [["5"]], + "_mov_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_fcvtzu_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"]], + "_uzp1_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_smulh_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"], ["2"]], + "_ldr_((REG:W:F:16)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_4": [["2"]], + "_fmla_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).d[0]": [["5"]], + "_ubfx_((REG:W:G:64)),_((REG:R:G:64)),_5,_2": [["0", "5"]], + "_asr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_dup_((REG:W:F:32)),_((REG:R:F:VEC)).s[0]": [["4", "5"]], + "_frintm_((REG:W:F:64)),_((REG:R:F:64))": [["4"]], + "_bics_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_fmul_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_ucvtf_((REG:W:F:64)),_((REG:W:G:64))": [["4"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw": [["2"]], + "_uzp1_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_xtn2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).2d": [["4", "5"]], + "_fcvtms_((REG:W:G:64)),_((REG:W:F:64))": [["4"]], + "_frintp_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"]], + "_sminv_((REG:W:F:32)),_((REG:R:F:VEC)).4s": [["6"]], + 
"_shl_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_8": [["6"]], + "_mvni_((REG:W:F:VEC)).4s,_0x7c,_msl_8": [["5"]], + "_mvni_((REG:W:F:VEC)).4s,_0x80,_lsl_24": [["5"]], + "_ushll2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_0": [["6"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxth": [["2"]], + "_sub_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_fabd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:64)),_lsr_3": [["2"]], + "_shl_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_56": [["6"]], + "_mneg_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"]], + "_sub_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_fmls_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_dup_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).d[0]": [["5"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw": [["2"]], + "_cmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_scvtf_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"]], + "_fnmsub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_smin_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_#1824": [["0", "5"]], + "_fcvtzs_((REG:W:G:64)),_((REG:W:F:64))": [["4"]], + "_fabs_((REG:W:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_frintz_((REG:W:F:32)),_((REG:R:F:32))": [["4"]], + "_mvn_((REG:W:G:64)),_((REG:R:G:64)),_lsr_6": [["2"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtb_3": [["2"]], + "_and_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_8": [["2"]], + "_ins_((REG:W:F:VEC)).d[1],_((REG:R:F:VEC)).d[0]": [["4", "5"]], + "_cmhi_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_ands_((REG:W:G:64)),_((REG:R:G:64)),_7": [["0", "5"]], + "_ldrsw_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + 
"_uaddl_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_((REG:R:F:VEC)).8b": [["5"]], + "_ushl_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s": [["6"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_8": [["0", "5"]], + "_fneg_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_smaxv_((REG:W:F:16)),_((REG:R:F:VEC)).8h": [["4"]], + "_str_((REG:W:F:8)),_[((MEM:64)),_((MIMM:16))]": [["3"], ["3"], ["3"]], + "_sub_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fabd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_fabd_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fneg_((REG:W:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_fcmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_negs_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_rev_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_cmeq_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b,_#0": [["5"]], + "_cmge_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_cmhi_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_movi_((REG:W:F:VEC)).8h,_0x4,_lsl_8": [["5"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_sxth": [["2"]], + "_neg_((REG:W:G:64)),_((REG:R:G:64)),_asr_2": [["2"]], + "_fcmp_((REG:R:F:32)),_((REG:R:F:32))": [["6"]], + "_subs_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw": [["2"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxth": [["2"]], + "_shl_((REG:W:F:64)),_((REG:R:F:64)),_3": [["6"]], + "_fcvt_((REG:W:F:64)),_((REG:R:F:32))": [["4"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:64)),_lsl_3": [["2"]], + "_fcmpe_((REG:R:F:32)),_#0.0": [["6"]], + "_fmov_((REG:W:G:64)),_((REG:W:F:64))": [["1"]], + "_lsr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_orr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_9": [["2"]], + "_bic_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_8": [["2"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxth_3": 
[["2"]], + "_fmov_((REG:W:G:64)),_((REG:W:F:VEC)).d[1]": [["1"]], + "_smin_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_fsub_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_orr_((REG:W:G:64)),_((REG:R:G:64)),_-4294967296": [["0", "5"]], + "_ushll_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_0": [["6"]], + "_mla_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["4"], ["4"]], + "_fcvtpu_((REG:W:G:64)),_((REG:W:F:64))": [["4"]], + "_cmge_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_#0": [["5"]], + "_uminv_((REG:W:F:8)),_((REG:R:F:VEC)).16b": [["6"], ["6"]], + "_sminv_((REG:W:F:8)),_((REG:R:F:VEC)).16b": [["6"], ["6"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_2": [["2"]], + "_umin_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_addv_((REG:W:F:8)),_((REG:R:F:VEC)).16b": [["6"], ["6"]], + "_ldrsh_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_cmhi_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_cmeq_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtb": [["2"]], + "_tst_((REG:W:G:64)),_-3": [["0", "5"]], + "_umax_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_fcvtn_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2d": [["4"]], + "_fmsub_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxth_3": [["2"]], + "_fdiv_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"]], + "_sshll2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_0": [["6"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw_3": [["2"]], + "_fabd_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_cmp_((REG:R:G:64)),_624": [["0", "5"]], + "_sub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + 
"_cmgt_((REG:R:F:64)),_((REG:R:F:64)),_#0": [["4", "5"]], + "_fabs_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fmov_((REG:W:F:32)),_2.0e+1": [["4", "5"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:64)),_asr_2": [["2"]], + "_ushll2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_0": [["6"]], + "_ldr_((REG:W:F:8)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_sdiv_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"], ["2"]], + "_fsqrt_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"]], + "_fsqrt_((REG:W:F:32)),_((REG:R:F:32))": [["4"], ["4"], ["4"]], + "_and_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_7": [["2"]], + "_fnmadd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_movi_((REG:W:F:64)),_-256": [["4", "5"]], + "_fadd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_and_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_zip2_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_usubw2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).8h": [["5"]], + "_umulh_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"], ["2"]], + "_fmov_((REG:W:F:64)),_((REG:W:G:64))": [["1"]], + "_sshr_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_10": [["6"]], + "_fcvtl2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s": [["4"]], + "_fcvtms_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"]], + "_movk_((REG:W:G:64)),_0x6c07,_lsl_16": [["0", "5"]], + "_sshll2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_0": [["6"]], + "_mul_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["4"], ["4"]], + "_mvni_((REG:W:F:VEC)).4s,_0": [["5"]], + "_neg_((REG:W:G:64)),_((REG:R:G:64)),_lsr_2": [["2"]], + "_adds_((REG:W:G:64)),_((REG:R:G:64)),_40": [["0", "5"]], + "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_asr_63": [["2"]], + "_smax_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + 
"_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_neg_((REG:W:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_usubl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_xtn_((REG:W:F:VEC)).4h,_((REG:R:F:VEC)).4s": [["4", "5"]], + "_fmul_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_3": [["2"]], + "_cmgt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_mul_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["4"], ["4"]], + "_fmla_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_movi_((REG:W:F:VEC)).16b,_0xdf": [["5"]], + "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_lsl_((REG:W:G:64)),_((REG:R:G:64)),_4": [["0", "5"]], + "_fabs_((REG:W:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_fcvtms_((REG:W:G:64)),_((REG:W:F:32))": [["4"]], + "_orr_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_ushl_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["6"]], + "_neg_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_mul_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"]], + "_subs_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_5": [["2"]], + "_str_((REG:W:F:64)),_[((MEM:64)),_((MIMM:16))]": [["3"], ["3"]], + "_umlal_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s": [["4"]], + "_fcvtzu_((REG:W:G:64)),_((REG:W:F:32))": [["4"]], + "_fcmeq_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0": [["5"]], + "_cmgt_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h,_#0": [["5"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw": [["2"]], + "_fnmadd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_fdiv_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["2"], ["2"], ["2"], ["2"]], + "_ushll_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_0": [["6"]], + "_lsl_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": 
[["0", "5"]], + "_abs_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["6"]], + "_fnmul_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_ushr_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_1": [["6"]], + "_not_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_sbfiz_((REG:W:G:64)),_((REG:R:G:64)),_6,_32": [["0", "5"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_ushll2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_0": [["6"]], + "_saddl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_ror_18": [["2"]], + "_ldr_((REG:W:F:128)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_3": [["2"]], + "_fdiv_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"]], + "_dup_((REG:W:F:VEC)).2d,_((REG:W:G:64))": [["5"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_sshr_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_56": [["6"]], + "_fcvtps_((REG:W:G:64)),_((REG:W:F:64))": [["4"]], + "_usubl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h": [["5"]], + "_umull2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["4"]], + "_cmge_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fmov_((REG:W:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_fcmeq_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_uzp2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_ushr_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_19": [["6"]], + "_fmadd_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_fcvtzu_((REG:W:G:64)),_((REG:W:F:64))": [["4"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtb": [["2"]], + "_uaddl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_fcmlt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_0": [["5"]], + 
"_ssubw2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).4s": [["5"]], + "_fcmgt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fmov_((REG:W:F:VEC)).4s,_1.0e+0": [["5"]], + "_fcmle_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0": [["5"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtw_2": [["2"]], + "_fcvtzs_((REG:W:G:64)),_((REG:W:F:32))": [["4"]], + "_bic_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_uxtb": [["2"]], + "_fcmp_((REG:R:F:32)),_#0.0": [["6"]], + "_fsub_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_xtn2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).4s": [["4", "5"]], + "_sshll_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_0": [["6"]], + "_bif_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["4"]], + "_ands_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_fadd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_fcmgt_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_0": [["5"]], + "_mvn_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_fmul_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).d[0]": [["5"]], + "_mov_((REG:W:F:VEC)).8b,_((REG:R:F:VEC)).8b": [["4", "5"]], + "_fcvtzs_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"]], + "_fmls_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fmul_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_fmla_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_and_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_frintx_((REG:W:F:64)),_((REG:R:F:64))": [["4"]], + "_frintm_((REG:W:F:32)),_((REG:R:F:32))": [["4"]], + "_sabd_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["6"]], + "_fnmsub_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_ucvtf_((REG:W:F:32)),_((REG:W:G:64))": [["4"]], + 
"_movi_((REG:W:F:VEC)).4s,_0x4,_lsl_8": [["5"]], + "_umull_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8b,_((REG:R:F:VEC)).8b": [["4"]], + "_shl_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s,_1": [["6"]], + "_frintp_((REG:W:F:64)),_((REG:R:F:64))": [["4"]], + "_orr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_7": [["2"]], + "_lsr_((REG:W:G:64)),_((REG:R:G:64)),_32": [["0", "5"]], + "_sshll_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_0": [["6"]], + "_fdiv_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["0"], ["0"], ["0"]], + "_fcmp_((REG:R:F:64)),_((REG:R:F:64))": [["6"]], + "_sshll2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_0": [["6"]], + "_sshl_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["6"], ["6"]], + "_str_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]": [["3"], ["3"]], + "_bic_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_8": [["2"]], + "_ucvtf_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"]], + "_str_((REG:W:F:128)),_[((MEM:64)),_((MIMM:16))]": [["0"], ["0"]], + "_bsl_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["4"]], + "_fneg_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_cmn_((REG:R:G:64)),_#1": [["0", "5"]], + "_fmul_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_addp_((REG:W:F:64)),_((REG:R:F:VEC)).2d": [["4", "5"]], + "_movi_((REG:W:F:VEC)).4s,_0": [["5"]], + "_smax_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_add_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_ucvtf_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["4"]], + "_fcvt_((REG:W:F:32)),_((REG:R:F:64))": [["4"]], + "_fcvtn2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).2d": [["4"]], + "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_30": [["2"]], + "_movi_((REG:W:F:VEC)).4s,_0xff,_msl_8": [["5"]], + "_add_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_msub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"]], + 
"_fcmeq_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_0": [["5"]], + "_uzp1_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_uxtw": [["2"]], + "_fsub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_neg_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_str_((REG:W:F:16)),_[((MEM:64)),_((MIMM:16))]": [["3"], ["3"], ["3"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_sxtw": [["2"]], + "_sshr_((REG:W:F:64)),_((REG:R:F:64)),_3": [["6"]], + "_addv_((REG:W:F:32)),_((REG:R:F:VEC)).4s": [["6"]], + "_fadd_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_2": [["2"]], + "_neg_((REG:W:G:64)),_((REG:R:G:64)),_lsl_3": [["2"]], + "_zip1_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw": [["2"]], + "_smax_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_umov_((REG:W:G:64)),_((REG:W:F:VEC)).d[1]": [["1"]], + "_scvtf_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"]], + "_asr_((REG:W:G:64)),_((REG:R:G:64)),_2": [["0", "5"]], + "_xtn_((REG:W:F:VEC)).8b,_((REG:R:F:VEC)).8h": [["4", "5"]], + "_sshll_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_0": [["6"]], + "_orn_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_ssubl2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_scvtf_((REG:W:F:64)),_((REG:W:G:64))": [["4"]], + "_cmp_((REG:R:G:64)),_((REG:R:G:32)),_uxth": [["2"]], + "_sshr_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_8": [["6"]], + "_ushll_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_0": [["6"]], + "_fmov_((REG:W:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_neg_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_smull_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s": [["4"]], + "_umax_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + 
"_cmeq_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_bfi_((REG:W:G:64)),_((REG:R:G:64)),_16,_16": [["2"]], + "_fcmp_((REG:R:F:64)),_#0.0": [["6"]], + "_smaxv_((REG:W:F:32)),_((REG:R:F:VEC)).4s": [["6"]], + "_cmhs_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_cmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_#0": [["5"]], + "_movi_((REG:W:F:VEC)).8h,_0x53": [["5"]], + "_fmul_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).s[1]": [["5"]], + "_fmov_((REG:W:F:64)),_1.0e+1": [["4", "5"]], + "_bic_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_zip1_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_fadd_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_ror_((REG:W:G:64)),_((REG:R:G:64)),_14": [["0", "5"]], + "_sshl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["6"], ["6"]], + "_uzp2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_fsub_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_tbl_((REG:W:F:VEC)).16b,_{((REG:R:F:VEC)).16b},_((REG:R:F:VEC)).16b": [["0"], ["0"]], + "_scvtf_((REG:W:F:32)),_((REG:W:G:64))": [["4"]], + "_ins_((REG:W:F:VEC)).d[1],_((REG:W:G:64))": [["1"]], + "_fcmpe_((REG:R:F:64)),_((REG:R:F:64))": [["6"]], + "_cmeq_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_neg_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + "_eor_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_frintm_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s": [["4"]], + "_dup_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).s[0]": [["5"]], + "_fcmgt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0": [["5"]], + "_fmadd_((REG:W:F:32)),_((REG:R:F:32)),_((REG:R:F:32)),_((REG:R:F:32))": [["4", "5"]], + "_fcvtas_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"]], + "_fcmge_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["5"]], + 
"_adds_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw": [["2"]], + "_xtn2_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).8h": [["4", "5"]], + "_abs_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2d": [["6"]], + "_fcvtl_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).2s": [["4"]], + "_fabs_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_ldr_((REG:W:F:64)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_ldrsb_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_add_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_fnmul_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_ubfiz_((REG:W:G:64)),_((REG:R:G:64)),_2,_6": [["0", "5"]], + "_fmov_((REG:W:F:VEC)).2d,_1.0e+0": [["5"]], + "_mvni_((REG:W:F:VEC)).4h,_0xfe,_lsl_8": [["4", "5"]], + "_mvni_((REG:W:F:VEC)).8h,_0x40": [["5"]], + "_fmsub_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_mla_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["4"], ["4"]], + "_fcmge_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + "_subs_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_eor_((REG:W:G:64)),_((REG:R:G:64)),_4": [["0", "5"]], + "_fsqrt_((REG:W:F:64)),_((REG:R:F:64))": [["4"], ["4"], ["4"], ["4"], ["4"]], + "_frinta_((REG:W:F:64)),_((REG:R:F:64))": [["4"]], + "_and_((REG:W:G:64)),_((REG:R:G:64)),_2147483648": [["0", "5"]], + "_uaddl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h": [["5"]], + "_fneg_((REG:W:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_smin_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_str_((REG:W:F:32)),_[((MEM:64)),_((MIMM:16))]": [["3"], ["3"]], + "_mul_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"]], + "_ssubl_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4h,_((REG:R:F:VEC)).4h": [["5"]], + "_ushr_((REG:W:F:64)),_((REG:R:F:64)),_63": [["6"]], + "_zip2_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["5"]], + 
"_cmn_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_eor_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_11": [["2"]], + "_sbfx_((REG:W:G:64)),_((REG:R:G:64)),_32,_32": [["0", "5"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_asr_3": [["2"]], + "_madd_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"]], + "_smull2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"]], + "_mla_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"]], + "_fcmpe_((REG:R:F:64)),_#0.0": [["6"]], + "_uaddl2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_frintz_((REG:W:F:64)),_((REG:R:F:64))": [["4"]], + "_fcmlt_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s,_0": [["5"]], + "_cmeq_((REG:R:F:VEC)).2s,_((REG:R:F:VEC)).2s,_#0": [["4", "5"]], + "_orr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_fcvtmu_((REG:W:G:64)),_((REG:W:F:64))": [["4"]], + "_ldr_((REG:W:G:64)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_umlal2_((REG:W:F:VEC)).2d,_((REG:R:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"]], + "_ldr_((REG:W:F:32)),_[((MEM:64)),_((MIMM:16))]": [["1"]], + "_sminv_((REG:W:F:16)),_((REG:R:F:VEC)).8h": [["4"]], + "_umaxv_((REG:W:F:8)),_((REG:R:F:VEC)).16b": [["6"], ["6"]], + "_extr_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_49": [["2"]], + "_fcvtzs_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2s": [["4"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsl_3": [["2"]], + "_udiv_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64))": [["2"], ["2"], ["2"], ["2"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_3": [["2"]], + "_uzp2_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_frintp_((REG:W:F:32)),_((REG:R:F:32))": [["4"]], + "_smaxv_((REG:W:F:8)),_((REG:R:F:VEC)).16b": [["6"], ["6"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_asr_63": [["2"]], + 
"_cmp_((REG:R:G:64)),_((REG:R:G:32)),_uxtb": [["2"]], + "_zip1_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["5"]], + "_umaxv_((REG:W:F:32)),_((REG:R:F:VEC)).4s": [["6"]], + "_ushr_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_8": [["6"]], + "_zip2_((REG:W:F:VEC)).8h,_((REG:R:F:VEC)).8h,_((REG:R:F:VEC)).8h": [["5"]], + "_xtn_((REG:W:F:VEC)).2s,_((REG:R:F:VEC)).2d": [["4", "5"]], + "_subs_((REG:W:G:64)),_((REG:R:G:64)),_#1": [["0", "5"]], + "_fsqrt_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"], ["4"]], + "_add_((REG:W:F:64)),_((REG:R:F:64)),_((REG:R:F:64))": [["4", "5"]], + "_bit_((REG:W:F:VEC)).16b,_((REG:R:F:VEC)).16b,_((REG:R:F:VEC)).16b": [["4"]], + "_tst_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_fmov_((REG:W:F:VEC)).d[1],_((REG:W:G:64))": [["1"]], + "_mov_((REG:W:G:64)),_2147483647": [["0", "5"]], + "_clz_((REG:W:G:64)),_((REG:R:G:64))": [["0", "5"]], + "_frintz_((REG:W:F:VEC)).4s,_((REG:R:F:VEC)).4s": [["4"], ["4"]], + "_add_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:32)),_sxtw_2": [["2"]], + "_sub_((REG:W:G:64)),_((REG:R:G:64)),_((REG:R:G:64)),_lsr_8": [["2"]], + "_fcmpe_((REG:R:F:32)),_((REG:R:F:32))": [["6"]] + } + } \ No newline at end of file diff --git a/osaca/data/create_db_entry.py b/osaca/data/create_db_entry.py new file mode 100644 index 0000000..f07a266 --- /dev/null +++ b/osaca/data/create_db_entry.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +from collections import defaultdict +from fractions import Fraction + + +class EntryBuilder: + @staticmethod + def compute_throughput(port_pressure): + port_occupancy = defaultdict(Fraction) + for uops, ports in port_pressure: + for p in ports: + port_occupancy[p] += Fraction(uops, len(ports)) + return float(max(list(port_occupancy.values()) + [0])) + + @staticmethod + def classify(operands_types): + load = "mem" in operands_types[:-1] + store = "mem" in operands_types[-1:] + vec = False + if any([vecr in operands_types for 
vecr in ["mm", "xmm", "ymm", "zmm"]]): + vec = True + assert not (load and store), "Can not process a combined load-store instruction." + return load, store, vec + + def build_description( + self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None + ): + if comment: + comment = " # " + comment + else: + comment = "" + description = "- name: {}{}\n operands: {}\n".format( + instruction_name, comment, "[]" if len(operand_types) == 0 else "" + ) + + for ot in operand_types: + if ot == "imd": + description += " - class: immediate\n imd: int\n" + elif ot.startswith("mem"): + description += " - class: memory\n" ' base: "*"\n' ' offset: "*"\n' + if ot == "mem_simple": + description += " index: ~\n" + elif ot == "mem_complex": + description += " index: gpr\n" + else: + description += ' index: "*"\n' + description += ' scale: "*"\n' + else: + if "{k}" in ot: + description += " - class: register\n name: {}\n mask: True\n".format( + ot.replace("{k}", "") + ) + else: + description += " - class: register\n name: {}\n".format(ot) + + description += ( + " latency: {latency}\n" + " port_pressure: {port_pressure!r}\n" + " throughput: {throughput}\n" + " uops: {uops}\n" + ).format( + latency=latency, + port_pressure=port_pressure, + throughput=self.compute_throughput(port_pressure), + uops=sum([i for i, p in port_pressure]), + ) + return description + + def parse_port_pressure(self, port_pressure_str): + """ + Example: + 1*p45+2*p0+2*p10,11 -> [[1, '45'], [2, '0'], [2, ['10', '11']]] + """ + port_pressure = [] + if port_pressure_str: + for p in port_pressure_str.split("+"): + cycles, ports = p.split("*p") + ports = ports.split(",") + if len(ports) == 1: + ports = ports[0] + else: + ports = list(filter(lambda p: len(p) > 0, ports)) + + port_pressure.append([int(cycles), ports]) + return port_pressure + + def process_item(self, instruction_form, resources): + """ + Example: + ('mov xmm mem', ('1*p45+2*p0', 7) -> ('mov', ['xmm', 'mem'], [[1, '45'], [2, '0']], 7) 
+ """ + if instruction_form.startswith("[") and "]" in instruction_form: + instr_elements = instruction_form.split("]") + instr_elements = [instr_elements[0] + "]"] + instr_elements[1].strip().split(" ") + else: + instr_elements = instruction_form.split(" ") + latency = int(resources[1]) + port_pressure = self.parse_port_pressure(resources[0]) + instruction_name = instr_elements[0] + operand_types = instr_elements[1:] + return self.build_description(instruction_name, operand_types, port_pressure, latency) + + +class ArchEntryBuilder(EntryBuilder): + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): + # Intel ICX + # LD_pressure = [[1, "23"], [1, ["2D", "3D"]]] + # LD_pressure_vec = LD_pressure + # ST_pressure = [[1, "79"], [1, "48"]] + # ST_pressure_vec = ST_pressure + # LD_lat = 5 + # ST_lat = 0 + # Zen3 + LD_pressure = [[1, ["11", "12", "13"]]] + LD_pressure_vec = [[1, ["11", "12"]]] + ST_pressure = [[1, ["12", "13"]]] + ST_pressure_vec = [[1, ["4"]], [1, ["13"]]] + LD_lat = 4 + ST_lat = 0 + + load, store, vec = self.classify(operand_types) + + if load: + if vec: + port_pressure += LD_pressure_vec + else: + port_pressure += LD_pressure + latency += LD_lat + comment = "with load" + return EntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + if store: + if vec: + port_pressure = port_pressure + ST_pressure_vec + else: + port_pressure = port_pressure + ST_pressure + operands = ["mem" if o == "mem" else o for o in operand_types] + latency += ST_lat + return EntryBuilder.build_description( + self, + instruction_name, + operands, + port_pressure, + latency, + "with store", + ) + + # Register only: + return EntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency + ) + + +def get_description(instruction_form, port_pressure, latency, rhs_comment=None): + entry = ArchEntryBuilder().process_item(instruction_form, (port_pressure, 
latency)) + + if rhs_comment is not None: + max_length = max([len(line) for line in entry.split("\n")]) + + commented_entry = "" + for line in entry.split("\n"): + commented_entry += ("{:<" + str(max_length) + "} # {}\n").format(line, rhs_comment) + entry = commented_entry + + return entry + + +if __name__ == "__main__": + import sys + + if len(sys.argv) != 4 and len(sys.argv) != 5: + print("Usage: {} [COMMENT]".format(sys.argv[0])) + sys.exit(0) + + try: + print(get_description(*sys.argv[1:])) + except KeyError: + print("Unknown architecture.") + sys.exit(1) \ No newline at end of file diff --git a/osaca/data/generate_mov_entries.py b/osaca/data/generate_mov_entries.py new file mode 100644 index 0000000..dd7d336 --- /dev/null +++ b/osaca/data/generate_mov_entries.py @@ -0,0 +1,1644 @@ +#!/usr/bin/env python3 +from collections import OrderedDict, defaultdict +from fractions import Fraction + + +class MOVEntryBuilder: + @staticmethod + def compute_throughput(port_pressure): + port_occupancy = defaultdict(Fraction) + for uops, ports in port_pressure: + for p in ports: + port_occupancy[p] += Fraction(uops, len(ports)) + return float(max(list(port_occupancy.values()) + [0])) + + @staticmethod + def classify(operands_types): + load = "mem" in operands_types[:-1] + store = "mem" in operands_types[-1:] + vec = False + if any([vecr in operands_types for vecr in ["mm", "xmm", "ymm", "zmm"]]): + vec = True + assert not (load and store), "Can not process a combined load-store instruction." 
+ return load, store, vec + + def build_description( + self, instruction_name, operand_types, port_pressure=[], latency=0, comment=None + ): + if comment: + comment = " # " + comment + else: + comment = "" + description = "- name: {}{}\n operands:\n".format(instruction_name, comment) + + for ot in operand_types: + if ot == "imd": + description += " - class: immediate\n imd: int\n" + elif ot.startswith("mem"): + description += " - class: memory\n" ' base: "*"\n' ' offset: "*"\n' + if ot == "mem_simple": + description += " index: ~\n" + elif ot == "mem_complex": + description += " index: gpr\n" + else: + description += ' index: "*"\n' + description += ' scale: "*"\n' + else: + description += " - class: register\n name: {}\n".format(ot) + + description += ( + " latency: {latency}\n" + " port_pressure: {port_pressure!r}\n" + " throughput: {throughput}\n" + " uops: {uops}\n" + ).format( + latency=latency, + port_pressure=port_pressure, + throughput=self.compute_throughput(port_pressure), + uops=sum([i for i, p in port_pressure]), + ) + return description + + def parse_port_pressure(self, port_pressure_str): + """ + Example: + 1*p45+2*p0 -> [[1, '45'], [2, '0']] + """ + port_pressure = [] + if port_pressure_str: + for p in port_pressure_str.split("+"): + cycles, ports = p.split("*p") + ports = ports.split(",") + if len(ports) == 1: + ports = ports[0] + port_pressure.append([int(cycles), ports]) + return port_pressure + + def process_item(self, instruction_form, resources): + """ + Example: + ('mov xmm mem', ('1*p45+2*p0', 7) -> ('mov', ['xmm', 'mem'], [[1, '45'], [2, '0']], 7) + """ + instr_elements = instruction_form.split(" ") + latency = resources[1] + port_pressure = self.parse_port_pressure(resources[0]) + instruction_name = instr_elements[0] + operand_types = instr_elements[1:] + return self.build_description(instruction_name, operand_types, port_pressure, latency) + + +class MOVEntryBuilderIntelNoPort7AGU(MOVEntryBuilder): + # for SNB and IVB + def 
build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): + load, store, vec = self.classify(operand_types) + + comment = None + if load: + if "ymm" in operand_types: + port2D3D_pressure = 2 + else: + port2D3D_pressure = 1 + port_pressure += [[1, "23"], [port2D3D_pressure, ["2D", "3D"]]] + latency += 4 + comment = "with load" + if store: + if "ymm" in operand_types: + port4_pressure = 2 + else: + port4_pressure = 1 + port_pressure += [[1, "23"], [port4_pressure, "4"]] + latency += 0 + comment = "with store" + + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + + +class MOVEntryBuilderIntelPort9(MOVEntryBuilder): + # for ICX + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): + load, store, vec = self.classify(operand_types) + + if load: + port_pressure += [[1, "23"], [1, ["2D", "3D"]]] + latency += 5 + comment = "with load" + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + if store: + port_pressure = port_pressure + [[1, "79"], [1, "48"]] + operands = ["mem" if o == "mem" else o for o in operand_types] + latency += 0 + return MOVEntryBuilder.build_description( + self, + instruction_name, + operands, + port_pressure, + latency, + "with store", + ) + + # Register only: + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency + ) + + +class MOVEntryBuilderAMDZen3(MOVEntryBuilder): + # for Zen 3 + def build_description(self, instruction_name, operand_types, port_pressure=[], latency=0): + load, store, vec = self.classify(operand_types) + + if load and vec: + port_pressure += [[1, ["11", "12"]]] + latency += 4 + comment = "with load" + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + elif load: + port_pressure += [[1, ["11", "12", "13"]]] 
+ latency += 4 + comment = "with load" + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency, comment + ) + if store and vec: + port_pressure = port_pressure + [[1, ["4"]], [1, ["13"]]] + operands = ["mem" if o == "mem" else o for o in operand_types] + latency += 0 + return MOVEntryBuilder.build_description( + self, + instruction_name, + operands, + port_pressure, + latency, + "with store", + ) + elif store: + port_pressure = port_pressure + [[1, ["12", "13"]]] + operands = ["mem" if o == "mem" else o for o in operand_types] + latency += 0 + return MOVEntryBuilder.build_description( + self, + instruction_name, + operands, + port_pressure, + latency, + "with store", + ) + # Register only: + return MOVEntryBuilder.build_description( + self, instruction_name, operand_types, port_pressure, latency + ) + + +############################################################################# + +z3 = MOVEntryBuilderAMDZen3() + +zen3_mov_instructions = [ + # https://www.felixcloutier.com/x86/mov + ("mov gpr gpr", ("1*p6789", 1)), + ("mov gpr mem", ("", 0)), + ("mov mem gpr", ("", 0)), + ("mov imd gpr", ("1*p6789", 1)), + ("mov imd mem", ("", 0)), + ("movabs imd gpr", ("1*p6789", 1)), # AT&T version, port util to be verified + # https://www.felixcloutier.com/x86/movapd + ("movapd xmm xmm", ("1*p0123", 1)), + ("movapd xmm mem", ("", 0)), + ("movapd mem xmm", ("", 0)), + ("vmovapd xmm xmm", ("1*p0123", 1)), + ("vmovapd xmm mem", ("", 0)), + ("vmovapd mem xmm", ("", 0)), + ("vmovapd ymm ymm", ("1*p0123", 1)), + ("vmovapd ymm mem", ("", 0)), + ("vmovapd mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movaps + ("movaps xmm xmm", ("1*p0123", 1)), + ("movaps xmm mem", ("", 0)), + ("movaps mem xmm", ("", 0)), + ("vmovaps xmm xmm", ("1*p0123", 1)), + ("vmovaps xmm mem", ("", 0)), + ("vmovaps mem xmm", ("", 0)), + ("vmovaps ymm ymm", ("1*p0123", 1)), + ("vmovaps ymm mem", ("", 0)), + ("vmovaps mem ymm", ("", 0)), + # 
https://www.felixcloutier.com/x86/movd:movq + ("movd gpr mm", ("1*p0123", 1)), + ("movd mem mm", ("", 0)), + ("movq gpr mm", ("1*p0123", 1)), + ("movq mem mm", ("", 0)), + ("movd mm gpr", ("1*p0123", 1)), + ("movd mm mem", ("", 0)), + ("movq mm gpr", ("1*p0123", 1)), + ("movq mm mem", ("", 0)), + ("movd gpr xmm", ("1*p0123", 1)), + ("movd mem xmm", ("", 0)), + ("movq gpr xmm", ("1*p0123", 1)), + ("movq mem xmm", ("", 0)), + ("movd xmm gpr", ("1*p0123", 1)), + ("movd xmm mem", ("", 0)), + ("movq xmm gpr", ("1*p0123", 1)), + ("movq xmm mem", ("", 0)), + ("vmovd gpr xmm", ("1*p0123", 1)), + ("vmovd mem xmm", ("", 0)), + ("vmovq gpr xmm", ("1*p0123", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovd xmm gpr", ("1*p0123", 1)), + ("vmovd xmm mem", ("", 0)), + ("vmovq xmm gpr", ("1*p0123", 1)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movddup + ("movddup xmm xmm", ("1*p12", 1)), + ("movddup mem xmm", ("", 0)), + ("vmovddup xmm xmm", ("1*p12", 1)), + ("vmovddup mem xmm", ("", 0)), + ("vmovddup ymm ymm", ("1*p12", 1)), + ("vmovddup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movdq2q + ("movdq2q xmm mm", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ("movdqa xmm xmm", ("1*p0123", 1)), + ("movdqa mem xmm", ("", 0)), + ("movdqa xmm mem", ("", 0)), + ("vmovdqa xmm xmm", ("1*p0123", 1)), + ("vmovdqa mem xmm", ("", 0)), + ("vmovdqa xmm mem", ("", 0)), + ("vmovdqa ymm ymm", ("1*p0123", 1)), + ("vmovdqa mem ymm", ("", 0)), + ("vmovdqa ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ("movdqu xmm xmm", ("1*p0123", 1)), + ("movdqu mem xmm", ("", 0)), + ("movdqu xmm mem", ("", 0)), + ("vmovdqu xmm xmm", ("1*p0123", 1)), + ("vmovdqu mem xmm", ("", 0)), + ("vmovdqu xmm mem", ("", 0)), + ("vmovdqu ymm ymm", ("1*p0123", 1)), + ("vmovdqu mem ymm", ("", 0)), + ("vmovdqu ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movhlps + ("movhlps xmm xmm", 
("1*p12", 1)), + ("vmovhlps xmm xmm xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movhpd + ("movhpd mem xmm", ("1*p12", 1)), + ("vmovhpd mem xmm xmm", ("1*p12", 1)), + ("movhpd xmm mem", ("", 0)), + ("vmovhpd mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movhps + ("movhps mem xmm", ("1*p12", 1)), + ("vmovhps mem xmm xmm", ("1*p12", 1)), + ("movhps xmm mem", ("", 0)), + ("vmovhps mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movlhps + ("movlhps xmm xmm", ("1*p12", 1)), + ("vmovlhps xmm xmm xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movlpd + ("movlpd mem xmm", ("1*p12", 1)), + ("vmovlpd mem xmm xmm", ("1*p12", 1)), + ("movlpd xmm mem", ("1*p12", 0)), + ("vmovlpd mem xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movlps + ("movlps mem xmm", ("1*p12", 1)), + ("vmovlps mem xmm xmm", ("1*p12", 1)), + ("movlps xmm mem", ("1*p12", 0)), + ("vmovlps mem xmm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/movmskpd + ("movmskpd xmm gpr", ("1*p0123", 1)), + ("vmovmskpd xmm gpr", ("1*p0123", 1)), + ("vmovmskpd ymm gpr", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/movmskps + ("movmskps xmm gpr", ("1*p0123", 1)), + ("vmovmskps xmm gpr", ("1*p0123", 1)), + ("vmovmskps ymm gpr", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/movntdq + ("movntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntdqa + ("movntdqa mem xmm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem xmm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem ymm", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movnti + ("movnti gpr mem", ("", 0)), # TODO NT-store: what latency to use? 
+ # https://www.felixcloutier.com/x86/movntpd + ("movntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ("movntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntq + ("movntq mm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movq + ("movq mm mm", ("", 0)), + ("movq mem mm", ("", 0)), + ("movq mm mem", ("", 0)), + ("movq xmm xmm", ("1*p0123", 1)), + ("movq mem xmm", ("", 0)), + ("movq xmm mem", ("", 0)), + ("vmovq xmm xmm", ("1*p0123", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq + # TODO combined load-store is currently not supported + # ('movs mem mem', ()), + # https://www.felixcloutier.com/x86/movsd + ("movsd xmm xmm", ("1*p0123", 1)), + ("movsd mem xmm", ("", 0)), + ("movsd xmm mem", ("", 0)), + ("vmovsd xmm xmm xmm", ("1*p0123", 1)), + ("vmovsd mem xmm", ("", 0)), + ("vmovsd xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movshdup + ("movshdup xmm xmm", ("1*p12", 1)), + ("movshdup mem xmm", ("", 0)), + ("vmovshdup xmm xmm", ("1*p12", 1)), + ("vmovshdup mem xmm", ("", 0)), + ("vmovshdup ymm ymm", ("1*p12", 1)), + ("vmovshdup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movsldup + ("movsldup xmm xmm", ("1*p12", 1)), + ("movsldup mem xmm", ("", 0)), + ("vmovsldup xmm xmm", ("1*p12", 1)), + ("vmovsldup mem xmm", ("", 0)), + ("vmovsldup ymm ymm", ("1*p12", 1)), + ("vmovsldup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movss + ("movss xmm xmm", ("1*p0123", 1)), + ("movss mem xmm", ("", 0)), + ("vmovss xmm xmm 
xmm", ("1*p0123", 1)), + ("vmovss mem xmm", ("", 0)), + ("vmovss xmm xmm", ("1*p0123", 1)), + ("vmovss xmm mem", ("", 0)), + ("movss mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ("movsx gpr gpr", ("1*p6789", 1)), + ("movsx mem gpr", ("", 0)), + ("movsxd gpr gpr", ("", 0)), + ("movsxd mem gpr", ("", 0)), + ("movsb gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsb mem gpr", ("", 0)), # AT&T version + ("movsw gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsw mem gpr", ("", 0)), # AT&T version + ("movsl gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsl mem gpr", ("", 0)), # AT&T version + ("movsq gpr gpr", ("1*p6789", 1)), # AT&T version + ("movsq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/movupd + ("movupd xmm xmm", ("1*p0123", 1)), + ("movupd mem xmm", ("", 0)), + ("movupd xmm mem", ("", 0)), + ("vmovupd xmm xmm", ("1*p0123", 1)), + ("vmovupd mem xmm", ("", 0)), + ("vmovupd xmm mem", ("", 0)), + ("vmovupd ymm ymm", ("1*p0123", 1)), + ("vmovupd mem ymm", ("", 0)), + ("vmovupd ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movups + ("movups xmm xmm", ("1*p0123", 1)), + ("movups mem xmm", ("", 0)), + ("movups xmm mem", ("", 0)), + ("vmovups xmm xmm", ("1*p0123", 1)), + ("vmovups mem xmm", ("", 0)), + ("vmovups xmm mem", ("", 0)), + ("vmovups ymm ymm", ("1*p0123", 1)), + ("vmovups mem ymm", ("", 0)), + ("vmovups ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movzx + ("movzx gpr gpr", ("1*p6789", 1)), + ("movzx mem gpr", ("", 0)), + ("movzb gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzb mem gpr", ("", 0)), # AT&T version + ("movzw gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzw mem gpr", ("", 0)), # AT&T version + ("movzl gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzl mem gpr", ("", 0)), # AT&T version + ("movzq gpr gpr", ("1*p6789", 1)), # AT&T version + ("movzq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ("cmova gpr gpr", ("1*p69", 
1)), + ("cmova mem gpr", ("", 0)), + ("cmovae gpr gpr", ("1*p69", 1)), + ("cmovae mem gpr", ("", 0)), + ("cmovb gpr gpr", ("1*p69", 1)), + ("cmovb mem gpr", ("", 0)), + ("cmovbe gpr gpr", ("1*p69", 1)), + ("cmovbe mem gpr", ("", 0)), + ("cmovc gpr gpr", ("1*p69", 1)), + ("cmovc mem gpr", ("", 0)), + ("cmove gpr gpr", ("1*p69", 1)), + ("cmove mem gpr", ("", 0)), + ("cmovg gpr gpr", ("1*p69", 1)), + ("cmovg mem gpr", ("", 0)), + ("cmovge gpr gpr", ("1*p69", 1)), + ("cmovge mem gpr", ("", 0)), + ("cmovl gpr gpr", ("1*p69", 1)), + ("cmovl mem gpr", ("", 0)), + ("cmovle gpr gpr", ("1*p69", 1)), + ("cmovle mem gpr", ("", 0)), + ("cmovna gpr gpr", ("1*p69", 1)), + ("cmovna mem gpr", ("", 0)), + ("cmovnae gpr gpr", ("1*p69", 1)), + ("cmovnae mem gpr", ("", 0)), + ("cmovnb gpr gpr", ("1*p69", 1)), + ("cmovnb mem gpr", ("", 0)), + ("cmovnbe gpr gpr", ("1*p69", 1)), + ("cmovnbe mem gpr", ("", 0)), + ("cmovnc gpr gpr", ("1*p69", 1)), + ("cmovnc mem gpr", ("", 0)), + ("cmovne gpr gpr", ("1*p69", 1)), + ("cmovne mem gpr", ("", 0)), + ("cmovng gpr gpr", ("1*p69", 1)), + ("cmovng mem gpr", ("", 0)), + ("cmovnge gpr gpr", ("1*p69", 1)), + ("cmovnge mem gpr", ("", 0)), + ("cmovnl gpr gpr", ("1*p69", 1)), + ("cmovnl mem gpr", ("", 0)), + ("cmovno gpr gpr", ("1*p69", 1)), + ("cmovno mem gpr", ("", 0)), + ("cmovnp gpr gpr", ("1*p69", 1)), + ("cmovnp mem gpr", ("", 0)), + ("cmovns gpr gpr", ("1*p69", 1)), + ("cmovns mem gpr", ("", 0)), + ("cmovnz gpr gpr", ("1*p69", 1)), + ("cmovnz mem gpr", ("", 0)), + ("cmovo gpr gpr", ("1*p69", 1)), + ("cmovo mem gpr", ("", 0)), + ("cmovp gpr gpr", ("1*p69", 1)), + ("cmovp mem gpr", ("", 0)), + ("cmovpe gpr gpr", ("1*p69", 1)), + ("cmovpe mem gpr", ("", 0)), + ("cmovpo gpr gpr", ("1*p69", 1)), + ("cmovpo mem gpr", ("", 0)), + ("cmovs gpr gpr", ("1*p69", 1)), + ("cmovs mem gpr", ("", 0)), + ("cmovz gpr gpr", ("1*p69", 1)), + ("cmovz mem gpr", ("", 0)), + # https://www.felixcloutier.com/x86/pmovmskb + ("pmovmskb mm gpr", ("1*p0123", 1)), + ("pmovmskb 
xmm gpr", ("1*p0123", 1)), + ("vpmovmskb xmm gpr", ("1*p0123", 1)), + # https://www.felixcloutier.com/x86/pmovsx + ("pmovsxbw xmm xmm", ("1*p12", 1)), + ("pmovsxbw mem xmm", ("1*p12", 1)), + ("pmovsxbd xmm xmm", ("1*p12", 1)), + ("pmovsxbd mem xmm", ("1*p12", 1)), + ("pmovsxbq xmm xmm", ("1*p12", 1)), + ("pmovsxbq mem xmm", ("1*p12", 1)), + ("vpmovsxbw xmm xmm", ("1*p12", 1)), + ("vpmovsxbw mem xmm", ("1*p12", 1)), + ("vpmovsxbd xmm xmm", ("1*p12", 1)), + ("vpmovsxbd mem xmm", ("1*p12", 1)), + ("vpmovsxbq xmm xmm", ("1*p12", 1)), + ("vpmovsxbq mem xmm", ("1*p12", 1)), + ("vpmovsxbw xmm ymm", ("1*p0123", 1)), + ("vpmovsxbw mem ymm", ("1*p12", 1)), + ("vpmovsxbd xmm ymm", ("1*p0123", 1)), + ("vpmovsxbd mem ymm", ("1*p12", 1)), + ("vpmovsxbq xmm ymm", ("1*p0123", 1)), + ("vpmovsxbq mem ymm", ("1*p12", 1)), + # https://www.felixcloutier.com/x86/pmovzx + ("pmovzxbw xmm xmm", ("1*p12", 1)), + ("pmovzxbw mem xmm", ("1*p12", 1)), + ("vpmovzxbw xmm xmm", ("1*p12", 1)), + ("vpmovzxbw mem xmm", ("1*p12", 1)), + ("vpmovzxbw xmm ymm", ("1*p0123", 1)), + ("vpmovzxbw mem ymm", ("1*p12", 1)), + ################################################################# + # https://www.felixcloutier.com/x86/movbe + ("movbe gpr mem", ("1*p67", 5)), + ("movbe mem gpr", ("1*p67", 5)), + ################################################ + # https://www.felixcloutier.com/x86/movq2dq + ("movq2dq mm xmm", ("2*p0123", 1)), +] + + +p9 = MOVEntryBuilderIntelPort9() + +icx_mov_instructions = [ + # https://www.felixcloutier.com/x86/mov + ("mov gpr gpr", ("1*p0156", 1)), + ("mov gpr mem", ("", 0)), + ("mov mem gpr", ("", 0)), + ("mov imd gpr", ("1*p0156", 1)), + ("mov imd mem", ("", 0)), + ("movabs imd gpr", ("1*p0156", 1)), # AT&T version + # https://www.felixcloutier.com/x86/movapd + ("movapd xmm xmm", ("1*p015", 1)), + ("movapd xmm mem", ("", 0)), + ("movapd mem xmm", ("", 0)), + ("vmovapd xmm xmm", ("1*p015", 1)), + ("vmovapd xmm mem", ("", 0)), + ("vmovapd mem xmm", ("", 0)), + ("vmovapd ymm ymm", 
("1*p015", 1)), + ("vmovapd ymm mem", ("", 0)), + ("vmovapd mem ymm", ("", 0)), + ("vmovapd zmm zmm", ("1*p05", 1)), + ("vmovapd zmm mem", ("", 0)), + ("vmovapd mem zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movaps + ("movaps xmm xmm", ("1*p015", 1)), + ("movaps xmm mem", ("", 0)), + ("movaps mem xmm", ("", 0)), + ("vmovaps xmm xmm", ("1*p015", 1)), + ("vmovaps xmm mem", ("", 0)), + ("vmovaps mem xmm", ("", 0)), + ("vmovaps ymm ymm", ("1*p015", 1)), + ("vmovaps ymm mem", ("", 0)), + ("vmovaps mem ymm", ("", 0)), + ("vmovaps zmm zmm", ("1*p05", 1)), + ("vmovaps zmm mem", ("", 0)), + ("vmovaps mem zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movd:movq + ("movd gpr mm", ("1*p5", 1)), + ("movd mem mm", ("", 0)), + ("movq gpr mm", ("1*p5", 1)), + ("movq mem mm", ("", 0)), + ("movd mm gpr", ("1*p0", 1)), + ("movd mm mem", ("", 0)), + ("movq mm gpr", ("1*p0", 1)), + ("movq mm mem", ("", 0)), + ("movd gpr xmm", ("1*p5", 1)), + ("movd mem xmm", ("", 0)), + ("movq gpr xmm", ("1*p5", 1)), + ("movq mem xmm", ("", 0)), + ("movd xmm gpr", ("1*p0", 1)), + ("movd xmm mem", ("", 0)), + ("movq xmm gpr", ("1*p0", 1)), + ("movq xmm mem", ("", 0)), + ("vmovd gpr xmm", ("1*p5", 1)), + ("vmovd mem xmm", ("", 0)), + ("vmovq gpr xmm", ("1*p5", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovd xmm gpr", ("1*p0", 1)), + ("vmovd xmm mem", ("", 0)), + ("vmovq xmm gpr", ("1*p0", 1)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movddup + ("movddup xmm xmm", ("1*p5", 1)), + ("movddup mem xmm", ("", 0)), + ("vmovddup xmm xmm", ("1*p5", 1)), + ("vmovddup mem xmm", ("", 0)), + ("vmovddup ymm ymm", ("1*p5", 1)), + ("vmovddup mem ymm", ("", 0)), + ("vmovddup zmm zmm", ("1*p5", 1)), + ("vmovddup mem zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movdq2q + ("movdq2q xmm mm", ("1*p015+1*p5", 1)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ("movdqa xmm xmm", ("1*p015", 1)), + ("movdqa mem xmm", ("", 0)), + ("movdqa xmm mem", ("", 0)), + 
("vmovdqa xmm xmm", ("1*p015", 1)), + ("vmovdqa mem xmm", ("", 0)), + ("vmovdqa xmm mem", ("", 0)), + ("vmovdqa ymm ymm", ("1*p015", 1)), + ("vmovdqa mem ymm", ("", 0)), + ("vmovdqa ymm mem", ("", 0)), + ("vmovdqa32 xmm xmm", ("1*p0156", 1)), + ("vmovdqa32 mem xmm", ("", 0)), + ("vmovdqa32 xmm mem", ("", 0)), + ("vmovdqa32 ymm ymm", ("1*p015", 1)), + ("vmovdqa32 mem ymm", ("", 0)), + ("vmovdqa32 ymm mem", ("", 0)), + ("vmovdqa32 zmm zmm", ("1*p05", 1)), + ("vmovdqa32 mem zmm", ("", 0)), + ("vmovdqa32 zmm mem", ("", 0)), + ("vmovdqa64 xmm xmm", ("1*p0156", 1)), + ("vmovdqa64 mem xmm", ("", 0)), + ("vmovdqa64 xmm mem", ("", 0)), + ("vmovdqa64 ymm ymm", ("1*p015", 1)), + ("vmovdqa64 mem ymm", ("", 0)), + ("vmovdqa64 ymm mem", ("", 0)), + ("vmovdqa64 zmm zmm", ("1*p05", 1)), + ("vmovdqa64 mem zmm", ("", 0)), + ("vmovdqa64 zmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ("movdqu xmm xmm", ("1*p015", 1)), + ("movdqu mem xmm", ("", 0)), + ("movdqu xmm mem", ("", 0)), + ("vmovdqu xmm xmm", ("1*p015", 1)), + ("vmovdqu mem xmm", ("", 0)), + ("vmovdqu xmm mem", ("", 0)), + ("vmovdqu ymm ymm", ("1*p015", 1)), + ("vmovdqu mem ymm", ("", 0)), + ("vmovdqu ymm mem", ("", 0)), + ("vmovdqu8 xmm xmm", ("1*p0156", 1)), + ("vmovdqu8 mem xmm", ("", 0)), + ("vmovdqu8 xmm mem", ("", 0)), + ("vmovdqu8 ymm ymm", ("1*p015", 1)), + ("vmovdqu8 mem ymm", ("", 0)), + ("vmovdqu8 ymm mem", ("", 0)), + ("vmovdqu8 zmm zmm", ("1*p05", 1)), + ("vmovdqu8 mem zmm", ("", 0)), + ("vmovdqu8 zmm mem", ("", 0)), + ("vmovdqu16 xmm xmm", ("1*p0156", 1)), + ("vmovdqu16 mem xmm", ("", 0)), + ("vmovdqu16 xmm mem", ("", 0)), + ("vmovdqu16 ymm ymm", ("1*p015", 1)), + ("vmovdqu16 mem ymm", ("", 0)), + ("vmovdqu16 ymm mem", ("", 0)), + ("vmovdqu16 zmm zmm", ("1*p05", 1)), + ("vmovdqu16 mem zmm", ("", 0)), + ("vmovdqu16 zmm mem", ("", 0)), + ("vmovdqu32 xmm xmm", ("1*p0156", 1)), + ("vmovdqu32 mem xmm", ("", 0)), + ("vmovdqu32 xmm mem", ("", 0)), + ("vmovdqu32 
ymm ymm", ("1*p015", 1)), + ("vmovdqu32 mem ymm", ("", 0)), + ("vmovdqu32 ymm mem", ("", 0)), + ("vmovdqu32 zmm zmm", ("1*p05", 1)), + ("vmovdqu32 mem zmm", ("", 0)), + ("vmovdqu32 zmm mem", ("", 0)), + ("vmovdqu64 xmm xmm", ("1*p0156", 1)), + ("vmovdqu64 mem xmm", ("", 0)), + ("vmovdqu64 xmm mem", ("", 0)), + ("vmovdqu64 ymm ymm", ("1*p015", 1)), + ("vmovdqu64 mem ymm", ("", 0)), + ("vmovdqu64 ymm mem", ("", 0)), + ("vmovdqu64 zmm zmm", ("1*p05", 1)), + ("vmovdqu64 mem zmm", ("", 0)), + ("vmovdqu64 zmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movhlps + ("movhlps xmm xmm", ("1*p5", 1)), + ("vmovhlps xmm xmm xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movhpd + ("movhpd mem xmm", ("1*p5", 1)), + ("vmovhpd mem xmm xmm", ("1*p5", 1)), + ("movhpd xmm mem", ("", 0)), + ("vmovhpd mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movhps + ("movhps mem xmm", ("1*p5", 1)), + ("vmovhps mem xmm xmm", ("1*p5", 1)), + ("movhps xmm mem", ("", 0)), + ("vmovhps mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movlhps + ("movlhps xmm xmm", ("1*p5", 1)), + ("vmovlhps xmm xmm xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movlpd + ("movlpd mem xmm", ("1*p5", 1)), + ("vmovlpd mem xmm xmm", ("1*p5", 1)), + ("movlpd xmm mem", ("", 0)), + ("vmovlpd mem xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movlps + ("movlps mem xmm", ("1*p5", 1)), + ("vmovlps mem xmm xmm", ("1*p5", 1)), + ("movlps xmm mem", ("", 0)), + ("vmovlps mem xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movmskpd + ("movmskpd xmm gpr", ("1*p0", 1)), + ("vmovmskpd xmm gpr", ("1*p0", 1)), + ("vmovmskpd ymm gpr", ("1*p0", 1)), + # https://www.felixcloutier.com/x86/movmskps + ("movmskps xmm gpr", ("1*p0", 1)), + ("vmovmskps xmm gpr", ("1*p0", 1)), + ("vmovmskps ymm gpr", ("1*p0", 1)), + # https://www.felixcloutier.com/x86/movntdq + ("movntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? 
+ ("vmovntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq ymm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq zmm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntdqa + ("movntdqa mem xmm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem xmm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem ymm", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdqa mem zmm", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movnti + ("movnti gpr mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntpd + ("movntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd ymm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd zmm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ("movntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps ymm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps zmm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntq + ("movntq mm mem", ("", 0)), # TODO NT-store: what latency to use? 
+ # https://www.felixcloutier.com/x86/movq + ("movq mm mm", ("", 0)), + ("movq mem mm", ("", 0)), + ("movq mm mem", ("", 0)), + ("movq xmm xmm", ("1*p015", 1)), + ("movq mem xmm", ("", 0)), + ("movq xmm mem", ("", 0)), + ("vmovq xmm xmm", ("1*p015", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq + # TODO combined load-store is currently not supported + # ('movs mem mem', ()), + # https://www.felixcloutier.com/x86/movsd + ("movsd xmm xmm", ("1*p015", 1)), + ("movsd mem xmm", ("", 0)), + ("movsd xmm mem", ("", 0)), + ("vmovsd xmm xmm xmm", ("1*p015", 1)), + ("vmovsd mem xmm", ("", 0)), + ("vmovsd xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movshdup + ("movshdup xmm xmm", ("1*p15", 1)), + ("movshdup mem xmm", ("", 0)), + ("vmovshdup xmm xmm", ("1*p15", 1)), + ("vmovshdup mem xmm", ("", 0)), + ("vmovshdup ymm ymm", ("1*p15", 1)), + ("vmovshdup mem ymm", ("", 0)), + ("vmovshdup zmm zmm", ("1*p5", 1)), + ("vmovshdup mem zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movsldup + ("movsldup xmm xmm", ("1*p15", 1)), + ("movsldup mem xmm", ("", 0)), + ("vmovsldup xmm xmm", ("1*p15", 1)), + ("vmovsldup mem xmm", ("", 0)), + ("vmovsldup ymm ymm", ("1*p15", 1)), + ("vmovsldup mem ymm", ("", 0)), + ("vmovsldup zmm zmm", ("1*p5", 1)), + ("vmovsldup mem zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movss + ("movss xmm xmm", ("1*p015", 1)), + ("movss mem xmm", ("", 0)), + ("vmovss xmm xmm xmm", ("1*p015", 1)), + ("vmovss mem xmm", ("", 0)), + ("vmovss xmm xmm", ("1*p015", 1)), + ("vmovss xmm mem", ("", 0)), + ("movss mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ("movsx gpr gpr", ("1*p0156", 1)), + ("movsx mem gpr", ("", 0)), + ("movsxd gpr gpr", ("", 0)), + ("movsxd mem gpr", ("", 0)), + ("movsb gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsb mem gpr", ("", 0)), # AT&T version + ("movsw gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsw 
mem gpr", ("", 0)), # AT&T version + ("movsl gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsl mem gpr", ("", 0)), # AT&T version + ("movsq gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/movupd + ("movupd xmm xmm", ("1*p015", 1)), + ("movupd mem xmm", ("", 0)), + ("movupd xmm mem", ("", 0)), + ("vmovupd xmm xmm", ("1*p015", 1)), + ("vmovupd mem xmm", ("", 0)), + ("vmovupd xmm mem", ("", 0)), + ("vmovupd ymm ymm", ("1*p015", 1)), + ("vmovupd mem ymm", ("", 0)), + ("vmovupd ymm mem", ("", 0)), + ("vmovupd zmm zmm", ("1*p05", 1)), + ("vmovupd mem zmm", ("", 0)), + ("vmovupd zmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movups + ("movups xmm xmm", ("1*p015", 1)), + ("movups mem xmm", ("", 0)), + ("movups xmm mem", ("", 0)), + ("vmovups xmm xmm", ("1*p015", 1)), + ("vmovups mem xmm", ("", 0)), + ("vmovups xmm mem", ("", 0)), + ("vmovups ymm ymm", ("1*p015", 1)), + ("vmovups mem ymm", ("", 0)), + ("vmovups ymm mem", ("", 0)), + ("vmovups zmm zmm", ("1*p05", 1)), + ("vmovups mem zmm", ("", 0)), + ("vmovups zmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movzx + ("movzx gpr gpr", ("1*p0156", 1)), + ("movzx mem gpr", ("", 0)), + ("movzb gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzb mem gpr", ("", 0)), # AT&T version + ("movzw gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzw mem gpr", ("", 0)), # AT&T version + ("movzl gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzl mem gpr", ("", 0)), # AT&T version + ("movzq gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ("cmova gpr gpr", ("2*p06", 1)), + ("cmova mem gpr", ("", 0)), + ("cmovae gpr gpr", ("1*p06", 1)), + ("cmovae mem gpr", ("", 0)), + ("cmovb gpr gpr", ("2*p06", 1)), + ("cmovb mem gpr", ("", 0)), + ("cmovbe gpr gpr", ("2*p06", 1)), + ("cmovbe mem gpr", ("", 0)), + ("cmovc gpr gpr", ("1*p06", 1)), + ("cmovc mem gpr", ("", 0)), 
+ ("cmove gpr gpr", ("1*p06", 1)), + ("cmove mem gpr", ("", 0)), + ("cmovg gpr gpr", ("1*p06", 1)), + ("cmovg mem gpr", ("", 0)), + ("cmovge gpr gpr", ("1*p06", 1)), + ("cmovge mem gpr", ("", 0)), + ("cmovl gpr gpr", ("1*p06", 1)), + ("cmovl mem gpr", ("", 0)), + ("cmovle gpr gpr", ("1*p06", 1)), + ("cmovle mem gpr", ("", 0)), + ("cmovna gpr gpr", ("2*p06", 1)), + ("cmovna mem gpr", ("", 0)), + ("cmovnae gpr gpr", ("1*p06", 1)), + ("cmovnae mem gpr", ("", 0)), + ("cmovnb gpr gpr", ("1*p06", 1)), + ("cmovnb mem gpr", ("", 0)), + ("cmovnbe gpr gpr", ("2*p06", 1)), + ("cmovnbe mem gpr", ("", 0)), + ("cmovnc gpr gpr", ("1*p06", 1)), + ("cmovnc mem gpr", ("", 0)), + ("cmovne gpr gpr", ("1*p06", 1)), + ("cmovne mem gpr", ("", 0)), + ("cmovng gpr gpr", ("1*p06", 1)), + ("cmovng mem gpr", ("", 0)), + ("cmovnge gpr gpr", ("1*p06", 1)), + ("cmovnge mem gpr", ("", 0)), + ("cmovnl gpr gpr", ("1*p06", 1)), + ("cmovnl mem gpr", ("", 0)), + ("cmovno gpr gpr", ("1*p06", 1)), + ("cmovno mem gpr", ("", 0)), + ("cmovnp gpr gpr", ("1*p06", 1)), + ("cmovnp mem gpr", ("", 0)), + ("cmovns gpr gpr", ("1*p06", 1)), + ("cmovns mem gpr", ("", 0)), + ("cmovnz gpr gpr", ("1*p06", 1)), + ("cmovnz mem gpr", ("", 0)), + ("cmovo gpr gpr", ("1*p06", 1)), + ("cmovo mem gpr", ("", 0)), + ("cmovp gpr gpr", ("1*p06", 1)), + ("cmovp mem gpr", ("", 0)), + ("cmovpe gpr gpr", ("1*p06", 1)), + ("cmovpe mem gpr", ("", 0)), + ("cmovpo gpr gpr", ("1*p06", 1)), + ("cmovpo mem gpr", ("", 0)), + ("cmovs gpr gpr", ("1*p06", 1)), + ("cmovs mem gpr", ("", 0)), + ("cmovz gpr gpr", ("1*p06", 1)), + ("cmovz mem gpr", ("", 0)), + # https://www.felixcloutier.com/x86/pmovmskb + ("pmovmskb mm gpr", ("1*p0", 1)), + ("pmovmskb xmm gpr", ("1*p0", 1)), + ("vpmovmskb xmm gpr", ("1*p0", 1)), + # https://www.felixcloutier.com/x86/pmovsx + ("pmovsxbw xmm xmm", ("1*p15", 1)), + ("pmovsxbw mem xmm", ("1*p15", 1)), + ("pmovsxbd xmm xmm", ("1*p15", 1)), + ("pmovsxbd mem xmm", ("1*p15", 1)), + ("pmovsxbq xmm xmm", ("1*p15", 1)), + 
("pmovsxbq mem xmm", ("1*p15", 1)), + ("vpmovsxbw xmm xmm", ("1*p15", 1)), + ("vpmovsxbw mem xmm", ("1*p15", 1)), + ("vpmovsxbd xmm xmm", ("1*p15", 1)), + ("vpmovsxbd mem xmm", ("1*p15", 1)), + ("vpmovsxbq xmm xmm", ("1*p15", 1)), + ("vpmovsxbq mem xmm", ("1*p15", 1)), + ("vpmovsxbw xmm ymm", ("1*p5", 1)), + ("vpmovsxbw mem ymm", ("1*p5", 1)), + ("vpmovsxbd xmm ymm", ("1*p5", 1)), + ("vpmovsxbd mem ymm", ("1*p5", 1)), + ("vpmovsxbq xmm ymm", ("1*p5", 1)), + ("vpmovsxbq mem ymm", ("1*p5", 1)), + ("vpmovsxbw ymm zmm", ("1*p5", 3)), + ("vpmovsxbw mem zmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/pmovzx + ("pmovzxbw xmm xmm", ("1*p15", 1)), + ("pmovzxbw mem xmm", ("1*p15", 1)), + ("vpmovzxbw xmm xmm", ("1*p15", 1)), + ("vpmovzxbw mem xmm", ("1*p15", 1)), + ("vpmovzxbw xmm ymm", ("1*p5", 1)), + ("vpmovzxbw mem ymm", ("1*p5", 1)), + ("vpmovzxbw ymm zmm", ("1*p5", 1)), + ("vpmovzxbw mem zmm", ("1*p5", 1)), + ################################################################# + # https://www.felixcloutier.com/x86/movbe + ("movbe gpr mem", ("1*p15", 6)), + ("movbe mem gpr", ("1*p15", 6)), + ################################################ + # https://www.felixcloutier.com/x86/movapd + # TODO with masking! + # https://www.felixcloutier.com/x86/movaps + # TODO with masking! + # https://www.felixcloutier.com/x86/movddup + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movq2dq + ("movq2dq mm xmm", ("1*p0+1*p015", 1)), + # https://www.felixcloutier.com/x86/movsd + # TODO with masking! + # https://www.felixcloutier.com/x86/movshdup + # TODO with masking! + # https://www.felixcloutier.com/x86/movsldup + # TODO with masking! + # https://www.felixcloutier.com/x86/movss + # TODO with masking! + # https://www.felixcloutier.com/x86/movupd + # TODO with masking! 
class MOVEntryBuilderIntelWithPort7AGU(MOVEntryBuilder):
    """MOV entry builder that distinguishes simple and complex store addressing.

    Store forms are emitted twice: once with ``mem_simple`` operands using the
    port group ``"237"`` and once with ``mem_complex`` operands using ``"23"``.
    """

    # for HSW, BDW, SKX and CSX

    def build_description(self, instruction_name, operand_types, port_pressure=None, latency=0):
        """Build the description(s) for one MOV instruction form.

        Args:
            instruction_name: Forwarded unchanged to ``MOVEntryBuilder.build_description``.
            operand_types: Sequence of operand class strings (e.g. ``"mem"``, ``"xmm"``).
            port_pressure: Entries shaped like ``[1, "23"]`` describing the
                register-only part of the instruction. It is never modified;
                ``None`` (the default) means "no entries".
            latency: Register-only latency. 4 is added for load forms.

        Returns:
            The result of ``MOVEntryBuilder.build_description``. For store forms,
            two such results (simple AGU first, then complex AGU) joined by a
            newline -- presumably strings; confirm against the base class.
        """
        # classify() yields three flags here; the third is not needed by this builder.
        load, store, _vec = self.classify(operand_types)

        # Work on a private copy. The previous in-place ``+=`` on a ``[]`` default
        # leaked the load ports into every later call (and into the caller's list).
        base_pressure = [] if port_pressure is None else list(port_pressure)

        if load:
            return MOVEntryBuilder.build_description(
                self,
                instruction_name,
                operand_types,
                base_pressure + [[1, "23"], [1, ["2D", "3D"]]],
                latency + 4,
                "with load",
            )

        if store:
            # Only the generic "mem" operand is specialised; every other operand is kept.
            operands_simple = ["mem_simple" if o == "mem" else o for o in operand_types]
            operands_complex = ["mem_complex" if o == "mem" else o for o in operand_types]
            return (
                MOVEntryBuilder.build_description(
                    self,
                    instruction_name,
                    operands_simple,
                    base_pressure + [[1, "237"], [1, "4"]],
                    latency,
                    "with store, simple AGU",
                )
                + "\n"
                + MOVEntryBuilder.build_description(
                    self,
                    instruction_name,
                    operands_complex,
                    base_pressure + [[1, "23"], [1, "4"]],
                    latency,
                    "with store, complex AGU",
                )
            )

        # Register only:
        return MOVEntryBuilder.build_description(
            self, instruction_name, operand_types, base_pressure, latency
        )


np7 = MOVEntryBuilderIntelNoPort7AGU()
p7 = MOVEntryBuilderIntelWithPort7AGU()
ymm ymm", ("1*p5", 1)), + ("vmovapd ymm mem", ("", 0)), + ("vmovapd mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movaps + ("movaps xmm xmm", ("1*p5", 1)), + ("movaps xmm mem", ("", 0)), + ("movaps mem xmm", ("", 0)), + ("vmovaps xmm xmm", ("1*p5", 1)), + ("movaps xmm mem", ("", 0)), + ("movaps mem xmm", ("", 0)), + ("vmovaps ymm ymm", ("1*p5", 1)), + ("movaps ymm mem", ("", 0)), + ("movaps mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movd:movq + ("movd gpr mm", ("1*p5", 1)), + ("movd mem mm", ("", 0)), + ("movq gpr mm", ("1*p5", 1)), + ("movq mem mm", ("", 0)), + ("movd mm gpr", ("1*p0", 1)), + ("movd mm mem", ("", 0)), + ("movq mm gpr", ("1*p0", 1)), + ("movq mm mem", ("", 0)), + ("movd gpr xmm", ("1*p5", 1)), + ("movd mem xmm", ("", 0)), + ("movq gpr xmm", ("1*p5", 1)), + ("movq mem xmm", ("", 0)), + ("movd xmm gpr", ("1*p0", 1)), + ("movd xmm mem", ("", 0)), + ("movq xmm gpr", ("1*p0", 1)), + ("movq xmm mem", ("", 0)), + ("vmovd gpr xmm", ("1*p5", 1)), + ("vmovd mem xmm", ("", 0)), + ("vmovq gpr xmm", ("1*p5", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovd xmm gpr", ("1*p0", 1)), + ("vmovd xmm mem", ("", 0)), + ("vmovq xmm gpr", ("1*p0", 1)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movddup + ("movddup xmm xmm", ("1*p5", 1)), + ("movddup mem xmm", ("", 0)), + ("vmovddup xmm xmm", ("1*p5", 1)), + ("vmovddup mem xmm", ("", 0)), + ("vmovddup ymm ymm", ("1*p5", 1)), + ("vmovddup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movdq2q + ("movdq2q xmm mm", ("1*p015+1*p5", 1)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ("movdqa xmm xmm", ("1*p015", 1)), + ("movdqa mem xmm", ("", 0)), + ("movdqa xmm mem", ("", 0)), + ("vmovdqa xmm xmm", ("1*p015", 1)), + ("vmovdqa mem xmm", ("", 0)), + ("vmovdqa xmm mem", ("", 0)), + ("vmovdqa ymm ymm", ("1*p05", 1)), + ("vmovdqa mem ymm", ("", 0)), + ("vmovdqa ymm mem", ("", 0)), + # 
https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ("movdqu xmm xmm", ("1*p015", 1)), + ("movdqu mem xmm", ("", 0)), + ("movdqu xmm mem", ("", 0)), + ("vmovdqu xmm xmm", ("1*p015", 1)), + ("vmovdqu mem xmm", ("", 0)), + ("vmovdqu xmm mem", ("", 0)), + ("vmovdqu ymm ymm", ("1*p05", 1)), + ("vmovdqu mem ymm", ("", 0)), + ("vmovdqu ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movhlps + ("movhlps xmm xmm", ("1*p5", 1)), + ("vmovhlps xmm xmm xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movhpd + ("movhpd mem xmm", ("1*p5", 1)), + ("vmovhpd mem xmm xmm", ("1*p5", 1)), + ("movhpd xmm mem", ("", 0)), + ("vmovhpd mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movhps + ("movhps mem xmm", ("1*p5", 1)), + ("vmovhps mem xmm xmm", ("1*p5", 1)), + ("movhps xmm mem", ("", 0)), + ("vmovhps mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movlhps + ("movlhps xmm xmm", ("1*p5", 1)), + ("vmovlhps xmm xmm xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movlpd + ("movlpd mem xmm", ("1*p5", 1)), + ("vmovlpd mem xmm xmm", ("1*p5", 1)), + ("movlpd xmm mem", ("", 0)), + ("vmovlpd mem xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movlps + ("movlps mem xmm", ("1*p5", 1)), + ("vmovlps mem xmm xmm", ("1*p5", 1)), + ("movlps xmm mem", ("", 0)), + ("vmovlps mem xmm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/movmskpd + ("movmskpd xmm gpr", ("1*p0", 2)), + ("vmovmskpd xmm gpr", ("1*p0", 2)), + ("vmovmskpd ymm gpr", ("1*p0", 2)), + # https://www.felixcloutier.com/x86/movmskps + ("movmskps xmm gpr", ("1*p0", 1)), + ("vmovmskps xmm gpr", ("1*p0", 1)), + ("vmovmskps ymm gpr", ("1*p0", 1)), + # https://www.felixcloutier.com/x86/movntdq + ("movntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntdq ymm mem", ("", 0)), # TODO NT-store: what latency to use? 
+ # https://www.felixcloutier.com/x86/movntdqa + ("movntdqa mem xmm", ("", 0)), + ("vmovntdqa mem xmm", ("", 0)), + ("vmovntdqa mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movnti + ("movnti gpr mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntpd + ("movntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntpd ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ("movntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps xmm mem", ("", 0)), # TODO NT-store: what latency to use? + ("vmovntps ymm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntq + ("movntq mm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movq + ("movq mm mm", ("", 0)), + ("movq mem mm", ("", 0)), + ("movq mm mem", ("", 0)), + ("movq xmm xmm", ("1*p015", 1)), + ("movq mem xmm", ("", 0)), + ("movq xmm mem", ("", 0)), + ("vmovq xmm xmm", ("1*p015", 1)), + ("vmovq mem xmm", ("", 0)), + ("vmovq xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movq2dq + ("movq2dq mm xmm", ("1*p015", 1)), + # https://www.felixcloutier.com/x86/movs:movsb:movsw:movsd:movsq + # TODO combined load-store is currently not supported + # ('movs mem mem', ()), + # https://www.felixcloutier.com/x86/movsd + ("movsd xmm xmm", ("1*p5", 1)), + ("movsd mem xmm", ("", 0)), + ("movsd xmm mem", ("", 0)), + ("vmovsd xmm xmm xmm", ("1*p5", 1)), + ("vmovsd mem xmm", ("", 0)), + ("vmovsd xmm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movshdup + ("movshdup xmm xmm", ("1*p5", 1)), + ("movshdup mem xmm", ("", 0)), + ("vmovshdup xmm xmm", ("1*p5", 1)), + ("vmovshdup mem xmm", ("", 0)), + ("vmovshdup ymm ymm", ("1*p5", 1)), + ("vmovshdup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movsldup + ("movsldup 
xmm xmm", ("1*p5", 1)), + ("movsldup mem xmm", ("", 0)), + ("vmovsldup xmm xmm", ("1*p5", 1)), + ("vmovsldup mem xmm", ("", 0)), + ("vmovsldup ymm ymm", ("1*p5", 1)), + ("vmovsldup mem ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movss + ("movss xmm xmm", ("1*p5", 1)), + ("movss mem xmm", ("", 0)), + ("vmovss xmm xmm xmm", ("1*p5", 1)), + ("vmovss mem xmm", ("", 0)), + ("vmovss xmm mem", ("", 0)), + ("movss mem xmm", ("", 0)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ("movsx gpr gpr", ("1*p015", 1)), + ("movsx mem gpr", ("", 0)), + ("movsxd gpr gpr", ("", 0)), + ("movsxd mem gpr", ("", 0)), + ("movsb gpr gpr", ("1*p015", 1)), # AT&T version + ("movsb mem gpr", ("", 0)), # AT&T version + ("movsw gpr gpr", ("1*p015", 1)), # AT&T version + ("movsw mem gpr", ("", 0)), # AT&T version + ("movsl gpr gpr", ("1*p015", 1)), # AT&T version + ("movsl mem gpr", ("", 0)), # AT&T version + ("movsq gpr gpr", ("1*p015", 1)), # AT&T version + ("movsq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/movupd + ("movupd xmm xmm", ("1*p5", 1)), + ("movupd mem xmm", ("", 0)), + ("movupd xmm mem", ("", 0)), + ("vmovupd xmm xmm", ("1*p5", 1)), + ("vmovupd mem xmm", ("", 0)), + ("vmovupd xmm mem", ("", 0)), + ("vmovupd ymm ymm", ("1*p5", 1)), + ("vmovupd mem ymm", ("", 0)), + ("vmovupd ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movups + ("movups xmm xmm", ("1*p5", 1)), + ("movups mem xmm", ("", 0)), + ("movups xmm mem", ("", 0)), + ("vmovups xmm xmm", ("1*p5", 1)), + ("vmovups mem xmm", ("", 0)), + ("vmovups xmm mem", ("", 0)), + ("vmovups ymm ymm", ("1*p5", 1)), + ("vmovups mem ymm", ("", 0)), + ("vmovups ymm mem", ("", 0)), + # https://www.felixcloutier.com/x86/movzx + ("movzx gpr gpr", ("1*p015", 1)), + ("movzx mem gpr", ("", 0)), + ("movzb gpr gpr", ("1*p015", 1)), # AT&T version + ("movzb mem gpr", ("", 0)), # AT&T version + ("movzw gpr gpr", ("1*p015", 1)), # AT&T version + ("movzw mem gpr", ("", 0)), # AT&T version + ("movzl 
gpr gpr", ("1*p015", 1)), # AT&T version + ("movzl mem gpr", ("", 0)), # AT&T version + ("movzq gpr gpr", ("1*p015", 1)), # AT&T version + ("movzq mem gpr", ("", 0)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ("cmova gpr gpr", ("1*p015+2*p05", 2)), + ("cmova mem gpr", ("1*p015+2*p05", 2)), + ("cmovae gpr gpr", ("1*p015+1*p05", 2)), + ("cmovae mem gpr", ("1*p015+2*p05", 2)), + ("cmovb gpr gpr", ("1*p015+2*p05", 2)), + ("cmovb mem gpr", ("1*p015+1*p05", 2)), + ("cmovbe gpr gpr", ("1*p015+2*p05", 2)), + ("cmovbe mem gpr", ("1*p015+2*p05", 2)), + ("cmovc gpr gpr", ("1*p015+1*p05", 2)), + ("cmovc mem gpr", ("1*p015+1*p05", 2)), + ("cmove gpr gpr", ("1*p015+1*p05", 2)), + ("cmove mem gpr", ("1*p015+1*p05", 2)), + ("cmovg gpr gpr", ("1*p015+1*p05", 2)), + ("cmovg mem gpr", ("1*p015+1*p05", 2)), + ("cmovge gpr gpr", ("1*p015+1*p05", 2)), + ("cmovge mem gpr", ("1*p015+1*p05", 2)), + ("cmovl gpr gpr", ("1*p015+1*p05", 2)), + ("cmovl mem gpr", ("1*p015+1*p05", 2)), + ("cmovle gpr gpr", ("1*p015+1*p05", 2)), + ("cmovle mem gpr", ("1*p015+1*p05", 2)), + ("cmovna gpr gpr", ("1*p015+2*p05", 2)), + ("cmovna mem gpr", ("1*p015+2*p05", 2)), + ("cmovnae gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnae mem gpr", ("1*p015+1*p05", 2)), + ("cmovnb gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnb mem gpr", ("1*p015+1*p05", 2)), + ("cmovnbe gpr gpr", ("1*p015+2*p05", 2)), + ("cmovnbe mem gpr", ("1*p015+2*p05", 2)), + ("cmovnb gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnb mem gpr", ("1*p015+1*p05", 2)), + ("cmovnc gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnc mem gpr", ("1*p015+1*p05", 2)), + ("cmovne gpr gpr", ("1*p015+1*p05", 2)), + ("cmovne mem gpr", ("1*p015+1*p05", 2)), + ("cmovng gpr gpr", ("1*p015+1*p05", 2)), + ("cmovng mem gpr", ("1*p015+1*p05", 2)), + ("cmovnge gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnge mem gpr", ("1*p015+1*p05", 2)), + ("cmovnl gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnl mem gpr", ("1*p015+1*p05", 2)), + ("cmovno gpr gpr", ("1*p015+1*p05", 2)), + ("cmovno mem 
gpr", ("1*p015+1*p05", 2)), + ("cmovnp gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnp mem gpr", ("1*p015+1*p05", 2)), + ("cmovns gpr gpr", ("1*p015+1*p05", 2)), + ("cmovns mem gpr", ("1*p015+1*p05", 2)), + ("cmovnz gpr gpr", ("1*p015+1*p05", 2)), + ("cmovnz mem gpr", ("1*p015+1*p05", 2)), + ("cmovo gpr gpr", ("1*p015+1*p05", 2)), + ("cmovo mem gpr", ("1*p015+1*p05", 2)), + ("cmovp gpr gpr", ("1*p015+1*p05", 2)), + ("cmovp mem gpr", ("1*p015+1*p05", 2)), + ("cmovpe gpr gpr", ("1*p015+1*p05", 2)), + ("cmovpe mem gpr", ("1*p015+1*p05", 2)), + ("cmovpo gpr gpr", ("1*p015+1*p05", 2)), + ("cmovpo mem gpr", ("1*p015+1*p05", 2)), + ("cmovs gpr gpr", ("1*p015+1*p05", 2)), + ("cmovs mem gpr", ("1*p015+1*p05", 2)), + ("cmovz gpr gpr", ("1*p015+1*p05", 2)), + ("cmovz mem gpr", ("1*p015+1*p05", 2)), + # https://www.felixcloutier.com/x86/pmovmskb + ("pmovmskb mm gpr", ("1*p0", 2)), + ("pmovmskb xmm gpr", ("1*p0", 2)), + ("vpmovmskb xmm gpr", ("1*p0", 2)), + # https://www.felixcloutier.com/x86/pmovsx + ("pmovsxbw xmm xmm", ("1*p15", 1)), + ("pmovsxbw mem xmm", ("1*p15", 1)), + ("pmovsxbd xmm xmm", ("1*p15", 1)), + ("pmovsxbd mem xmm", ("1*p15", 1)), + ("pmovsxbq xmm xmm", ("1*p15", 1)), + ("pmovsxbq mem xmm", ("1*p15", 1)), + ("vpmovsxbw xmm xmm", ("1*p15", 1)), + ("vpmovsxbw mem xmm", ("1*p15", 1)), + ("vpmovsxbd xmm xmm", ("1*p15", 1)), + ("vpmovsxbd mem xmm", ("1*p15", 1)), + ("vpmovsxbq xmm xmm", ("1*p15", 1)), + ("vpmovsxbq mem xmm", ("1*p15", 1)), + ("vpmovsxbw xmm ymm", ("1*p15", 1)), + ("vpmovsxbw mem ymm", ("1*p15", 1)), + ("vpmovsxbd xmm ymm", ("1*p15", 1)), + ("vpmovsxbd mem ymm", ("1*p15", 1)), + ("vpmovsxbq xmm ymm", ("1*p15", 1)), + ("vpmovsxbq mem ymm", ("1*p15", 1)), + # https://www.felixcloutier.com/x86/pmovzx + ("pmovzxbw xmm xmm", ("1*p15", 1)), + ("pmovzxbw mem xmm", ("1*p15", 1)), + ("vpmovzxbw xmm xmm", ("1*p15", 1)), + ("vpmovzxbw mem xmm", ("1*p15", 1)), + ("vpmovzxbw ymm ymm", ("1*p15", 1)), + ("vpmovzxbw mem ymm", ("1*p15", 1)), +] + +ivb_mov_instructions = 
list( + OrderedDict( + snb_mov_instructions + + [ + # https://www.felixcloutier.com/x86/mov + ("mov gpr gpr", ("", 0)), + ("mov imd gpr", ("", 0)), + # https://www.felixcloutier.com/x86/movapd + ("movapd xmm xmm", ("", 0)), + ("vmovapd xmm xmm", ("", 0)), + ("vmovapd ymm ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movaps + ("movaps xmm xmm", ("", 0)), + ("vmovaps xmm xmm", ("", 0)), + ("vmovaps ymm ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + ("movdqa xmm xmm", ("", 0)), + ("vmovdqa xmm xmm", ("", 0)), + ("vmovdqa ymm ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + ("movdqu xmm xmm", ("", 0)), + ("vmovdqu xmm xmm", ("", 0)), + ("vmovdqu ymm ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movupd + ("movupd xmm xmm", ("", 0)), + ("vmovupd xmm xmm", ("", 0)), + ("vmovupd ymm ymm", ("", 0)), + # https://www.felixcloutier.com/x86/movupd + ("movups xmm xmm", ("", 0)), + ("vmovups xmm xmm", ("", 0)), + ("vmovups ymm ymm", ("", 0)), + ] + ).items() +) + +hsw_mov_instructions = list( + OrderedDict( + ivb_mov_instructions + + [ + # https://www.felixcloutier.com/x86/mov + ("mov imd gpr", ("1*p0156", 1)), + ("mov gpr gpr", ("1*p0156", 1)), + ("movabs imd gpr", ("1*p0156", 1)), # AT&T version + # https://www.felixcloutier.com/x86/movbe + ("movbe gpr mem", ("1*p15", 6)), + ("movbe mem gpr", ("1*p15", 6)), + # https://www.felixcloutier.com/x86/movmskpd + ("movmskpd xmm gpr", ("1*p0", 3)), + ("vmovmskpd xmm gpr", ("1*p0", 3)), + ("vmovmskpd ymm gpr", ("1*p0", 3)), + # https://www.felixcloutier.com/x86/movmskps + ("movmskps xmm gpr", ("1*p0", 3)), + ("vmovmskps xmm gpr", ("1*p0", 3)), + ("vmovmskps ymm gpr", ("1*p0", 3)), + # https://www.felixcloutier.com/x86/movsx:movsxd + ("movsx gpr gpr", ("1*p0156", 1)), + ("movsb gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsw gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsl gpr gpr", ("1*p0156", 1)), # AT&T version + ("movsq gpr 
gpr", ("1*p0156", 1)), # AT&T version + # https://www.felixcloutier.com/x86/movzx + ("movzx gpr gpr", ("1*p0156", 1)), + ("movzb gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzw gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzl gpr gpr", ("1*p0156", 1)), # AT&T version + ("movzq gpr gpr", ("1*p0156", 1)), # AT&T version + # https://www.felixcloutier.com/x86/cmovcc + ("cmova gpr gpr", ("1*p0156+2*p06", 2)), + ("cmova mem gpr", ("1*p0156+2*p06", 2)), + ("cmovae gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovae mem gpr", ("1*p0156+2*p06", 2)), + ("cmovb gpr gpr", ("1*p0156+2*p06", 2)), + ("cmovb mem gpr", ("1*p0156+1*p06", 2)), + ("cmovbe gpr gpr", ("1*p0156+2*p06", 2)), + ("cmovbe mem gpr", ("1*p0156+2*p06", 2)), + ("cmovc gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovc mem gpr", ("1*p0156+1*p06", 2)), + ("cmove gpr gpr", ("1*p0156+1*p06", 2)), + ("cmove mem gpr", ("1*p0156+1*p06", 2)), + ("cmovg gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovg mem gpr", ("1*p0156+1*p06", 2)), + ("cmovge gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovge mem gpr", ("1*p0156+1*p06", 2)), + ("cmovl gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovl mem gpr", ("1*p0156+1*p06", 2)), + ("cmovle gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovle mem gpr", ("1*p0156+1*p06", 2)), + ("cmovna gpr gpr", ("1*p0156+2*p06", 2)), + ("cmovna mem gpr", ("1*p0156+2*p06", 2)), + ("cmovnae gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnae mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnb gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnb mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnbe gpr gpr", ("1*p0156+2*p06", 2)), + ("cmovnbe mem gpr", ("1*p0156+2*p06", 2)), + ("cmovnb gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnb mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnc gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnc mem gpr", ("1*p0156+1*p06", 2)), + ("cmovne gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovne mem gpr", ("1*p0156+1*p06", 2)), + ("cmovng gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovng mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnge gpr gpr", ("1*p0156+1*p06", 2)), + 
("cmovnge mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnl gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnl mem gpr", ("1*p0156+1*p06", 2)), + ("cmovno gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovno mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnp gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnp mem gpr", ("1*p0156+1*p06", 2)), + ("cmovns gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovns mem gpr", ("1*p0156+1*p06", 2)), + ("cmovnz gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovnz mem gpr", ("1*p0156+1*p06", 2)), + ("cmovo gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovo mem gpr", ("1*p0156+1*p06", 2)), + ("cmovp gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovp mem gpr", ("1*p0156+1*p06", 2)), + ("cmovpe gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovpe mem gpr", ("1*p0156+1*p06", 2)), + ("cmovpo gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovpo mem gpr", ("1*p0156+1*p06", 2)), + ("cmovs gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovs mem gpr", ("1*p0156+1*p06", 2)), + ("cmovz gpr gpr", ("1*p0156+1*p06", 2)), + ("cmovz mem gpr", ("1*p0156+1*p06", 2)), + # https://www.felixcloutier.com/x86/pmovmskb + ("pmovmskb mm gpr", ("1*p0", 3)), + ("pmovmskb xmm gpr", ("1*p0", 3)), + ("vpmovmskb xmm gpr", ("1*p0", 3)), + ("vpmovmskb ymm gpr", ("1*p0", 3)), + # https://www.felixcloutier.com/x86/pmovsx + ("pmovsxbw xmm xmm", ("1*p5", 1)), + ("pmovsxbw mem xmm", ("1*p5", 1)), + ("pmovsxbd xmm xmm", ("1*p5", 1)), + ("pmovsxbd mem xmm", ("1*p5", 1)), + ("pmovsxbq xmm xmm", ("1*p5", 1)), + ("pmovsxbq mem xmm", ("1*p5", 1)), + ("vpmovsxbw xmm xmm", ("1*p5", 1)), + ("vpmovsxbw mem xmm", ("1*p5", 1)), + ("vpmovsxbd xmm xmm", ("1*p5", 1)), + ("vpmovsxbd mem xmm", ("1*p5", 1)), + ("vpmovsxbq xmm xmm", ("1*p5", 1)), + ("vpmovsxbq mem xmm", ("1*p5", 1)), + ("vpmovsxbw xmm ymm", ("1*p5", 1)), + ("vpmovsxbw mem ymm", ("1*p5", 1)), + ("vpmovsxbd xmm ymm", ("1*p5", 1)), + ("vpmovsxbd mem ymm", ("1*p5", 1)), + ("vpmovsxbq xmm ymm", ("1*p5", 1)), + ("vpmovsxbq mem ymm", ("1*p5", 1)), + # https://www.felixcloutier.com/x86/pmovzx + ("pmovzxbw xmm xmm", ("1*p5", 
1)), + ("pmovzxbw mem xmm", ("1*p5", 1)), + ("vpmovzxbw xmm xmm", ("1*p5", 1)), + ("vpmovzxbw mem xmm", ("1*p5", 1)), + ("vpmovzxbw ymm ymm", ("1*p5", 1)), + ("vpmovzxbw mem ymm", ("1*p5", 1)), + ] + ).items() +) + +bdw_mov_instructions = list( + OrderedDict( + hsw_mov_instructions + + [ + # https://www.felixcloutier.com/x86/cmovcc + ("cmova gpr gpr", ("2*p06", 1)), + ("cmova mem gpr", ("2*p06", 1)), + ("cmovae gpr gpr", ("1*p06", 1)), + ("cmovae mem gpr", ("2*p06", 1)), + ("cmovb gpr gpr", ("2*p06", 1)), + ("cmovb mem gpr", ("1*p06", 1)), + ("cmovbe gpr gpr", ("2*p06", 1)), + ("cmovbe mem gpr", ("2*p06", 1)), + ("cmovc gpr gpr", ("1*p06", 1)), + ("cmovc mem gpr", ("1*p06", 1)), + ("cmove gpr gpr", ("1*p06", 1)), + ("cmove mem gpr", ("1*p06", 1)), + ("cmovg gpr gpr", ("1*p06", 1)), + ("cmovg mem gpr", ("1*p06", 1)), + ("cmovge gpr gpr", ("1*p06", 1)), + ("cmovge mem gpr", ("1*p06", 1)), + ("cmovl gpr gpr", ("1*p06", 1)), + ("cmovl mem gpr", ("1*p06", 1)), + ("cmovle gpr gpr", ("1*p06", 1)), + ("cmovle mem gpr", ("1*p06", 1)), + ("cmovna gpr gpr", ("2*p06", 1)), + ("cmovna mem gpr", ("2*p06", 1)), + ("cmovnae gpr gpr", ("1*p06", 1)), + ("cmovnae mem gpr", ("1*p06", 1)), + ("cmovnb gpr gpr", ("1*p06", 1)), + ("cmovnb mem gpr", ("1*p06", 1)), + ("cmovnbe gpr gpr", ("2*p06", 1)), + ("cmovnbe mem gpr", ("2*p06", 1)), + ("cmovnb gpr gpr", ("1*p06", 1)), + ("cmovnb mem gpr", ("1*p06", 1)), + ("cmovnc gpr gpr", ("1*p06", 1)), + ("cmovnc mem gpr", ("1*p06", 1)), + ("cmovne gpr gpr", ("1*p06", 1)), + ("cmovne mem gpr", ("1*p06", 1)), + ("cmovng gpr gpr", ("1*p06", 1)), + ("cmovng mem gpr", ("1*p06", 1)), + ("cmovnge gpr gpr", ("1*p06", 1)), + ("cmovnge mem gpr", ("1*p06", 1)), + ("cmovnl gpr gpr", ("1*p06", 1)), + ("cmovnl mem gpr", ("1*p06", 1)), + ("cmovno gpr gpr", ("1*p06", 1)), + ("cmovno mem gpr", ("1*p06", 1)), + ("cmovnp gpr gpr", ("1*p06", 1)), + ("cmovnp mem gpr", ("1*p06", 1)), + ("cmovns gpr gpr", ("1*p06", 1)), + ("cmovns mem gpr", ("1*p06", 1)), + ("cmovnz gpr 
gpr", ("1*p06", 1)), + ("cmovnz mem gpr", ("1*p06", 1)), + ("cmovo gpr gpr", ("1*p06", 1)), + ("cmovo mem gpr", ("1*p06", 1)), + ("cmovp gpr gpr", ("1*p06", 1)), + ("cmovp mem gpr", ("1*p06", 1)), + ("cmovpe gpr gpr", ("1*p06", 1)), + ("cmovpe mem gpr", ("1*p06", 1)), + ("cmovpo gpr gpr", ("1*p06", 1)), + ("cmovpo mem gpr", ("1*p06", 1)), + ("cmovs gpr gpr", ("1*p06", 1)), + ("cmovs mem gpr", ("1*p06", 1)), + ("cmovz gpr gpr", ("1*p06", 1)), + ("cmovz mem gpr", ("1*p06", 1)), + ] + ).items() +) + +skx_mov_instructions = list( + OrderedDict( + bdw_mov_instructions + + [ + # https://www.felixcloutier.com/x86/movapd + # TODO with masking! + # TODO the following may eliminate or be bound to 1*p0156: + # ('movapd xmm xmm', ('1*p5', 1)), + # ('vmovapd xmm xmm', ('1*p5', 1)), + # ('vmovapd ymm ymm', ('1*p5', 1)), + ("vmovapd zmm zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movaps + # TODO with masking! + # TODO the following may eliminate or be bound to 1*p0156: + # ('movaps xmm xmm', ('1*p5', 1)), + # ('vmovaps xmm xmm', ('1*p5', 1)), + # ('vmovaps ymm ymm', ('1*p5', 1)), + ("vmovaps zmm zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movbe + ("movbe gpr mem", ("1*p15", 4)), + ("movbe mem gpr", ("1*p15", 4)), + # https://www.felixcloutier.com/x86/movddup + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + # TODO with masking! + # https://www.felixcloutier.com/x86/movntdq + ("vmovntdq zmm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntdqa + ("vmovntdqa mem zmm", ("", 0)), + # https://www.felixcloutier.com/x86/movntpd + ("vmovntpd zmm mem", ("", 0)), # TODO NT-store: what latency to use? + # https://www.felixcloutier.com/x86/movntps + ("vmovntps zmm mem", ("", 0)), # TODO NT-store: what latency to use? 
def get_description(arch, rhs_comment=None):
    """Render the mov-instruction entries of a single micro-architecture.

    :param arch: architecture key; one of ``snb``, ``ivb``, ``hsw``, ``bdw``,
        ``skx``, ``csx``, ``icx`` or ``zen3``
    :param rhs_comment: optional text; if given, every output line is padded to
        a common width and suffixed with ``# <rhs_comment>``
    :returns: the processed entries as one string. Without *rhs_comment* the
        entries are joined by newlines; with it, every line (including the
        last one) is terminated by a newline.
    :raises KeyError: if *arch* is not one of the known keys
    """
    # Entry builder and instruction table per architecture. Nothing is rendered
    # at this point, so only the requested architecture is processed below
    # instead of all of them on every call.
    # NOTE(review): np7/p7/p9/z3 and the icx/zen3 tables are not defined in the
    # part of the file visible here -- presumably module-level objects; confirm.
    sources = {
        "snb": (np7, snb_mov_instructions),
        "ivb": (np7, ivb_mov_instructions),
        "hsw": (p7, hsw_mov_instructions),
        "bdw": (p7, bdw_mov_instructions),
        "skx": (p7, skx_mov_instructions),
        "csx": (p7, csx_mov_instructions),
        "icx": (p9, icx_mov_instructions),
        "zen3": (z3, zen3_mov_instructions),
    }
    builder, instructions = sources[arch]  # unknown keys raise KeyError, as before

    description = "\n".join([builder.process_item(*item) for item in instructions])
    if rhs_comment is None:
        return description

    # Split the joined text (not the item list): the alignment is computed per
    # output line, exactly as the text would be printed.
    lines = description.split("\n")
    max_length = max(len(line) for line in lines)
    return "".join(
        "{:<{width}} # {}\n".format(line, rhs_comment, width=max_length) for line in lines
    )
def extract_paramters(instruction_tag, parser, isa):
    """Translate the ``<operand>`` children of an instruction tag into parameter dicts.

    Operands are processed in ascending order of their ``idx`` attribute and
    operands flagged with ``suppressed="1"`` are left out.

    :param instruction_tag: XML element whose ``operand`` children are read
    :param parser: object providing ``parse_register(str)`` and
        ``is_vector_register(dict)``; ``parse_register`` is expected to return
        ``None`` or a dict with a ``"register"`` entry
    :param isa: ``"x86"`` or ``"aarch64"``; selects how register names are
        normalised
    :returns: list of parameter dicts, each carrying at least a ``"class"`` key
    :raises ValueError: for an operand of unknown type or a register the parser
        does not recognise
    """
    operand_tags = sorted(
        instruction_tag.findall("operand"), key=lambda tag: int(tag.attrib["idx"])
    )

    parameters = []
    for operand_tag in operand_tags:
        # Ignore parameters with suppressed=1
        if int(operand_tag.attrib.get("suppressed", "0")):
            continue

        p_type = operand_tag.attrib["type"]
        if p_type == "imm":
            parameter = {"class": "immediate", "imd": "int"}
        elif p_type in ("mem", "agen"):
            # Both kinds are described as a fully wild-carded memory reference.
            parameter = {
                "class": "memory",
                "base": "*",
                "offset": "*",
                "index": "*",
                "scale": "*",
            }
        elif p_type == "relbr":
            parameter = {"class": "identifier"}
        elif p_type == "reg":
            parameter = {"class": "register"}
            # The tag text is a comma separated list of alternatives; only the
            # first one determines the register description.
            possible_regs = [parser.parse_register("%" + r) for r in operand_tag.text.split(",")]
            if possible_regs[0] is None:
                raise ValueError(
                    "Unknown register type for {} with {}.".format(
                        operand_tag.attrib, operand_tag.text
                    )
                )
            register = possible_regs[0]["register"]
            if isa == "x86":
                if parser.is_vector_register(register):
                    # keep only the register family, e.g. "XMM0" -> "xmm"
                    register["name"] = register["name"].lower()[:3]
                    if "mask" in register:
                        register["mask"] = True
                else:
                    register["name"] = "gpr"
            elif isa == "aarch64":
                # The concrete register name is dropped. The previous code
                # indexed the *list* of alternatives with a string here, which
                # raised TypeError for every aarch64 register operand.
                register.pop("name", None)
            parameter.update(register)
        else:
            raise ValueError("Unknown paramter type {}".format(operand_tag.attrib))
        parameters.append(parameter)
    return parameters
throughput and latency + port_pressure, throughput, latency, uops = [], None, None, None + arch_tag = instruction_tag.find('architecture[@name="' + arch.upper() + '"]') + if arch_tag is None: + continue + # skip any instructions without port utilization + if not any(["ports" in x.attrib for x in arch_tag.findall("measurement")]): + print("Couldn't find port utilization, skip: ", iform, file=sys.stderr) + continue + # skip if measured TP is smaller than computed + if [ + float(x.attrib["TP_ports"]) + > min(float(x.attrib["TP_loop"]), float(x.attrib["TP_unrolled"])) + for x in arch_tag.findall("measurement") + ][0]: + print( + "Calculated TP is greater than measured TP.", + iform, + file=sys.stderr, + ) + # skip if instruction contains memory operand + if skip_mem and any( + [x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")] + ): + print("Contains memory operand, skip: ", iform, file=sys.stderr) + continue + # We collect all measurement and IACA information and compare them later + for measurement_tag in arch_tag.iter("measurement"): + if "TP_ports" in measurement_tag.attrib: + throughput = float(measurement_tag.attrib["TP_ports"]) + else: + throughput = min( + measurement_tag.attrib.get("TP_loop", float("inf")), + measurement_tag.attrib.get("TP_unroll", float("inf")), + measurement_tag.attrib.get("TP", float("inf")), + ) + if throughput == float("inf"): + throughput = None + uops = ( + int(measurement_tag.attrib["uops"]) if "uops" in measurement_tag.attrib else None + ) + if "ports" in measurement_tag.attrib: + port_pressure.append(port_pressure_from_tag_attributes(measurement_tag.attrib)) + latencies = [ + int(l_tag.attrib["cycles"]) + for l_tag in measurement_tag.iter("latency") + if "cycles" in l_tag.attrib + ] + if len(latencies) == 0: + latencies = [ + int(l_tag.attrib["max_cycles"]) + for l_tag in measurement_tag.iter("latency") + if "max_cycles" in l_tag.attrib + ] + if latencies[1:] != latencies[:-1]: + print( + "Contradicting latencies 
found, using smallest:", + iform, + latencies, + file=sys.stderr, + ) + if latencies: + latency = min(latencies) + if ignore: + continue + + # Ordered by IACA version (newest last) + for iaca_tag in sorted( + arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"]) + ): + if "ports" in iaca_tag.attrib: + port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib)) + + # Check if all are equal + if port_pressure: + if port_pressure[1:] != port_pressure[:-1]: + print( + "Contradicting port occupancies, using latest IACA:", + iform, + file=sys.stderr, + ) + port_pressure = port_pressure[-1] + else: + # print("No data available for this architecture:", mnemonic, file=sys.stderr) + continue + + # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake: + if arch.upper() in intel_archs and not arch.upper() in ["ICL"]: + if any([p["class"] == "memory" for p in parameters]): + # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D + # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode + port_23 = False + port_4 = False + for i, pp in enumerate(port_pressure): + if "2" in pp[1] and "3" in pp[1]: + port_23 = True + if "4" in pp[1]: + port_4 = True + # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4) + if port_23 and not port_4: + if ( + arch.upper() in ["SNB", "IVB"] + and any([p.get("name", "") == "ymm" for p in parameters]) + and not ("128" in mnemonic) + ): + # x = 2 if SNB or IVB and ymm regiser in any operand and not '128' in + # instruction name + port2D3D_pressure = 2 + else: + # otherwiese x = 1 + port2D3D_pressure = 1 + port_pressure.append((port2D3D_pressure, ["2D", "3D"])) + + # Add missing ports: + for ports in [pp[1] for pp in port_pressure]: + for p in ports: + mm.add_port(p) + + throughput = max(mm.average_port_pressure(port_pressure)) + mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops) + # TODO eliminate 
def rhs_comment(uncommented_string, comment):
    """Append ``# <comment>`` to every line of *uncommented_string*.

    All lines are left-aligned and padded to the length of the longest line, so
    the comments start in the same column.

    :param uncommented_string: text to annotate; split at ``"\\n"``
    :param comment: comment text placed after ``" # "`` on each line
    :returns: the annotated text; every line, including the last one, ends
        with a newline
    """
    lines = uncommented_string.split("\n")
    max_length = max(len(line) for line in lines)
    # One join instead of repeated string concatenation.
    return "".join("{:<{width}} # {}\n".format(line, comment, width=max_length) for line in lines)
def build_bench_instruction(name, operands):
    """Convert an OSACA model instruction into an asmbench instruction string.

    The first operand is emitted as destination (``dst``), all following ones
    as sources (``src``); each operand becomes ``{direction:shape:constraint}``.

    :param name: instruction mnemonic
    :param operands: list of operand dicts; supported ``"class"`` values are
        ``register``, ``register_shift``, ``immediate`` and ``immediate_shift``
    :returns: the asmbench instruction string, or ``None`` if an operand class,
        register prefix or vector shape is not supported
    """
    # register prefix -> (asmbench shape, constraint)
    scalar_registers = {"x": ("i64", "r"), "s": ("float", "w"), "d": ("double", "w")}
    # vector element shape -> asmbench shape (constraint is always "w")
    vector_shapes = {
        "b": "<16 x i8>",
        "h": "<8 x i16>",
        "s": "<4 x float>",
        "d": "<2 x double>",
    }
    # Different instructions have different ranges for literals,
    # so need to pick something "reasonable" for each.
    compare_names = (
        "cmeq",
        "cmge",
        "cmgt",
        "cmle",
        "cmlt",
        "fcmeq",
        "fcmge",
        "fcmgt",
        "fcmle",
        "fcmlt",
        "fcmp",
    )
    logical_names = ("and", "ands", "eor", "eors", "orr", "orrs")
    bitfield_names = ("bfi", "extr", "sbfiz", "sbfx", "shl", "sshr", "ubfiz", "ubfx", "ushr")

    pieces = [name]
    direction = "dst"
    separator = " "
    for operand in operands:
        operand_class = operand["class"]
        if operand_class in ("register", "register_shift"):
            prefix = operand["prefix"]
            if prefix in scalar_registers:
                shape, constraint = scalar_registers[prefix]
            elif prefix == "v":
                shape = vector_shapes.get(operand["shape"])
                if shape is None:
                    return None
                constraint = "w"
            else:
                return None
        elif operand_class in ("immediate", "immediate_shift"):
            shape = "i32"
            if name in compare_names:
                constraint = "0"
            elif name in logical_names:
                constraint = "255"
            elif name in bitfield_names:
                constraint = "7"
            else:
                constraint = "42"
        else:
            return None

        # The shift suffix belongs to this operand only. It used to be kept
        # across iterations and was then repeated after every later operand.
        shift = ""
        if operand_class in ("register_shift", "immediate_shift"):
            shift = ", {}".format(operand["shift_op"])
            if operand["shift"] is not None:
                shift += " {}".format(operand["shift"])

        pieces.append("{}{{{}:{}:{}}}{}".format(separator, direction, shape, constraint, shift))
        direction = "src"
        separator = ", "
    return "".join(pieces)
+ parameter = {} + + if op.startswith("_((REG:"): + parts = op.split(".") + register = parts[0][7:-2] + read_write, register_type, bits = register.split(":") + + parameter["class"] = "register" + if register_type == "G": + if bits == "32": + parameter["prefix"] = "r" + elif bits == "64": + parameter["prefix"] = "x" + else: + raise ValueError("Invalid register bits for {} {}".format(register_type, bits)) + elif register_type == "F": + if bits == "32": + parameter["prefix"] = "s" + state["type"] = "float" + elif bits == "64": + parameter["prefix"] = "d" + state["type"] = "double" + elif bits == "128": + parameter["prefix"] = "q" + elif bits == "VEC": + vec_shape = parts[1] + parameter["prefix"] = "v" + if vec_shape == "16b": + parameter["shape"] = "b" + elif vec_shape == "8h": + parameter["shape"] = "h" + elif vec_shape == "4s": + parameter["shape"] = "s" + state["type"] = "float" + elif vec_shape == "2d": + parameter["shape"] = "d" + state["type"] = "double" + else: + raise ValueError("Invalid vector shape {}".format(vec_shape)) + else: + raise ValueError("Invalid register bits for {} {}".format(register_type, bits)) + else: + raise ValueError("Unknown register type {}".format(register_type)) + elif op.startswith("_[((MEM:"): + bits = op[8:-2].split(":")[0] + if bits == "64": + state["memory_base"] = "x" + else: + raise ValueError("Invalid register bits for MEM {}".format(bits)) + return None + elif op.startswith("_((MIMM:"): + bits = op[8:-3].split(":")[0] + if bits == "16": + parameter["class"] = "memory" + parameter["base"] = state["memory_base"] + parameter["offset"] = "imd" + parameter["index"] = "*" + parameter["scale"] = "*" + parameter["post-indexed"] = False + parameter["pre-indexed"] = False + else: + raise ValueError("Invalid register bits for MEM {}".format(bits)) + elif re.fullmatch("_#?-?(0x)?[0-9a-f]+", op): + parameter["class"] = "immediate" + parameter["imd"] = "int" + elif re.fullmatch("_#?-?[0-9]*\\.[0-9]*", op): + parameter["class"] = "immediate" 
def throughput_guess(ports):
    """Estimate the reciprocal throughput from a port assignment.

    Minimum amount of possible ports per cycle should determine throughput
    to some degree of accuracy. (THIS IS *NOT* ALWAYS TRUE!)

    :param ports: sequence of entries, each entry being the collection of ports
        that entry may be issued to
    :returns: number of entries divided by the size of the smallest entry
    :raises ValueError: if *ports* is empty or contains an entry without ports
    """
    if not ports:
        raise ValueError("Cannot guess throughput without port assignment")
    bottleneck_ports = min(len(entry) for entry in ports)
    if bottleneck_ports == 0:
        # Used to surface as ZeroDivisionError, which callers handling
        # ValueError for unusable instructions do not catch.
        raise ValueError("Cannot guess throughput: entry without possible ports")
    return float(len(ports)) / bottleneck_ports
def main():
    """Command line entry point: turn a PMEvo mapping into a machine model file.

    Reads the mapping JSON given on the command line, optionally starts from a
    template model, optionally benchmarks with asmbench and writes the dumped
    model to ``<arch>.yml`` in the current working directory, where ``<arch>``
    is the lower-cased architecture name found in the mapping.

    Exits with status 1, without touching the output file, if no model could
    be extracted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("json", help="path of mapping.json")
    parser.add_argument("yaml", help="path of template.yml", nargs="?")
    parser.add_argument(
        "--asmbench", help="Benchmark latency and throughput using asmbench.", action="store_true"
    )
    args = parser.parse_args()

    # Context manager: the file is closed even if the JSON is malformed.
    with open(args.json, "r") as json_file:
        mapping = json.load(json_file)
    arch = mapping["arch"]["name"].lower()

    template_model = None
    if args.yaml is not None:
        template_model = MachineModel(path_to_yaml=args.yaml)

    if args.asmbench:
        bench.setup_llvm()

    model = extract_model(mapping, arch, template_model, args.asmbench)
    if model is None:
        # extract_model() returns None for architectures it cannot map to an
        # ISA; previously this ended in an AttributeError after the output
        # file had already been truncated.
        print("No model extracted for architecture {}.".format(arch), file=sys.stderr)
        sys.exit(1)

    # Dump first, open afterwards: a failing dump must not leave an empty file.
    model_string = model.dump()
    with open("{}.yml".format(arch), "w") as f:
        f.write(model_string)