From: Sivan Shani Date: Wed, 17 Dec 2025 16:36:54 +0000 (+0000) Subject: AArch64: Add FEAT_F16F32DOT instructions X-Git-Url: https://git.feebdaed.xyz/?a=commitdiff_plain;h=4373edd2f649ee2f72a5bb202eedb706fa3e96da;p=0xmirror%2Fbinutils-gdb.git AArch64: Add FEAT_F16F32DOT instructions This includes the instructions for the F16F32DOT feature: - FDOT half-precision to single-precision, by element - FDOT half-precision to single-precision, vector --- diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c index f150c2c58ce..1121336abf6 100644 --- a/gas/config/tc-aarch64.c +++ b/gas/config/tc-aarch64.c @@ -10921,6 +10921,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = { AARCH64_FEATURES (2, MOPS, MEMTAG)}, {"sve2p3", AARCH64_FEATURE (SVE2p3), AARCH64_FEATURE (SVE2p2)}, {"sme2p3", AARCH64_FEATURE (SME2p3), AARCH64_FEATURES (2, SME2p2, SME_LUTv2)}, + {"f16f32dot", AARCH64_FEATURE (F16F32DOT), AARCH64_FEATURE (SIMD)}, {NULL, AARCH64_NO_FEATURES, AARCH64_NO_FEATURES}, }; diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi index 308e98c3e78..214f8be031f 100644 --- a/gas/doc/c-aarch64.texi +++ b/gas/doc/c-aarch64.texi @@ -219,6 +219,8 @@ automatically cause those extensions to be disabled. @tab Enable Armv8.2 16-bit floating-point multiplication variant support. @item @code{fp16} @tab @code{fp} @tab Enable Armv8.2 16-bit floating-point support. +@item @code{f16f32dot} @tab @code{simd} + @tab Enable Armv9.7 f16f32dot instructions. @item @code{fprcvt} @tab @code{fp} @tab Enable Armv9.6 fprcvt instructions. @item @code{frintts} @tab @code{fp} diff --git a/gas/testsuite/gas/aarch64/f16f32dot-simd.d b/gas/testsuite/gas/aarch64/f16f32dot-simd.d new file mode 100644 index 00000000000..0e3f0a465fb --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16f32dot-simd.d @@ -0,0 +1,24 @@ +#as: -march=armv8-a+f16f32dot +#objdump: -dr + +.*: file format .* + +Disassembly of section \.text: + +0+ <\.text>: + *[0-9a-f]+: 0f409000 fdot v0\.2s, v0\.4h, v0\.2h\[0] + *[0-9a-f]+: 0f60901f fdot v31\.2s, v0\.4h, v0\.2h\[1] + *[0-9a-f]+: 0f409be0 fdot v0\.2s, v31\.4h, v0\.2h\[2] + *[0-9a-f]+: 0f7f9800 fdot v0\.2s, v0\.4h, v31\.2h\[3] + *[0-9a-f]+: 4f409000 fdot v0\.4s, v0\.8h, v0\.2h\[0] + *[0-9a-f]+: 4f60901f fdot v31\.4s, v0\.8h, v0\.2h\[1] + *[0-9a-f]+: 4f409be0 fdot v0\.4s, v31\.8h, v0\.2h\[2] + *[0-9a-f]+: 4f7f9800 fdot v0\.4s, v0\.8h, v31\.2h\[3] + *[0-9a-f]+: 0e80fc00 fdot v0\.2s, v0\.4h, v0\.4h + *[0-9a-f]+: 0e80fc1f fdot v31\.2s, v0\.4h, v0\.4h + *[0-9a-f]+: 0e80ffe0 fdot v0\.2s, v31\.4h, v0\.4h + *[0-9a-f]+: 0e9ffc00 fdot v0\.2s, v0\.4h, v31\.4h + *[0-9a-f]+: 4e80fc00 fdot v0\.4s, v0\.8h, v0\.8h + *[0-9a-f]+: 4e80fc1f fdot v31\.4s, v0\.8h, v0\.8h + *[0-9a-f]+: 4e80ffe0 fdot v0\.4s, v31\.8h, v0\.8h + *[0-9a-f]+: 4e9ffc00 fdot v0\.4s, v0\.8h, v31\.8h diff --git a/gas/testsuite/gas/aarch64/f16f32dot-simd.s b/gas/testsuite/gas/aarch64/f16f32dot-simd.s new file mode 100644 index 00000000000..ff9203307b6 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16f32dot-simd.s @@ -0,0 +1,19 @@ +fdot v0.2s, v0.4h, v0.2h[0] +fdot v31.2s, v0.4h, v0.2h[1] +fdot v0.2s, v31.4h, v0.2h[2] +fdot v0.2s, v0.4h, v31.2h[3] + +fdot v0.4s, v0.8h, v0.2h[0] +fdot v31.4s, v0.8h, v0.2h[1] +fdot v0.4s, v31.8h, v0.2h[2] +fdot v0.4s, v0.8h, v31.2h[3] + +fdot v0.2s, v0.4h, v0.4h +fdot v31.2s, v0.4h, v0.4h +fdot v0.2s, v31.4h, v0.4h +fdot v0.2s, v0.4h, v31.4h + +fdot v0.4s, v0.8h, v0.8h +fdot v31.4s, v0.8h, v0.8h +fdot v0.4s, v31.8h, v0.8h +fdot v0.4s, v0.8h, v31.8h diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h index 7d03ce86d25..1e543659ca4 100644 --- a/include/opcode/aarch64.h +++ b/include/opcode/aarch64.h @@ -271,6 +271,8 @@ enum aarch64_feature_bit { AARCH64_FEATURE_SVE2p3, /* SME2.3. */ AARCH64_FEATURE_SME2p3, + /* F16F32DOT instructions. */ + AARCH64_FEATURE_F16F32DOT, /* Virtual features. These are used to gate instructions that are enabled by either of two (or more) sets of command line flags. */ diff --git a/opcodes/aarch64-dis-2.c b/opcodes/aarch64-dis-2.c index a805868571d..68320222034 100644 --- a/opcodes/aarch64-dis-2.c +++ b/opcodes/aarch64-dis-2.c @@ -29041,10 +29041,20 @@ aarch64_opcode_lookup_1 (uint32_t word) { if (((word >> 22) & 0x1) == 0) { - /* 33222222222211111111110000000000 - 10987654321098765432109876543210 - 0x001110x00xxxxxx11111xxxxxxxxxx. */ - return A64_OPID_0e00fc00_fdot_Vd_Vn_Vm; + if (((word >> 23) & 0x1) == 0) + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 0x001110000xxxxxx11111xxxxxxxxxx. */ + return A64_OPID_0e00fc00_fdot_Vd_Vn_Vm; + } + else + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 0x001110100xxxxxx11111xxxxxxxxxx. */ + return A64_OPID_0e80fc00_fdot_Vd_Vn_Vm; + } } else { @@ -34841,19 +34851,29 @@ aarch64_opcode_lookup_1 (uint32_t word) { if (((word >> 23) & 0x1) == 0) { - if (((word >> 29) & 0x1) == 0) + if (((word >> 22) & 0x1) == 0) { - /* 33222222222211111111110000000000 - 10987654321098765432109876543210 - xx0011110xxxxxxx1001x0xxxxxxxxxx. */ - return A64_OPID_0f009000_fmul_Vd_Vn_Em16; + if (((word >> 29) & 0x1) == 0) + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + xx00111100xxxxxx1001x0xxxxxxxxxx. */ + return A64_OPID_0f009000_fmul_Vd_Vn_Em16; + } + else + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + xx10111100xxxxxx1001x0xxxxxxxxxx. */ + return A64_OPID_2f009000_fmulx_Vd_Vn_Em16; + } } else { /* 33222222222211111111110000000000 10987654321098765432109876543210 - xx1011110xxxxxxx1001x0xxxxxxxxxx. */ - return A64_OPID_2f009000_fmulx_Vd_Vn_Em16; + xxx0111101xxxxxx1001x0xxxxxxxxxx. */ + return A64_OPID_0f409000_fdot_Vd_Vn_Em16; } } else diff --git a/opcodes/aarch64-tbl-2.h b/opcodes/aarch64-tbl-2.h index cc3c2e85722..eefb05e5d61 100644 --- a/opcodes/aarch64-tbl-2.h +++ b/opcodes/aarch64-tbl-2.h @@ -4038,5 +4038,7 @@ enum aarch64_opcode_idx A64_OPID_45a02000_sqshrun_SVE_Zd_SME_Znx2_SVE_SHRIMM_UNPRED_22, A64_OPID_45a83800_uqrshrn_SVE_Zd_SME_Znx2_SME_SHRIMM3, A64_OPID_45a01000_uqshrn_SVE_Zd_SME_Znx2_SVE_SHRIMM_UNPRED_22, + A64_OPID_0f409000_fdot_Vd_Vn_Em16, + A64_OPID_0e80fc00_fdot_Vd_Vn_Vm, A64_OPID_MAX, }; diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h index 6b733442e7f..4faf4025887 100644 --- a/opcodes/aarch64-tbl.h +++ b/opcodes/aarch64-tbl.h @@ -3076,6 +3076,8 @@ static const aarch64_feature_set aarch64_feature_sme2p3 = AARCH64_FEATURE (SME2p3); static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 = AARCH64_FEATURE (SVE2p3_SME2p3); +static const aarch64_feature_set aarch64_feature_f16f32dot = + AARCH64_FEATURE (F16F32DOT); #define CORE &aarch64_feature_v8 #define FP &aarch64_feature_fp @@ -3205,6 +3207,7 @@ static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 = #define SVE2p3 &aarch64_feature_sve2p3 #define SME2p3 &aarch64_feature_sme2p3 #define SVE2p3_SME2p3 &aarch64_feature_sve2p3_sme2p3 +#define F16F32DOT &aarch64_feature_f16f32dot #define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \ { NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS | F_INVALID_IMM_SYMS_1, 0, 0, NULL } @@ -3549,6 +3552,8 @@ static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 = { NAME, OPCODE, MASK, CLASS, OP, SVE2p3_SME2p3, OPS, QUALS, \ F_STRICT | F_INVALID_IMM_SYMS_3 | FLAGS, CONSTRAINTS, TIED, NULL } +#define F16F32DOT_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \ + { NAME, OPCODE, MASK, CLASS, 0, F16F32DOT, OPS, QUALS, FLAGS, 0, 0, NULL } #define MOPS_CPY_OP1_OP2_PME_INSN(NAME, OPCODE, MASK, FLAGS, CONSTRAINTS) \ MOPS_INSN (NAME, OPCODE, MASK, 0, \ @@ -7841,6 +7846,10 @@ const struct aarch64_opcode aarch64_opcode_table[] = SVE2p3_SME2p3_INSN ("uqrshrn", 0x45a83800, 0xfff8fc20, sve_misc, 0, OP3 (SVE_Zd, SME_Znx2, SME_SHRIMM3), OP_SVE_BHU, 0, 0, 0), SVE2p3_SME2p3_INSN ("uqshrn", 0x45a01000, 0xffe0fc20, sve_shift_tsz_hsd, 0, OP3 (SVE_Zd, SME_Znx2, SVE_SHRIMM_UNPRED_22), OP_SVE_VVU_BH_HS, 0, 0, 0), + /* F16F32DOT instructions. */ + F16F32DOT_INSN ("fdot", 0x0f409000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em16), QL_BFDOT64I, F_SIZEQ), + F16F32DOT_INSN ("fdot", 0x0e80fc00, 0xbfe0fc00, dotproduct, OP3 (Vd, Vn, Vm), QL_BFDOT64, F_SIZEQ), + {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL}, };