From a6956e0c790c3ca6d446730f13731240abc0e95d Mon Sep 17 00:00:00 2001 From: Sivan Shani Date: Wed, 17 Dec 2025 16:58:52 +0000 Subject: [PATCH] AArch64: Add FEAT_F16F32MM This patch includes: - The feature flag for FEAT_F16F32MM. - The FMMLA instruction (half-precision matrix multiply-accumulate to single-precision). --- gas/config/tc-aarch64.c | 1 + gas/doc/c-aarch64.texi | 2 ++ gas/testsuite/gas/aarch64/f16f32mm-simd.d | 12 ++++++++++++ gas/testsuite/gas/aarch64/f16f32mm-simd.s | 4 ++++ include/opcode/aarch64.h | 2 ++ opcodes/aarch64-dis-2.c | 24 ++++++++++++++++------- opcodes/aarch64-tbl-2.h | 1 + opcodes/aarch64-tbl.h | 8 ++++++++ 8 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 gas/testsuite/gas/aarch64/f16f32mm-simd.d create mode 100644 gas/testsuite/gas/aarch64/f16f32mm-simd.s diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c index 1121336abf6..6ceda91290c 100644 --- a/gas/config/tc-aarch64.c +++ b/gas/config/tc-aarch64.c @@ -10922,6 +10922,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = { {"sve2p3", AARCH64_FEATURE (SVE2p3), AARCH64_FEATURE (SVE2p2)}, {"sme2p3", AARCH64_FEATURE (SME2p3), AARCH64_FEATURES (2, SME2p2, SME_LUTv2)}, {"f16f32dot", AARCH64_FEATURE (F16F32DOT), AARCH64_FEATURE (SIMD)}, + {"f16f32mm", AARCH64_FEATURE (F16F32MM), AARCH64_FEATURES (2, SIMD, F16)}, {NULL, AARCH64_NO_FEATURES, AARCH64_NO_FEATURES}, }; diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi index 214f8be031f..c20e359c804 100644 --- a/gas/doc/c-aarch64.texi +++ b/gas/doc/c-aarch64.texi @@ -221,6 +221,8 @@ automatically cause those extensions to be disabled. @tab Enable Armv8.2 16-bit floating-point support. @item @code{f16f32dot} @tab @code{simd} @tab Enable Armv9.7 f16f32dot instructions. +@item @code{f16f32mm} @tab @code{simd}, @code{fp16} + @tab Enable Armv9.7 f16f32mm instructions. @item @code{fprcvt} @tab @code{fp} @tab Enable Armv9.6 fprcvt instructions. 
@item @code{frintts} @tab @code{fp} diff --git a/gas/testsuite/gas/aarch64/f16f32mm-simd.d b/gas/testsuite/gas/aarch64/f16f32mm-simd.d new file mode 100644 index 00000000000..0029ff3d3a1 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16f32mm-simd.d @@ -0,0 +1,12 @@ +#as: -march=armv8-a+f16f32mm +#objdump: -dr + +.*: file format .* + +Disassembly of section \.text: + +0+ <\.text>: + *[0-9a-f]+: 4e40ec00 fmmla v0.4s, v0.8h, v0.8h + *[0-9a-f]+: 4e40ec1f fmmla v31.4s, v0.8h, v0.8h + *[0-9a-f]+: 4e40efe0 fmmla v0.4s, v31.8h, v0.8h + *[0-9a-f]+: 4e5fec00 fmmla v0.4s, v0.8h, v31.8h diff --git a/gas/testsuite/gas/aarch64/f16f32mm-simd.s b/gas/testsuite/gas/aarch64/f16f32mm-simd.s new file mode 100644 index 00000000000..0cd034f2012 --- /dev/null +++ b/gas/testsuite/gas/aarch64/f16f32mm-simd.s @@ -0,0 +1,4 @@ +fmmla v0.4s, v0.8h, v0.8h +fmmla v31.4s, v0.8h, v0.8h +fmmla v0.4s, v31.8h, v0.8h +fmmla v0.4s, v0.8h, v31.8h diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h index 1e543659ca4..d2de7efa765 100644 --- a/include/opcode/aarch64.h +++ b/include/opcode/aarch64.h @@ -273,6 +273,8 @@ enum aarch64_feature_bit { AARCH64_FEATURE_SME2p3, /* F16F32DOT instructions. */ AARCH64_FEATURE_F16F32DOT, + /* F16F32MM instructions. */ + AARCH64_FEATURE_F16F32MM, /* Virtual features. These are used to gate instructions that are enabled by either of two (or more) sets of command line flags. */ diff --git a/opcodes/aarch64-dis-2.c b/opcodes/aarch64-dis-2.c index 68320222034..3c043f897b4 100644 --- a/opcodes/aarch64-dis-2.c +++ b/opcodes/aarch64-dis-2.c @@ -28953,19 +28953,29 @@ aarch64_opcode_lookup_1 (uint32_t word) } else { - if (((word >> 15) & 0x1) == 0) + if (((word >> 14) & 0x1) == 0) { - /* 33222222222211111111110000000000 - 10987654321098765432109876543210 - 0x001110xx0xxxxx0x1011xxxxxxxxxx. 
*/ - return A64_OPID_0e002c00_smov_Rd_En; + if (((word >> 15) & 0x1) == 0) + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 0x001110xx0xxxxx001011xxxxxxxxxx. */ + return A64_OPID_0e002c00_smov_Rd_En; + } + else + { + /* 33222222222211111111110000000000 + 10987654321098765432109876543210 + 0x001110xx0xxxxx101011xxxxxxxxxx. */ + return A64_OPID_4e80ac00_usmmla_Vd_Vn_Vm; + } } else { /* 33222222222211111111110000000000 10987654321098765432109876543210 - 0x001110xx0xxxxx1x1011xxxxxxxxxx. */ - return A64_OPID_4e80ac00_usmmla_Vd_Vn_Vm; + 0x001110xx0xxxxxx11011xxxxxxxxxx. */ + return A64_OPID_4e40ec00_fmmla_Vd_Vn_Vm; } } } diff --git a/opcodes/aarch64-tbl-2.h b/opcodes/aarch64-tbl-2.h index eefb05e5d61..aef23a03701 100644 --- a/opcodes/aarch64-tbl-2.h +++ b/opcodes/aarch64-tbl-2.h @@ -4040,5 +4040,6 @@ enum aarch64_opcode_idx A64_OPID_45a01000_uqshrn_SVE_Zd_SME_Znx2_SVE_SHRIMM_UNPRED_22, A64_OPID_0f409000_fdot_Vd_Vn_Em16, A64_OPID_0e80fc00_fdot_Vd_Vn_Vm, + A64_OPID_4e40ec00_fmmla_Vd_Vn_Vm, A64_OPID_MAX, }; diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h index 4faf4025887..59aef335bbc 100644 --- a/opcodes/aarch64-tbl.h +++ b/opcodes/aarch64-tbl.h @@ -3078,6 +3078,8 @@ static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 = AARCH64_FEATURE (SVE2p3_SME2p3); static const aarch64_feature_set aarch64_feature_f16f32dot = AARCH64_FEATURE (F16F32DOT); +static const aarch64_feature_set aarch64_feature_f16f32mm = + AARCH64_FEATURE (F16F32MM); #define CORE &aarch64_feature_v8 #define FP &aarch64_feature_fp @@ -3208,6 +3210,7 @@ static const aarch64_feature_set aarch64_feature_f16f32dot = #define SME2p3 &aarch64_feature_sme2p3 #define SVE2p3_SME2p3 &aarch64_feature_sve2p3_sme2p3 #define F16F32DOT &aarch64_feature_f16f32dot +#define F16F32MM &aarch64_feature_f16f32mm #define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \ { NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS | F_INVALID_IMM_SYMS_1, 0, 0, NULL } @@ -3554,6 
+3557,8 @@ static const aarch64_feature_set aarch64_feature_f16f32dot = #define F16F32DOT_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \ { NAME, OPCODE, MASK, CLASS, 0, F16F32DOT, OPS, QUALS, FLAGS, 0, 0, NULL } +#define F16F32MM_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \ + { NAME, OPCODE, MASK, CLASS, 0, F16F32MM, OPS, QUALS, FLAGS, 0, 0, NULL } #define MOPS_CPY_OP1_OP2_PME_INSN(NAME, OPCODE, MASK, FLAGS, CONSTRAINTS) \ MOPS_INSN (NAME, OPCODE, MASK, 0, \ @@ -7850,6 +7855,9 @@ const struct aarch64_opcode aarch64_opcode_table[] = F16F32DOT_INSN ("fdot", 0x0f409000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em16), QL_BFDOT64I, F_SIZEQ), F16F32DOT_INSN ("fdot", 0x0e80fc00, 0xbfe0fc00, dotproduct, OP3 (Vd, Vn, Vm), QL_BFDOT64, F_SIZEQ), + /* F16F32MM instructions. */ + F16F32MM_INSN ("fmmla", 0x4e40ec00, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_BFMMLA, F_SIZEQ), + {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL}, }; -- 2.43.0