git.feebdaed.xyz Git - 0xmirror/binutils-gdb.git/commitdiff
AArch64: Add FEAT_F16F32DOT instructions
author: Sivan Shani <sivan.shani@arm.com>
Wed, 17 Dec 2025 16:36:54 +0000 (16:36 +0000)
committer: Alice Carlotti <alice.carlotti@arm.com>
Sat, 27 Dec 2025 12:45:52 +0000 (12:45 +0000)
This includes the instructions for the F16F32DOT feature:
    - FDOT half-precision to single-precision, by element
    - FDOT half-precision to single-precision, vector

gas/config/tc-aarch64.c
gas/doc/c-aarch64.texi
gas/testsuite/gas/aarch64/f16f32dot-simd.d [new file with mode: 0644]
gas/testsuite/gas/aarch64/f16f32dot-simd.s [new file with mode: 0644]
include/opcode/aarch64.h
opcodes/aarch64-dis-2.c
opcodes/aarch64-tbl-2.h
opcodes/aarch64-tbl.h

diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index f150c2c58ce7e23cf838ff78f5074cff96dc1f8e..1121336abf6048823adab7aa2ba423ee4b2d1cca 100644 (file)
@@ -10921,6 +10921,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
                        AARCH64_FEATURES (2, MOPS, MEMTAG)},
   {"sve2p3",           AARCH64_FEATURE (SVE2p3), AARCH64_FEATURE (SVE2p2)},
   {"sme2p3",           AARCH64_FEATURE (SME2p3), AARCH64_FEATURES (2, SME2p2, SME_LUTv2)},
+  {"f16f32dot",                AARCH64_FEATURE (F16F32DOT), AARCH64_FEATURE (SIMD)},
   {NULL,               AARCH64_NO_FEATURES, AARCH64_NO_FEATURES},
 };
 
diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi
index 308e98c3e7824e322462ae2f84a752fb2f908cc1..214f8be031fd489933a1e8c878f5eb66b36fb796 100644 (file)
@@ -219,6 +219,8 @@ automatically cause those extensions to be disabled.
  @tab Enable Armv8.2 16-bit floating-point multiplication variant support.
 @item @code{fp16} @tab @code{fp}
  @tab Enable Armv8.2 16-bit floating-point support.
+@item @code{f16f32dot} @tab @code{simd}
+ @tab Enable Armv9.7 f16f32dot instructions.
 @item @code{fprcvt} @tab @code{fp}
  @tab Enable Armv9.6 fprcvt instructions.
 @item @code{frintts} @tab @code{fp}
diff --git a/gas/testsuite/gas/aarch64/f16f32dot-simd.d b/gas/testsuite/gas/aarch64/f16f32dot-simd.d
new file mode 100644 (file)
index 0000000..0e3f0a4
--- /dev/null
@@ -0,0 +1,24 @@
+#as: -march=armv8-a+f16f32dot
+#objdump: -dr
+
+.*:     file format .*
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+ *[0-9a-f]+:   0f409000        fdot    v0\.2s, v0\.4h, v0\.2h\[0]
+ *[0-9a-f]+:   0f60901f        fdot    v31\.2s, v0\.4h, v0\.2h\[1]
+ *[0-9a-f]+:   0f409be0        fdot    v0\.2s, v31\.4h, v0\.2h\[2]
+ *[0-9a-f]+:   0f7f9800        fdot    v0\.2s, v0\.4h, v31\.2h\[3]
+ *[0-9a-f]+:   4f409000        fdot    v0\.4s, v0\.8h, v0\.2h\[0]
+ *[0-9a-f]+:   4f60901f        fdot    v31\.4s, v0\.8h, v0\.2h\[1]
+ *[0-9a-f]+:   4f409be0        fdot    v0\.4s, v31\.8h, v0\.2h\[2]
+ *[0-9a-f]+:   4f7f9800        fdot    v0\.4s, v0\.8h, v31\.2h\[3]
+ *[0-9a-f]+:   0e80fc00        fdot    v0\.2s, v0\.4h, v0\.4h
+ *[0-9a-f]+:   0e80fc1f        fdot    v31\.2s, v0\.4h, v0\.4h
+ *[0-9a-f]+:   0e80ffe0        fdot    v0\.2s, v31\.4h, v0\.4h
+ *[0-9a-f]+:   0e9ffc00        fdot    v0\.2s, v0\.4h, v31\.4h
+ *[0-9a-f]+:   4e80fc00        fdot    v0\.4s, v0\.8h, v0\.8h
+ *[0-9a-f]+:   4e80fc1f        fdot    v31\.4s, v0\.8h, v0\.8h
+ *[0-9a-f]+:   4e80ffe0        fdot    v0\.4s, v31\.8h, v0\.8h
+ *[0-9a-f]+:   4e9ffc00        fdot    v0\.4s, v0\.8h, v31\.8h
diff --git a/gas/testsuite/gas/aarch64/f16f32dot-simd.s b/gas/testsuite/gas/aarch64/f16f32dot-simd.s
new file mode 100644 (file)
index 0000000..ff92033
--- /dev/null
@@ -0,0 +1,19 @@
+fdot v0.2s, v0.4h, v0.2h[0]
+fdot v31.2s, v0.4h, v0.2h[1]
+fdot v0.2s, v31.4h, v0.2h[2]
+fdot v0.2s, v0.4h, v31.2h[3]
+
+fdot v0.4s, v0.8h, v0.2h[0]
+fdot v31.4s, v0.8h, v0.2h[1]
+fdot v0.4s, v31.8h, v0.2h[2]
+fdot v0.4s, v0.8h, v31.2h[3]
+
+fdot v0.2s, v0.4h, v0.4h
+fdot v31.2s, v0.4h, v0.4h
+fdot v0.2s, v31.4h, v0.4h
+fdot v0.2s, v0.4h, v31.4h
+
+fdot v0.4s, v0.8h, v0.8h
+fdot v31.4s, v0.8h, v0.8h
+fdot v0.4s, v31.8h, v0.8h
+fdot v0.4s, v0.8h, v31.8h
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index 7d03ce86d25b0456e5ab43618b7cb6b6060575da..1e543659ca4d514a27e5814c1231a3ead2f3c847 100644 (file)
@@ -271,6 +271,8 @@ enum aarch64_feature_bit {
   AARCH64_FEATURE_SVE2p3,
   /* SME2.3.  */
   AARCH64_FEATURE_SME2p3,
+  /* F16F32DOT instructions.  */
+  AARCH64_FEATURE_F16F32DOT,
 
   /* Virtual features.  These are used to gate instructions that are enabled
      by either of two (or more) sets of command line flags.  */
diff --git a/opcodes/aarch64-dis-2.c b/opcodes/aarch64-dis-2.c
index a805868571dc23f85f6ba0a5fb9ad89b335f7184..6832022203420d8fa72f8164dc74749d19595a21 100644 (file)
@@ -29041,10 +29041,20 @@ aarch64_opcode_lookup_1 (uint32_t word)
                                                     {
                                                       if (((word >> 22) & 0x1) == 0)
                                                         {
-                                                          /* 33222222222211111111110000000000
-                                                             10987654321098765432109876543210
-                                                             0x001110x00xxxxxx11111xxxxxxxxxx.  */
-                                                          return A64_OPID_0e00fc00_fdot_Vd_Vn_Vm;
+                                                          if (((word >> 23) & 0x1) == 0)
+                                                            {
+                                                              /* 33222222222211111111110000000000
+                                                                 10987654321098765432109876543210
+                                                                 0x001110000xxxxxx11111xxxxxxxxxx.  */
+                                                              return A64_OPID_0e00fc00_fdot_Vd_Vn_Vm;
+                                                            }
+                                                          else
+                                                            {
+                                                              /* 33222222222211111111110000000000
+                                                                 10987654321098765432109876543210
+                                                                 0x001110100xxxxxx11111xxxxxxxxxx.  */
+                                                              return A64_OPID_0e80fc00_fdot_Vd_Vn_Vm;
+                                                            }
                                                         }
                                                       else
                                                         {
@@ -34841,19 +34851,29 @@ aarch64_opcode_lookup_1 (uint32_t word)
                                         {
                                           if (((word >> 23) & 0x1) == 0)
                                             {
-                                              if (((word >> 29) & 0x1) == 0)
+                                              if (((word >> 22) & 0x1) == 0)
                                                 {
-                                                  /* 33222222222211111111110000000000
-                                                     10987654321098765432109876543210
-                                                     xx0011110xxxxxxx1001x0xxxxxxxxxx.  */
-                                                  return A64_OPID_0f009000_fmul_Vd_Vn_Em16;
+                                                  if (((word >> 29) & 0x1) == 0)
+                                                    {
+                                                      /* 33222222222211111111110000000000
+                                                         10987654321098765432109876543210
+                                                         xx00111100xxxxxx1001x0xxxxxxxxxx.  */
+                                                      return A64_OPID_0f009000_fmul_Vd_Vn_Em16;
+                                                    }
+                                                  else
+                                                    {
+                                                      /* 33222222222211111111110000000000
+                                                         10987654321098765432109876543210
+                                                         xx10111100xxxxxx1001x0xxxxxxxxxx.  */
+                                                      return A64_OPID_2f009000_fmulx_Vd_Vn_Em16;
+                                                    }
                                                 }
                                               else
                                                 {
                                                   /* 33222222222211111111110000000000
                                                      10987654321098765432109876543210
-                                                     xx1011110xxxxxxx1001x0xxxxxxxxxx.  */
-                                                  return A64_OPID_2f009000_fmulx_Vd_Vn_Em16;
+                                                     xxx0111101xxxxxx1001x0xxxxxxxxxx.  */
+                                                  return A64_OPID_0f409000_fdot_Vd_Vn_Em16;
                                                 }
                                             }
                                           else
diff --git a/opcodes/aarch64-tbl-2.h b/opcodes/aarch64-tbl-2.h
index cc3c2e85722281d3a02b2cea17a54cbe3426ef1f..eefb05e5d61430a21753dbdc0a13f823005921b9 100644 (file)
@@ -4038,5 +4038,7 @@ enum aarch64_opcode_idx
   A64_OPID_45a02000_sqshrun_SVE_Zd_SME_Znx2_SVE_SHRIMM_UNPRED_22,
   A64_OPID_45a83800_uqrshrn_SVE_Zd_SME_Znx2_SME_SHRIMM3,
   A64_OPID_45a01000_uqshrn_SVE_Zd_SME_Znx2_SVE_SHRIMM_UNPRED_22,
+  A64_OPID_0f409000_fdot_Vd_Vn_Em16,
+  A64_OPID_0e80fc00_fdot_Vd_Vn_Vm,
   A64_OPID_MAX,
 };
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 6b733442e7f711211b8916ba9e8d43b64dc96226..4faf4025887f5f111ac70cf31148170a30644b97 100644 (file)
@@ -3076,6 +3076,8 @@ static const aarch64_feature_set aarch64_feature_sme2p3 =
   AARCH64_FEATURE (SME2p3);
 static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 =
   AARCH64_FEATURE (SVE2p3_SME2p3);
+static const aarch64_feature_set aarch64_feature_f16f32dot =
+  AARCH64_FEATURE (F16F32DOT);
 
 #define CORE           &aarch64_feature_v8
 #define FP             &aarch64_feature_fp
@@ -3205,6 +3207,7 @@ static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 =
 #define SVE2p3 &aarch64_feature_sve2p3
 #define SME2p3 &aarch64_feature_sme2p3
 #define SVE2p3_SME2p3  &aarch64_feature_sve2p3_sme2p3
+#define F16F32DOT      &aarch64_feature_f16f32dot
 
 #define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \
   { NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS | F_INVALID_IMM_SYMS_1, 0, 0, NULL }
@@ -3549,6 +3552,8 @@ static const aarch64_feature_set aarch64_feature_sve2p3_sme2p3 =
   { NAME, OPCODE, MASK, CLASS, OP, SVE2p3_SME2p3, OPS, QUALS, \
     F_STRICT | F_INVALID_IMM_SYMS_3 | FLAGS, CONSTRAINTS, TIED, NULL }
 
+#define F16F32DOT_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
+  { NAME, OPCODE, MASK, CLASS, 0, F16F32DOT, OPS, QUALS, FLAGS, 0, 0, NULL }
 
 #define MOPS_CPY_OP1_OP2_PME_INSN(NAME, OPCODE, MASK, FLAGS, CONSTRAINTS) \
   MOPS_INSN (NAME, OPCODE, MASK, 0, \
@@ -7841,6 +7846,10 @@ const struct aarch64_opcode aarch64_opcode_table[] =
   SVE2p3_SME2p3_INSN ("uqrshrn", 0x45a83800, 0xfff8fc20, sve_misc, 0, OP3 (SVE_Zd, SME_Znx2, SME_SHRIMM3), OP_SVE_BHU, 0, 0, 0),
   SVE2p3_SME2p3_INSN ("uqshrn", 0x45a01000, 0xffe0fc20, sve_shift_tsz_hsd, 0, OP3 (SVE_Zd, SME_Znx2, SVE_SHRIMM_UNPRED_22), OP_SVE_VVU_BH_HS, 0, 0, 0),
 
+  /* F16F32DOT instructions.  */
+  F16F32DOT_INSN ("fdot", 0x0f409000, 0xbfc0f400, dotproduct, OP3 (Vd, Vn, Em16), QL_BFDOT64I, F_SIZEQ),
+  F16F32DOT_INSN ("fdot", 0x0e80fc00, 0xbfe0fc00, dotproduct, OP3 (Vd, Vn, Vm), QL_BFDOT64, F_SIZEQ),
+
   {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
 };