{
machine_mode mode0 = e.result_mode ();
machine_mode mode1 = GET_MODE (e.args[0]);
- convert_optab optab;
- if (e.type_suffix (0).integer_p)
- optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
- else if (e.type_suffix (1).integer_p)
- optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab;
- else if (e.type_suffix (0).element_bits
- < e.type_suffix (1).element_bits)
- optab = trunc_optab;
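+ /* FP8 conversions take the FPM register as an extra input; they have no
+    convert optab, so select the dedicated pattern directly.  */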
+ if (e.fpm_mode == aarch64_sve::FPM_set)
+ icode = code_for_aarch64_sme2_fp8_cvt (mode1);
else
- optab = sext_optab;
- icode = convert_optab_handler (optab, mode0, mode1);
+ {
+ convert_optab optab;
+ if (e.type_suffix (0).integer_p)
+ optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
+ else if (e.type_suffix (1).integer_p)
+ optab = e.type_suffix (1).unsigned_p ? ufloat_optab
+ : sfloat_optab;
+ else if (e.type_suffix (0).element_bits
+ < e.type_suffix (1).element_bits)
+ optab = trunc_optab;
+ else
+ optab = sext_optab;
+ icode = convert_optab_handler (optab, mode0, mode1);
+ }
gcc_assert (icode != CODE_FOR_nothing);
return e.use_exact_insn (icode);
}
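For illustration, the FPM_set path above is what the FP8 narrowing intrinsics expand through at the source level. A minimal usage sketch (intrinsic and type names taken from the new tests below; the wrapper name is hypothetical, and fpm_t is assumed to be exposed by arm_sme.h under +sme2+fp8):

#include <arm_sme.h>

/* Narrow four single-precision vectors to one FP8 vector.  The fpm_t
   argument is written to FPMR first, hence the "msr fpmr, x0" expected
   by the assembly scans in the tests below.  */
svmfloat8_t
cvt_f32_x4 (svfloat32x4_t x, fpm_t fpm) __arm_streaming
{
  return svcvt_mf8_f32_x4_fpm (x, fpm);
}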
#define REQUIRED_EXTENSIONS \
streaming_only (AARCH64_FL_SME2 | AARCH64_FL_FP8)
+DEF_SVE_FUNCTION_GS_FPM (svcvt, unary_convertxn_narrow, cvtn_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvt, unary_convertxn_narrow, cvtnx_mf8, x4, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtn, unary_convertxn_narrow, cvtnx_mf8, x4, none, set)
DEF_SVE_FUNCTION_GS_FPM (svcvt1, unary_convert, cvt_mf8, x2, none, set)
DEF_SVE_FUNCTION_GS_FPM (svcvt2, unary_convert, cvt_mf8, x2, none, set)
DEF_SVE_FUNCTION_GS_FPM (svcvtl1, unary_convert, cvt_mf8, x2, none, set)
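Inferred from the tests added below, the three new entries should resolve to the following overloads (a sketch only; the exact signatures are generated by the builtin machinery, and the comments note the instruction each one scans for):

svmfloat8_t svcvt_mf8_f16_x2_fpm (svfloat16x2_t, fpm_t) __arm_streaming;   /* fcvt  */
svmfloat8_t svcvt_mf8_bf16_x2_fpm (svbfloat16x2_t, fpm_t) __arm_streaming; /* bfcvt */
svmfloat8_t svcvt_mf8_f32_x4_fpm (svfloat32x4_t, fpm_t) __arm_streaming;   /* fcvt  */
svmfloat8_t svcvtn_mf8_f32_x4_fpm (svfloat32x4_t, fpm_t) __arm_streaming;  /* fcvtn */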
(define_insn "@aarch64_sve2_fp8_cvtn<mode>"
[(set (match_operand:VNx16QI 0 "register_operand" "=w")
(unspec:VNx16QI
- [(match_operand:SVE_FULL_HFx2 1 "aligned_register_operand" "Uw2")
+ [(match_operand:VNx16F_NARROW 1 "aligned_register_operand" "Uw<vector_count>")
(reg:DI FPM_REGNUM)]
UNSPEC_FP8FCVTN))]
- "TARGET_SSVE_FP8"
+ "<MODE>mode == VNx16SFmode ? TARGET_SSME2_FP8 : TARGET_SSVE_FP8"
"<b>fcvtn\t%0.b, %1"
[(set_attr "sve_type" "sve_fp_cvt")]
)
+(define_insn "@aarch64_sme2_fp8_cvt<mode>"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(match_operand:VNx16F_NARROW 1 "aligned_register_operand" "Uw<vector_count>")
+ (reg:DI FPM_REGNUM)]
+ UNSPEC_FCVT))]
+ "TARGET_SSME2_FP8"
+ "<b>fcvt\t%0.b, %1"
+)
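+;; Note: code_for_aarch64_sme2_fp8_cvt in the expander change above selects
+;; this pattern directly.  <b> expands to "b" for the bfloat16 pair mode, so
+;; that mode emits BFCVT while the f16 pair and f32 quad modes emit FCVT.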
+
;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Multi-vector conversions
;; -------------------------------------------------------------------------
(define_mode_iterator VNx2_NARROW [VNx2QI VNx2HI VNx2SI])
(define_mode_iterator VNx2_WIDE [VNx2DI])
+;; Source modes for FP8 narrowing conversions: a pair of half-precision
+;; (or bfloat16) vectors, or four single-precision vectors.
+(define_mode_iterator VNx16F_NARROW [SVE_FULL_HFx2 VNx16SF])
+
;; All SVE predicate modes.
(define_mode_iterator PRED_ALL [VNx16BI VNx8BI VNx4BI VNx2BI])
UNSPEC_F2CVTL ; Used in aarch64-sve2.md.
UNSPEC_F2CVTLT ; Used in aarch64-sve2.md.
UNSPEC_FADDP ; Used in aarch64-sve2.md.
+ UNSPEC_FCVT ; Used in aarch64-sve2.md.
UNSPEC_FCVTNB ; Used in aarch64-sve2.md.
UNSPEC_FCVTNT ; Used in aarch64-sve2.md.
UNSPEC_FMAXNMP ; Used in aarch64-sve2.md.
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#pragma GCC target "+fp8+bf16"
+#include "test_sme2_acle.h"
+
+/*
+** cvt_z0_z0:
+** msr fpmr, x0
+** bfcvt z0\.b, {z0\.h - z1\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z0, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt_mf8_bf16_x2_fpm (z0, fpm0),
+ z0_res = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z0_z6:
+** msr fpmr, x0
+** bfcvt z0\.b, {z6\.h - z7\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z6, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt_mf8_bf16_x2_fpm (z6, fpm0),
+ z0_res = svcvt_mf8_fpm (z6, fpm0))
+
+/*
+** cvt_z0_z29:
+** msr fpmr, x0
+** mov [^\n]+
+** mov [^\n]+
+** bfcvt z0\.b, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z29, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt_mf8_bf16_x2_fpm (z29, fpm0),
+ z0_res = svcvt_mf8_fpm (z29, fpm0))
+
+/*
+** cvt_z5_z0:
+** msr fpmr, x0
+** bfcvt z5\.b, {z0\.h - z1\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z5_z0, svbfloat16x2_t, svmfloat8_t,
+ z5 = svcvt_mf8_bf16_x2_fpm (z0, fpm0),
+ z5 = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z22_z16:
+** msr fpmr, x0
+** bfcvt z22\.b, {z16\.h - z17\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z22_z16, svbfloat16x2_t, svmfloat8_t,
+ z22 = svcvt_mf8_bf16_x2_fpm (z16, fpm0),
+ z22 = svcvt_mf8_fpm (z16, fpm0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** cvt_z0_z0:
+** msr fpmr, x0
+** fcvt z0\.b, {z0\.h - z1\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z0, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt_mf8_f16_x2_fpm (z0, fpm0),
+ z0_res = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z0_z6:
+** msr fpmr, x0
+** fcvt z0\.b, {z6\.h - z7\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z6, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt_mf8_f16_x2_fpm (z6, fpm0),
+ z0_res = svcvt_mf8_fpm (z6, fpm0))
+
+/*
+** cvt_z0_z29:
+** msr fpmr, x0
+** mov [^\n]+
+** mov [^\n]+
+** fcvt z0\.b, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (cvt_z0_z29, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt_mf8_f16_x2_fpm (z29, fpm0),
+ z0_res = svcvt_mf8_fpm (z29, fpm0))
+
+/*
+** cvt_z5_z0:
+** msr fpmr, x0
+** fcvt z5\.b, {z0\.h - z1\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z5_z0, svfloat16x2_t, svmfloat8_t,
+ z5 = svcvt_mf8_f16_x2_fpm (z0, fpm0),
+ z5 = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z22_z16:
+** msr fpmr, x0
+** fcvt z22\.b, {z16\.h - z17\.h}
+** ret
+*/
+TEST_X2_NARROW (cvt_z22_z16, svfloat16x2_t, svmfloat8_t,
+ z22 = svcvt_mf8_f16_x2_fpm (z16, fpm0),
+ z22 = svcvt_mf8_fpm (z16, fpm0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** cvt_z0_z0:
+** msr fpmr, x0
+** fcvt z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (cvt_z0_z0, svfloat32x4_t, svmfloat8_t,
+ z0_res = svcvt_mf8_f32_x4_fpm (z0, fpm0),
+ z0_res = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z0_z4:
+** msr fpmr, x0
+** fcvt z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (cvt_z0_z4, svfloat32x4_t, svmfloat8_t,
+ z0_res = svcvt_mf8_f32_x4_fpm (z4, fpm0),
+ z0_res = svcvt_mf8_fpm (z4, fpm0))
+
+/*
+** cvt_z0_z21:
+** msr fpmr, x0
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvt z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (cvt_z0_z21, svfloat32x4_t, svmfloat8_t,
+ z0_res = svcvt_mf8_f32_x4_fpm (z21, fpm0),
+ z0_res = svcvt_mf8_fpm (z21, fpm0))
+
+/*
+** cvt_z25_z26:
+** msr fpmr, x0
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvt z25\.b, {z28\.s - z31\.s}
+** ret
+*/
+TEST_X4_NARROW (cvt_z25_z26, svfloat32x4_t, svmfloat8_t,
+ z25 = svcvt_mf8_f32_x4_fpm (z26, fpm0),
+ z25 = svcvt_mf8_fpm (z26, fpm0))
+
+/*
+** cvt_z25_z0:
+** msr fpmr, x0
+** fcvt z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (cvt_z25_z0, svfloat32x4_t, svmfloat8_t,
+ z25 = svcvt_mf8_f32_x4_fpm (z0, fpm0),
+ z25 = svcvt_mf8_fpm (z0, fpm0))
+
+/*
+** cvt_z22_z16:
+** msr fpmr, x0
+** fcvt z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (cvt_z22_z16, svfloat32x4_t, svmfloat8_t,
+ z22_res = svcvt_mf8_f32_x4_fpm (z16, fpm0),
+ z22_res = svcvt_mf8_fpm (z16, fpm0))
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+#pragma GCC target "+fp8"
+
+/*
+** cvtn_z0_z0:
+** msr fpmr, x0
+** fcvtn z0\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (cvtn_z0_z0, svfloat32x4_t, svmfloat8_t,
+ z0_res = svcvtn_mf8_f32_x4_fpm (z0, fpm0),
+ z0_res = svcvtn_mf8_fpm (z0, fpm0))
+
+/*
+** cvtn_z0_z4:
+** msr fpmr, x0
+** fcvtn z0\.b, {z4\.s - z7\.s}
+** ret
+*/
+TEST_X4_NARROW (cvtn_z0_z4, svfloat32x4_t, svmfloat8_t,
+ z0_res = svcvtn_mf8_f32_x4_fpm (z4, fpm0),
+ z0_res = svcvtn_mf8_fpm (z4, fpm0))
+
+/*
+** cvtn_z0_z21:
+** msr fpmr, x0
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtn z0\.b, [^\n]+
+** ret
+*/
+TEST_X4_NARROW (cvtn_z0_z21, svfloat32x4_t, svmfloat8_t,
+ z0_res = svcvtn_mf8_f32_x4_fpm (z21, fpm0),
+ z0_res = svcvtn_mf8_fpm (z21, fpm0))
+
+/*
+** cvtn_z25_z26:
+** msr fpmr, x0
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** fcvtn z25\.b, {z28\.s - z31\.s}
+** ret
+*/
+TEST_X4_NARROW (cvtn_z25_z26, svfloat32x4_t, svmfloat8_t,
+ z25 = svcvtn_mf8_f32_x4_fpm (z26, fpm0),
+ z25 = svcvtn_mf8_fpm (z26, fpm0))
+
+/*
+** cvtn_z25_z0:
+** msr fpmr, x0
+** fcvtn z25\.b, {z0\.s - z3\.s}
+** ret
+*/
+TEST_X4_NARROW (cvtn_z25_z0, svfloat32x4_t, svmfloat8_t,
+ z25 = svcvtn_mf8_f32_x4_fpm (z0, fpm0),
+ z25 = svcvtn_mf8_fpm (z0, fpm0))
+
+/*
+** cvtn_z22_z16:
+** msr fpmr, x0
+** fcvtn z22\.b, {z16\.s - z19\.s}
+** ret
+*/
+TEST_X4_NARROW (cvtn_z22_z16, svfloat32x4_t, svmfloat8_t,
+ z22_res = svcvtn_mf8_f32_x4_fpm (z16, fpm0),
+ z22_res = svcvtn_mf8_fpm (z16, fpm0))
#define TEST_X2_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, void, ()) \
{ \
+ register fpm_t fpm0 __asm ("x0"); \
register TTYPE z0 __asm ("z0"); \
register ZTYPE z5 __asm ("z5"); \
register TTYPE z6 __asm ("z6"); \
#define TEST_X4_NARROW(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, void, ()) \
{ \
+ register fpm_t fpm0 __asm ("x0"); \
register TTYPE z0 __asm ("z0"); \
register TTYPE z4 __asm ("z4"); \
register TTYPE z16 __asm ("z16"); \