FUNCTION (svcvt1, svcvt_fp8_impl, (UNSPEC_F1CVT))
FUNCTION (svcvt2, svcvt_fp8_impl, (UNSPEC_F2CVT))
FUNCTION (svcvtl, svcvtl_impl,)
+FUNCTION (svcvtl1, svcvt_fp8_impl, (UNSPEC_F1CVTL))
+FUNCTION (svcvtl2, svcvt_fp8_impl, (UNSPEC_F2CVTL))
FUNCTION (svcvtlt1, svcvt_fp8_impl, (UNSPEC_F1CVTLT))
FUNCTION (svcvtlt2, svcvt_fp8_impl, (UNSPEC_F2CVTLT))
FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
DEF_SVE_FUNCTION_GS_FPM (svdot, ternary_mfloat8, h_float_mf8, none, none, set)
DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, h_float_mf8, none, none, set)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+ streaming_only (AARCH64_FL_SME2 | AARCH64_FL_FP8)
+DEF_SVE_FUNCTION_GS_FPM (svcvt1, unary_convert, cvt_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvt2, unary_convert, cvt_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtl1, unary_convert, cvt_mf8, x2, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svcvtl2, unary_convert, cvt_mf8, x2, none, set)
+#undef REQUIRED_EXTENSIONS
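For reference, these .def entries expose the following ACLE prototypes (a
sketch inferred from the intrinsic names exercised by the new tests; the
authoritative declarations are generated into arm_sme.h):

/* Sketch only: each intrinsic widens one FP8 vector into a pair of 16-bit
   vectors and takes the FPMR contents as an explicit fpm_t argument.
   Matching svbfloat16x2_t variants exist for all four names.  */
svfloat16x2_t svcvt1_f16_mf8_x2_fpm (svmfloat8_t zn, fpm_t fpm) __arm_streaming;
svfloat16x2_t svcvt2_f16_mf8_x2_fpm (svmfloat8_t zn, fpm_t fpm) __arm_streaming;
svfloat16x2_t svcvtl1_f16_mf8_x2_fpm (svmfloat8_t zn, fpm_t fpm) __arm_streaming;
svfloat16x2_t svcvtl2_f16_mf8_x2_fpm (svmfloat8_t zn, fpm_t fpm) __arm_streaming;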
extern const function_base *const svcvt1;
extern const function_base *const svcvt2;
extern const function_base *const svcvtl;
+extern const function_base *const svcvtl1;
+extern const function_base *const svcvtl2;
extern const function_base *const svcvtlt;
extern const function_base *const svcvtlt1;
extern const function_base *const svcvtlt2;
/* Handle convert-like functions in which the first type suffix is
explicit. */
if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES)
- return resolve_to (mode_suffix_id, type_suffix_ids[0], type);
+ return resolve_to (mode_suffix_id, type_suffix_ids[0], type,
+ group_suffix_id);
return resolve_to (mode_suffix_id, type);
}
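The forwarded group_suffix_id is what lets the shorter overloaded spellings
resolve to the _x2 variants.  A minimal sketch of the two equivalent forms
(function name illustrative; both spellings are taken from the new tests):

#include <arm_sme.h>
#pragma GCC target "+sme2+fp8"

svfloat16x2_t
cvt1_spellings (svmfloat8_t zn, fpm_t fpm) __arm_streaming
{
  /* Fully-suffixed name.  */
  svfloat16x2_t full = svcvt1_f16_mf8_x2_fpm (zn, fpm);
  /* Overloaded name: the source type suffix is dropped, so the resolver
     must carry the x2 group suffix through for this call to resolve.  */
  svfloat16x2_t overloaded = svcvt1_f16_x2_fpm (zn, fpm);
  (void) full;
  return overloaded;
}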
[(set_attr "sve_type" "sve_fp_cvt")]
)
+(define_insn "@aarch64_sve2_fp8_cvt_<fp8_cvt_uns_op><mode>"
+ [(set (match_operand:SVE_FULL_HFx2 0 "aligned_register_operand" "=Uw2")
+ (unspec:SVE_FULL_HFx2
+ [(match_operand:VNx16QI 1 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FP8CVT_UNS))]
+ "TARGET_SSME2_FP8"
+ "<b><fp8_cvt_uns_op>\t%0, %1.b"
+)
+
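The <b> mode attribute prepends "b" for the bfloat16 members of
SVE_FULL_HFx2, so this single pattern also covers the bf1cvt, bf2cvt,
bf1cvtl and bf2cvtl mnemonics.  A hedged example of the bfloat16 case
(function name illustrative; the expected assembly matches the
cvtl2_bf16_mf8_x2_fpm test below):

#include <arm_sme.h>
#pragma GCC target "+sme2+fp8"

/* Expected to assemble to an FPMR write followed by
   "bf2cvtl {z0.h - z1.h}, z0.b".  */
svbfloat16x2_t
cvtl2_bf16 (svmfloat8_t zn, fpm_t fpm) __arm_streaming
{
  return svcvtl2_bf16_mf8_x2_fpm (zn, fpm);
}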
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Multi-vector narrowing conversions
;; -------------------------------------------------------------------------
(TARGET_SVE2 && TARGET_FP8DOT2) || TARGET_STREAMING) \
&& (AARCH64_HAVE_ISA(SSVE_FP8DOT2) || TARGET_NON_STREAMING))
+#define TARGET_SSME2_FP8 (TARGET_FP8 && TARGET_STREAMING_SME2)
+
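A sketch of what TARGET_SSME2_FP8 requires from user code: both the SME2 and
FP8 extensions plus a streaming context.  The command-line flags and function
name below are illustrative, e.g. compiled with -march=armv9.4-a+sme2+fp8:

#include <arm_sme.h>

/* __arm_locally_streaming enters streaming mode on entry, satisfying the
   TARGET_STREAMING_SME2 half of the condition.  */
__arm_locally_streaming svfloat16x2_t
widen_fp8 (svmfloat8_t zn, fpm_t fpm)
{
  return svcvt1_f16_mf8_x2_fpm (zn, fpm);
}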
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
UNSPEC_EORBT ; Used in aarch64-sve2.md.
UNSPEC_EORTB ; Used in aarch64-sve2.md.
UNSPEC_F1CVT ; Used in aarch64-sve2.md.
+ UNSPEC_F1CVTL ; Used in aarch64-sve2.md.
UNSPEC_F1CVTLT ; Used in aarch64-sve2.md.
UNSPEC_F2CVT ; Used in aarch64-sve2.md.
+ UNSPEC_F2CVTL ; Used in aarch64-sve2.md.
UNSPEC_F2CVTLT ; Used in aarch64-sve2.md.
UNSPEC_FADDP ; Used in aarch64-sve2.md.
UNSPEC_FCVTNB ; Used in aarch64-sve2.md.
(define_int_iterator FP8CVT_UNS
[UNSPEC_F1CVT
UNSPEC_F2CVT
+ UNSPEC_F1CVTL
+ UNSPEC_F2CVTL
UNSPEC_F1CVTLT
UNSPEC_F2CVTLT])
(define_int_attr fp8_cvt_uns_op
[(UNSPEC_F1CVT "f1cvt")
(UNSPEC_F2CVT "f2cvt")
+ (UNSPEC_F1CVTL "f1cvtl")
+ (UNSPEC_F2CVTL "f2cvtl")
(UNSPEC_F1CVTLT "f1cvtlt")
(UNSPEC_F2CVTLT "f2cvtlt")])
# Turn off any codegen tweaks by default that may affect expected assembly.
# Tests relying on those should turn them on explicitly.
-set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none"
+# Reduce testsuite churn when writing to fpmr
+set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none -moverride=tune=cheap_fpmr_write"
global gcc_runtest_parallelize_limit_minor
if { [info exists gcc_runtest_parallelize_limit_minor] } {
--- /dev/null
+/* { dg-do assemble { target { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } */
+/* { dg-do compile { target { ! { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+fp8"
+
+/*
+** cvt1_f16_mf8_x2_fpm:
+** msr fpmr, x0
+** f1cvt {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvt1_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt1_f16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvt1_f16_x2_fpm (z0, fpm0))
+
+/*
+** cvt1_bf16_mf8_x2_fpm:
+** msr fpmr, x0
+** bf1cvt {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvt1_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt1_bf16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvt1_bf16_x2_fpm (z0, fpm0))
+
+/*
+** cvt2_f16_mf8_x2_fpm:
+** msr fpmr, x0
+** f2cvt {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvt2_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt2_f16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvt2_f16_x2_fpm (z0, fpm0))
+
+/*
+** cvt2_bf16_mf8_x2_fpm:
+** msr fpmr, x0
+** bf2cvt {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvt2_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvt2_bf16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvt2_bf16_x2_fpm (z0, fpm0))
--- /dev/null
+/* { dg-do assemble { target { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } */
+/* { dg-do compile { target { ! { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+fp8"
+
+/*
+** cvtl1_f16_mf8_x2_fpm:
+** msr fpmr, x0
+** f1cvtl {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvtl1_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvtl1_f16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvtl1_f16_x2_fpm (z0, fpm0))
+
+/*
+** cvtl1_bf16_mf8_x2_fpm:
+** msr fpmr, x0
+** bf1cvtl {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvtl1_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvtl1_bf16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvtl1_bf16_x2_fpm (z0, fpm0))
+
+/*
+** cvtl2_f16_mf8_x2_fpm:
+** msr fpmr, x0
+** f2cvtl {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvtl2_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t,
+ z0_res = svcvtl2_f16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvtl2_f16_x2_fpm (z0, fpm0))
+
+/*
+** cvtl2_bf16_mf8_x2_fpm:
+** msr fpmr, x0
+** bf2cvtl {z0\.h - z1\.h}, z0\.b
+** ret
+*/
+TEST_X2_WIDE (cvtl2_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t,
+ z0_res = svcvtl2_bf16_mf8_x2_fpm (z0, fpm0),
+ z0_res = svcvtl2_bf16_x2_fpm (z0, fpm0))
#define TEST_X2_WIDE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, void, ()) \
{ \
+ register fpm_t fpm0 __asm ("x0"); \
register ZTYPE z0 __asm ("z0"); \
register ZTYPE z5 __asm ("z5"); \
register TTYPE z6 __asm ("z6"); \