From b08a73ef63c661a526a1859fc8bf47abb65e4c79 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 13 Feb 2025 22:38:41 +0100 Subject: [PATCH 1/5] use generic llvm intrinsics for min/max on powerpc --- crates/core_arch/src/powerpc/altivec.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 7d0081f03a..3eb388d399 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -158,32 +158,32 @@ unsafe extern "C" { #[link_name = "llvm.ppc.altivec.vmulosh"] fn vmulosh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.vmaxsb"] + #[link_name = "llvm.smax.v16i8"] fn vmaxsb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; - #[link_name = "llvm.ppc.altivec.vmaxsh"] + #[link_name = "llvm.smax.v8i16"] fn vmaxsh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; - #[link_name = "llvm.ppc.altivec.vmaxsw"] + #[link_name = "llvm.smax.v4i32"] fn vmaxsw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.vmaxub"] + #[link_name = "llvm.umax.v16i8"] fn vmaxub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.ppc.altivec.vmaxuh"] + #[link_name = "llvm.umax.v8i16"] fn vmaxuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; - #[link_name = "llvm.ppc.altivec.vmaxuw"] + #[link_name = "llvm.umax.v4i32"] fn vmaxuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; - #[link_name = "llvm.ppc.altivec.vminsb"] + #[link_name = "llvm.smin.v16i8"] fn vminsb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; - #[link_name = "llvm.ppc.altivec.vminsh"] + #[link_name = "llvm.smin.v8i16"] fn vminsh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; - 
#[link_name = "llvm.ppc.altivec.vminsw"] + #[link_name = "llvm.smin.v4i32"] fn vminsw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.vminub"] + #[link_name = "llvm.umin.v16i8"] fn vminub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.ppc.altivec.vminuh"] + #[link_name = "llvm.umin.v8i16"] fn vminuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; - #[link_name = "llvm.ppc.altivec.vminuw"] + #[link_name = "llvm.umin.v4i32"] fn vminuw(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vsubsbs"] From 1694df9a954e6d35e51a99255c3c773bbeea3e7c Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 13 Feb 2025 22:58:20 +0100 Subject: [PATCH 2/5] use `llvm.nearbyint.v4f32` for rounding see https://godbolt.org/z/Wx1KWezbe to see that these in fact generate equivalent assembly --- crates/core_arch/src/powerpc/altivec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 3eb388d399..242b3ec4d6 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -382,7 +382,7 @@ unsafe extern "C" { #[link_name = "llvm.ppc.altivec.vrlw"] fn vrlw(a: vector_signed_int, c: vector_unsigned_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.vrfin"] + #[link_name = "llvm.nearbyint.v4f32"] fn vrfin(a: vector_float) -> vector_float; } From 752d0ce00b18a1f53c483f1a943e2e2f0c138545 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 13 Feb 2025 23:03:24 +0100 Subject: [PATCH 3/5] use `simd_ctlz` instead of an extern --- crates/core_arch/src/powerpc/altivec.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 242b3ec4d6..466e9879b9 
100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -368,13 +368,6 @@ unsafe extern "C" { #[link_name = "llvm.ppc.altivec.srv"] fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.ctlz.v16i8"] - fn vclzb(a: vector_signed_char) -> vector_signed_char; - #[link_name = "llvm.ctlz.v8i16"] - fn vclzh(a: vector_signed_short) -> vector_signed_short; - #[link_name = "llvm.ctlz.v4i32"] - fn vclzw(a: vector_signed_int) -> vector_signed_int; - #[link_name = "llvm.ppc.altivec.vrlb"] fn vrlb(a: vector_signed_char, b: vector_unsigned_char) -> vector_signed_char; #[link_name = "llvm.ppc.altivec.vrlh"] @@ -3191,9 +3184,9 @@ mod sealed { impl_vec_shift_octect! { [VectorSro vec_sro] (vsro) } - test_impl! { vec_vcntlzb(a: vector_signed_char) -> vector_signed_char [vclzb, vclzb] } - test_impl! { vec_vcntlzh(a: vector_signed_short) -> vector_signed_short [vclzh, vclzh] } - test_impl! { vec_vcntlzw(a: vector_signed_int) -> vector_signed_int [vclzw, vclzw] } + test_impl! { vec_vcntlzb(a: vector_signed_char) -> vector_signed_char [simd_ctlz, vclzb] } + test_impl! { vec_vcntlzh(a: vector_signed_short) -> vector_signed_short [simd_ctlz, vclzh] } + test_impl! 
{ vec_vcntlzw(a: vector_signed_int) -> vector_signed_int [simd_ctlz, vclzw] } #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorCntlz { From 9a1cbcd0160a136e577671feecb18a0d0af54e78 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 22 Feb 2025 21:23:57 +0100 Subject: [PATCH 4/5] use `simd_shuffle` in the implementation of `vec_splat` --- crates/core_arch/src/powerpc/altivec.rs | 19 +++---------------- crates/core_arch/src/simd.rs | 4 ++++ 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 466e9879b9..95c67619fc 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -1455,8 +1455,7 @@ mod sealed { #[cfg_attr(test, assert_instr(vspltb, IMM4 = 15))] unsafe fn vspltb(a: vector_signed_char) -> vector_signed_char { static_assert_uimm_bits!(IMM4, 4); - let b = u8x16::splat(IMM4 as u8); - vec_perm(a, a, transmute(b)) + simd_shuffle(a, a, const { u32x16::from_array([IMM4; 16]) }) } #[inline] @@ -1464,12 +1463,7 @@ mod sealed { #[cfg_attr(test, assert_instr(vsplth, IMM3 = 7))] unsafe fn vsplth(a: vector_signed_short) -> vector_signed_short { static_assert_uimm_bits!(IMM3, 3); - let b0 = IMM3 as u8 * 2; - let b1 = b0 + 1; - let b = u8x16::new( - b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, b0, b1, - ); - vec_perm(a, a, transmute(b)) + simd_shuffle(a, a, const { u32x8::from_array([IMM3; 8]) }) } #[inline] @@ -1478,14 +1472,7 @@ mod sealed { #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxspltw, IMM2 = 3))] unsafe fn vspltw(a: vector_signed_int) -> vector_signed_int { static_assert_uimm_bits!(IMM2, 2); - let b0 = IMM2 as u8 * 4; - let b1 = b0 + 1; - let b2 = b0 + 2; - let b3 = b0 + 3; - let b = u8x16::new( - b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, b0, b1, b2, b3, - ); - vec_perm(a, a, transmute(b)) + simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) }) } 
#[unstable(feature = "stdarch_powerpc", issue = "111145")] diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 9adc2f5089..a97d45c3bd 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -17,6 +17,10 @@ macro_rules! simd_ty { pub(crate) const fn new($($param_name: $elem_type),*) -> Self { $id([$($param_name),*]) } + #[inline(always)] + pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self { + $id(elements) + } // FIXME: Workaround rust@60637 #[inline(always)] pub(crate) fn splat(value: $elem_type) -> Self { From aaa01dae7235364d4ca29f8e548eefbc61f557b0 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 22 Feb 2025 21:27:45 +0100 Subject: [PATCH 5/5] use `simd_neg` in the `impl crate::ops::Neg` on powerpc I had some problems with the old implementation not combining with other instructions well, and using the purpose-built intrinsic is just clearer. --- crates/core_arch/src/powerpc/macros.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/core_arch/src/powerpc/macros.rs b/crates/core_arch/src/powerpc/macros.rs index 4dcf558919..af47494e8f 100644 --- a/crates/core_arch/src/powerpc/macros.rs +++ b/crates/core_arch/src/powerpc/macros.rs @@ -298,8 +298,7 @@ macro_rules! impl_neg { impl crate::ops::Neg for s_t_l!($s) { type Output = s_t_l!($s); fn neg(self) -> Self::Output { - let zero = $s::splat($zero); - unsafe { transmute(simd_sub(zero, transmute(self))) } + unsafe { simd_neg(self) } } } };