From 701302abab4fc04630faf9a58e2cfa677ea331b6 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 11 Jun 2026 09:58:43 +0000 Subject: [PATCH] intrinsic-test: simplify architecture constants There doesn't need to be so many or other modules with the values. --- crates/intrinsic-test/src/arm/config.rs | 25 ---- crates/intrinsic-test/src/arm/mod.rs | 38 ++++- crates/intrinsic-test/src/common/gen_c.rs | 7 +- crates/intrinsic-test/src/common/gen_rust.rs | 5 +- crates/intrinsic-test/src/common/mod.rs | 21 +-- crates/intrinsic-test/src/x86/config.rs | 138 ----------------- crates/intrinsic-test/src/x86/mod.rs | 148 ++++++++++++++++++- 7 files changed, 181 insertions(+), 201 deletions(-) delete mode 100644 crates/intrinsic-test/src/arm/config.rs delete mode 100644 crates/intrinsic-test/src/x86/config.rs diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs deleted file mode 100644 index 7c26143622..0000000000 --- a/crates/intrinsic-test/src/arm/config.rs +++ /dev/null @@ -1,25 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from a JSON specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = ""; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] -#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] -#![feature(stdarch_neon_f16)] - -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] -use core_arch::arch::aarch64::*; - -#[cfg(target_arch = "arm")] -use core_arch::arch::arm::*; -"#; diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 378f23ba7c..3dcb5373ed 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -1,4 +1,3 @@ -mod config; mod intrinsic; mod json_parser; mod types; @@ -21,14 +20,20 @@ impl SupportedArchitectureTest for ArmArchitectureTest { &self.intrinsics } - const NOTICE: &str = config::NOTICE; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from a JSON specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; + const C_PRELUDE: &str = r#" +#include +#include +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name match cli_options.cc_arg_style { CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], @@ -69,3 +74,22 @@ impl SupportedArchitectureTest for ArmArchitectureTest { Self { intrinsics } } } + +const RUST_PRELUDE: &str = r#" +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] +#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] +#![feature(stdarch_neon_f16)] + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use core_arch::arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use core_arch::arch::arm::*; +"#; diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 24756324c4..92ac97ea6f 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -17,17 +17,14 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, - platform_headers: &[&str], + prelude: &str, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!(w, "{notice}")?; writeln!(w, "#include ")?; writeln!(w, "#include ")?; - - for header in platform_headers { - writeln!(w, "#include <{header}>")?; - } + writeln!(w, "{prelude}")?; for intrinsic in intrinsics { intrinsic.iter_specializations(|imm_values| { diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 02f6e40dc0..2ab6c8408c 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -105,7 +105,6 @@ pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io: pub fn write_lib_rs( w: &mut impl std::io::Write, notice: &str, - cfg: &str, definitions: &str, i: usize, intrinsics: &[Intrinsic], @@ -121,12 +120,10 @@ pub fn write_lib_rs( writeln!(w, "#![allow(non_camel_case_types)]")?; writeln!(w, "#![allow(non_snake_case)]")?; - writeln!(w, "{cfg}")?; + writeln!(w, "{definitions}")?; writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?; - writeln!(w, "{definitions}")?; - let mut seen = std::collections::HashSet::new(); for intrinsic in intrinsics { diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index b577491454..df928fe05c 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -38,12 +38,10 @@ pub trait SupportedArchitectureTest { const NOTICE: &str; - const PLATFORM_C_HEADERS: &[&str]; + const C_PRELUDE: &str; + const RUST_PRELUDE: &str; - const PLATFORM_RUST_CFGS: &str; - const PLATFORM_RUST_DEFINITIONS: &str; - - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; fn generate_c_file(&self) { let (max_chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); @@ -55,14 +53,14 @@ pub trait SupportedArchitectureTest { .map(|(i, chunk)| { let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) + write_wrapper_c(&mut file, Self::NOTICE, Self::C_PRELUDE, chunk) }) .collect::>() .unwrap(); } fn generate_rust_file(&self, cli_options: &ProcessedCli) { - let arch_flags = self.arch_flags(cli_options); + let arch_flags = self.c_compiler_flags(cli_options); std::fs::create_dir_all("rust_programs").unwrap(); @@ -81,14 +79,7 @@ pub trait SupportedArchitectureTest { trace!("generating `{rust_filename}`"); let mut file = File::create(&rust_filename)?; - write_lib_rs( - &mut file, - Self::NOTICE, - Self::PLATFORM_RUST_CFGS, - Self::PLATFORM_RUST_DEFINITIONS, - i, - chunk, - )?; + write_lib_rs(&mut file, Self::NOTICE, Self::RUST_PRELUDE, i, chunk)?; run_rustfmt(&rust_filename); let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml"); diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs deleted file mode 100644 index 68737ab5ac..0000000000 --- a/crates/intrinsic-test/src/x86/config.rs +++ /dev/null @@ -1,138 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from an XML specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = r#" -use core_arch::arch::x86_64::*; - -#[inline] -unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { - _mm_castph_si128(_mm_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { - _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { - _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) -} - - -#[inline] -unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { - _mm_castps_ph(_mm_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { - _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { - _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) -} - -// === -#[inline] -unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) -} - -"#; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![feature(stdarch_x86_avx512_bf16)] -#![feature(stdarch_x86_avx512_f16)] -#![feature(stdarch_x86_rtm)] -#![feature(x86_amx_intrinsics)] -"#; diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index 288bd8bdf8..1da66be8bd 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -1,4 +1,3 @@ -mod config; mod constraint; mod intrinsic; mod types; @@ -22,14 +21,18 @@ impl SupportedArchitectureTest for X86ArchitectureTest { &self.intrinsics } - const NOTICE: &str = config::NOTICE; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from an XML specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; + const C_PRELUDE: &str = r#" +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { vec![ "-maes", "-mf16c", @@ -97,3 +100,134 @@ impl SupportedArchitectureTest for X86ArchitectureTest { Self { intrinsics } } } + +const RUST_PRELUDE: &str = r#" +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] + +use core_arch::arch::x86_64::*; + +#[inline] +unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { + _mm_castph_si128(_mm_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { + _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { + _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) +} + + +#[inline] +unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { + _mm_castps_ph(_mm_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { + _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { + _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) +} + +// === +#[inline] +unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) +} +"#;