From 8dec7c94e121c278cfef3352924b00fe138898dd Mon Sep 17 00:00:00 2001 From: taylorswift Date: Wed, 31 Jan 2024 02:37:11 +0000 Subject: [PATCH] fix broken SIMD acceleration on apple silicon --- Sources/LZ77/SIMD16 (ext).swift | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/Sources/LZ77/SIMD16 (ext).swift b/Sources/LZ77/SIMD16 (ext).swift index 1bf697e5..b1816a92 100644 --- a/Sources/LZ77/SIMD16 (ext).swift +++ b/Sources/LZ77/SIMD16 (ext).swift @@ -4,7 +4,7 @@ import _Builtin_intrinsics.intel -extension SIMD16 where Scalar == UInt8 +extension SIMD16<UInt8> { func find(_ key:UInt8) -> UInt16 { @@ -18,19 +18,25 @@ extension SIMD16 where Scalar == UInt8 #else -extension SIMD16 where Scalar == UInt8 +extension SIMD16<UInt8> { func find(_ key:UInt8) -> UInt16 { - // (key: 5, vector: (1, 5, 1, 1, 5, 5, 1, 1, 1, 1, 1, 1, 5, 1, 1, 5)) - let places:SIMD16<UInt8> = - .init(128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1), - match:SIMD16<UInt8> = places.replacing(with: 0, where: self .!= key) - // match: ( 0, 64, 0, 0, 8, 4, 0, 0, 0, 0, 0, 0, 8, 0, 0, 1) - let r8:SIMD8<UInt8> = match.evenHalf | match.oddHalf, - r4:SIMD4<UInt8> = r8.evenHalf | r8.oddHalf, - r2:SIMD2<UInt8> = r4.evenHalf | r4.oddHalf - return .init(r2.x) << 8 | .init(r2.y) + // key: 5 + // vector: + // (1, 5, 1, 1, 5, 5, 1, 1, + // 1, 1, 1, 1, 5, 1, 1, 5) + let places:SIMD16<UInt8> = .init( + 1, 2, 4, 8, 16, 32, 64, 128, + 1, 2, 4, 8, 16, 32, 64, 128) + let match:SIMD16<UInt8> = places.replacing(with: 0, where: self .!= key) + // match: + // (0, 2, 0, 0, 16, 32, 0, 0, + // 0, 0, 0, 0, 16, 0, 0, 128) + let r8:SIMD8<UInt8> = match.evenHalf | match.oddHalf + let r4:SIMD4<UInt8> = r8.evenHalf | r8.oddHalf + let r2:SIMD2<UInt8> = r4.evenHalf | r4.oddHalf + return .init(r2.y) << 8 | .init(r2.x) } }