diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 3d0d6bef76..7dd115fb65 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -288,6 +288,9 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() +if ((WAMR_BUILD_LIB_SIMDE EQUAL 1)) + message (" Lib simde enabled") +endif() if (WAMR_BUILD_MINI_LOADER EQUAL 1) add_definitions (-DWASM_ENABLE_MINI_LOADER=1) message (" WASM mini loader enabled") diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index 3ab0cff4fb..1c67579940 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -142,6 +142,10 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () +if (WAMR_BUILD_LIB_SIMDE EQUAL 1) + include (${IWASM_DIR}/libraries/simde/simde.cmake) +endif () + if (WAMR_BUILD_WASM_CACHE EQUAL 1) include (${WAMR_ROOT_DIR}/build-scripts/involve_boringssl.cmake) endif () diff --git a/core/config.h b/core/config.h index 50f7989224..16eec8487b 100644 --- a/core/config.h +++ b/core/config.h @@ -318,6 +318,12 @@ #define WASM_ENABLE_SIMD 0 #endif +/* Disable SIMDe (used in the fast interpreter for SIMD opcodes) +unless used elsewhere */ +#ifndef WASM_ENABLE_SIMDE +#define WASM_ENABLE_SIMDE 0 +#endif + /* GC performance profiling */ #ifndef WASM_ENABLE_GC_PERF_PROFILING #define WASM_ENABLE_GC_PERF_PROFILING 0 diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 0b89edf5e8..0fe7bd4010 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -73,6 +73,12 @@ STORE_U8(void *addr, uint8_t value) *(uint8 *)addr = value; } +static inline void +STORE_V128(void *addr, V128 value) +{ + *(V128 *)addr = value; +} + /* For LOAD opcodes */ #define LOAD_I64(addr) (*(int64 *)(addr)) #define LOAD_F64(addr) (*(float64 *)(addr)) @@ -80,6 +86,7 @@ STORE_U8(void *addr, 
uint8_t value) #define LOAD_U32(addr) (*(uint32 *)(addr)) #define LOAD_I16(addr) (*(int16 *)(addr)) #define LOAD_U16(addr) (*(uint16 *)(addr)) +#define LOAD_V128(addr) (*(V128 *)(addr)) #define STORE_PTR(addr, ptr) \ do { \ @@ -264,7 +271,75 @@ STORE_U16(void *addr, uint16_t value) ((uint8_t *)(addr))[0] = u.u8[0]; ((uint8_t *)(addr))[1] = u.u8[1]; } + +static inline void +STORE_V128(void *addr, V128 value) +{ + uintptr_t addr_ = (uintptr_t)(addr); + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + + if ((addr_ & (uintptr_t)15) == 0) { + *(V128 *)addr = value; + } + else { + u.val = value; + if ((addr_ & (uintptr_t)7) == 0) { + ((uint64 *)(addr))[0] = u.u64[0]; + ((uint64 *)(addr))[1] = u.u64[1]; + } + else if ((addr_ & (uintptr_t)3) == 0) { + ((uint32 *)addr)[0] = u.u32[0]; + ((uint32 *)addr)[1] = u.u32[1]; + ((uint32 *)addr)[2] = u.u32[2]; + ((uint32 *)addr)[3] = u.u32[3]; + } + else { /* 1- or 2-byte aligned: store byte by byte */ + uint32 i; + for (i = 0; i < 16; i++) + ((uint8 *)addr)[i] = u.u8[i]; + } + } +} + /* For LOAD opcodes */ +static inline V128 +LOAD_V128(void *addr) +{ + uintptr_t addr1 = (uintptr_t)addr; + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + if ((addr1 & (uintptr_t)15) == 0) + return *(V128 *)addr; + + if ((addr1 & (uintptr_t)7) == 0) { + u.u64[0] = ((uint64 *)addr)[0]; + u.u64[1] = ((uint64 *)addr)[1]; + } + else if ((addr1 & (uintptr_t)3) == 0) { + u.u32[0] = ((uint32 *)addr)[0]; + u.u32[1] = ((uint32 *)addr)[1]; + u.u32[2] = ((uint32 *)addr)[2]; + u.u32[3] = ((uint32 *)addr)[3]; + } + else { /* 1- or 2-byte aligned: load byte by byte */ + uint32 i; + for (i = 0; i < 16; i++) + u.u8[i] = ((uint8 *)addr)[i]; + } + return u.val; +} + static inline int64 LOAD_I64(void *addr) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 9d3b743382..b5620d8902 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -21,6 +21,10 @@ #include "../common/wasm_shared_memory.h" #endif +#if WASM_ENABLE_SIMDE != 0 +#include "simde/wasm/simd128.h" +#endif + typedef int32 CellType_I32; typedef int64 
CellType_I64; typedef float32 CellType_F32; @@ -5646,7 +5650,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #endif goto call_func_from_entry; } -#if WASM_ENABLE_SIMD != 0 +#if WASM_ENABLE_SIMDE != 0 +#define SIMD_V128_TO_SIMDE_V128(v) \ + ({ \ + bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ + simde_v128_t result; \ + bh_memcpy_s(&result, sizeof(simde_v128_t), &(v), sizeof(V128)); \ + result; \ + }) + +#define SIMDE_V128_TO_SIMD_V128(sv, v) \ + do { \ + bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \ + bh_memcpy_s(&(v), sizeof(V128), &(sv), sizeof(simde_v128_t)); \ + } while (0) + HANDLE_OP(WASM_OP_SIMD_PREFIX) { GET_OPCODE(); @@ -5654,19 +5672,122 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, switch (opcode) { /* Memory */ case SIMD_v128_load: + { + uint32 offset, addr; + offset = read_uint32( + frame_ip); // TODO: Check with an offset! + addr = GET_OPERAND(uint32, I32, 0); + frame_ip += 2; + addr_ret = GET_OFFSET(); + CHECK_MEMORY_OVERFLOW(16); + PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr)); + break; + } +#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(8); /* extending loads read 64 bits */ \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; /* lane i already holds the widened i-th element */ \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) case SIMD_v128_load8x8_s: + { + SIMD_LOAD_OP(SIMD_v128_load8x8_s, + simde_wasm_i16x8_load8x8, 16, 8); + break; + } case SIMD_v128_load8x8_u: + { + SIMD_LOAD_OP(SIMD_v128_load8x8_u, + simde_wasm_u16x8_load8x8, 16, 8); + break; + } case 
SIMD_v128_load16x4_s: + { + SIMD_LOAD_OP(SIMD_v128_load16x4_s, + simde_wasm_i32x4_load16x4, 32, 4); + break; + } case SIMD_v128_load16x4_u: + { + SIMD_LOAD_OP(SIMD_v128_load16x4_u, + simde_wasm_u32x4_load16x4, 32, 4); + break; + } case SIMD_v128_load32x2_s: + { + SIMD_LOAD_OP(SIMD_v128_load32x2_s, + simde_wasm_i64x2_load32x2, 64, 2); + break; + } case SIMD_v128_load32x2_u: + { + SIMD_LOAD_OP(SIMD_v128_load32x2_u, + simde_wasm_u64x2_load32x2, 64, 2); + break; + } +#define SIMD_LOAD_SPLAT_OP(op_name, simde_func, width) \ + do { \ + uint32 offset, addr; \ + offset = read_uint32(frame_ip); \ + addr = GET_OPERAND(uint32, I32, 0); \ + frame_ip += 2; \ + addr_ret = GET_OFFSET(); \ + CHECK_MEMORY_OVERFLOW(width); /* only one element is read */ \ + \ + simde_v128_t simde_result = simde_func(maddr); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + case SIMD_v128_load8_splat: + { + SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat, + simde_wasm_v128_load8_splat, 1); + break; + } case SIMD_v128_load16_splat: + { + SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat, + simde_wasm_v128_load16_splat, 2); + break; + } case SIMD_v128_load32_splat: + { + SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat, + simde_wasm_v128_load32_splat, 4); + break; + } case SIMD_v128_load64_splat: + { + SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat, + simde_wasm_v128_load64_splat, 8); + break; + } case SIMD_v128_store: { - wasm_set_exception(module, "unsupported SIMD opcode"); + uint32 offset, addr; + V128 data; + + offset = read_uint32(frame_ip); + /* operand slots follow the loader's pop order: value, then address */ + data = POP_V128(); + addr = POP_I32(); + + CHECK_MEMORY_OVERFLOW(16); + STORE_V128(maddr, data); break; } @@ -5681,25 +5809,100 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip); break; } + // TODO: Add a faster SIMD implementation case SIMD_v8x16_shuffle: - case SIMD_v8x16_swizzle: { - wasm_set_exception(module, "unsupported SIMD opcode"); 
+ V128 indices; /* lane mask is emitted ahead of the operand offsets */ + bh_memcpy_s(&indices, sizeof(V128), frame_ip, + sizeof(V128)); + frame_ip += sizeof(V128); + + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + V128 result; + for (int i = 0; i < 16; i++) { + uint8_t index = indices.i8x16[i]; + if (index < 16) { + result.i8x16[i] = v1.i8x16[index]; + } + else { + result.i8x16[i] = v2.i8x16[index - 16]; + } + } + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } + case SIMD_v8x16_swizzle: + { + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + simde_v128_t simde_result = simde_wasm_i8x16_swizzle( + SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + break; /* do not fall through into the splat handlers */ + } /* Splat */ +#define SIMD_SPLAT_OP(simde_func, pop_func, val_type) \ + do { \ + val_type val = pop_func(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = simde_func(val); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + +#define SIMD_SPLAT_OP_I32(simde_func) SIMD_SPLAT_OP(simde_func, POP_I32, uint32) +#define SIMD_SPLAT_OP_I64(simde_func) SIMD_SPLAT_OP(simde_func, POP_I64, uint64) +#define SIMD_SPLAT_OP_F32(simde_func) \ + SIMD_SPLAT_OP(simde_func, POP_F32, float32) +#define SIMD_SPLAT_OP_F64(simde_func) \ + SIMD_SPLAT_OP(simde_func, POP_F64, float64) + case SIMD_i8x16_splat: + { + SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat); + break; + } case SIMD_i16x8_splat: + { + SIMD_SPLAT_OP_I32(simde_wasm_i16x8_splat); + break; + } case SIMD_i32x4_splat: + { + SIMD_SPLAT_OP_I32(simde_wasm_i32x4_splat); + break; + } case SIMD_i64x2_splat: + { + SIMD_SPLAT_OP_I64(simde_wasm_i64x2_splat); + break; + } case SIMD_f32x4_splat: + { + SIMD_SPLAT_OP_F32(simde_wasm_f32x4_splat); + break; + } case SIMD_f64x2_splat: { - wasm_set_exception(module, "unsupported SIMD opcode"); + 
SIMD_SPLAT_OP_F64(simde_wasm_f64x2_splat); break; } + // TODO: /* Lane */ case SIMD_i8x16_extract_lane_s: case SIMD_i8x16_extract_lane_u: @@ -5720,89 +5923,238 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } +#define SIMD_DOUBLE_OP(simde_func) \ + do { \ + V128 v2 = POP_V128(); /* right operand is popped first */ \ + V128 v1 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \ + SIMD_V128_TO_SIMDE_V128(v2)); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + /* i8x16 comparison operations */ case SIMD_i8x16_eq: { - V128 v1 = POP_V128(); - V128 v2 = POP_V128(); - int i; - addr_ret = GET_OFFSET(); - - V128 result; - for (i = 0; i < 16; i++) { - result.i8x16[i] = - v1.i8x16[i] == v2.i8x16[i] ? 0xff : 0; - } - PUT_V128_TO_ADDR(frame_lp + addr_ret, result); + SIMD_DOUBLE_OP(simde_wasm_i8x16_eq); break; } case SIMD_i8x16_ne: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_ne); + break; + } case SIMD_i8x16_lt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_lt); + break; + } case SIMD_i8x16_lt_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_lt); /* _u opcodes compare unsigned lanes */ + break; + } case SIMD_i8x16_gt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_gt); + break; + } case SIMD_i8x16_gt_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_gt); + break; + } case SIMD_i8x16_le_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_le); + break; + } case SIMD_i8x16_le_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_le); + break; + } case SIMD_i8x16_ge_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_ge); + break; + } case SIMD_i8x16_ge_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_u8x16_ge); break; } /* i16x8 comparison operations */ case SIMD_i16x8_eq: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_eq); + break; + } case SIMD_i16x8_ne: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_ne); + break; + } case SIMD_i16x8_lt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_lt); + break; + } case SIMD_i16x8_lt_u: + 
{ + SIMD_DOUBLE_OP(simde_wasm_u16x8_lt); /* _u opcodes compare unsigned lanes */ + break; + } case SIMD_i16x8_gt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_gt); + break; + } case SIMD_i16x8_gt_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_gt); + break; + } case SIMD_i16x8_le_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_le); + break; + } case SIMD_i16x8_le_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_le); + break; + } case SIMD_i16x8_ge_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_ge); + break; + } case SIMD_i16x8_ge_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_u16x8_ge); break; } /* i32x4 comparison operations */ case SIMD_i32x4_eq: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_eq); + break; + } case SIMD_i32x4_ne: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_ne); + break; + } case SIMD_i32x4_lt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_lt); + break; + } case SIMD_i32x4_lt_u: + { + SIMD_DOUBLE_OP(simde_wasm_u32x4_lt); + break; + } case SIMD_i32x4_gt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_gt); + break; + } case SIMD_i32x4_gt_u: + { + SIMD_DOUBLE_OP(simde_wasm_u32x4_gt); + break; + } case SIMD_i32x4_le_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_le); + break; + } case SIMD_i32x4_le_u: + { + SIMD_DOUBLE_OP(simde_wasm_u32x4_le); + break; + } case SIMD_i32x4_ge_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_ge); + break; + } case SIMD_i32x4_ge_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_u32x4_ge); break; } /* f32x4 comparison operations */ case SIMD_f32x4_eq: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_eq); + break; + } case SIMD_f32x4_ne: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_ne); + break; + } case SIMD_f32x4_lt: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_lt); + break; + } case SIMD_f32x4_gt: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_gt); + break; + } case SIMD_f32x4_le: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_le); + break; + } case SIMD_f32x4_ge: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f32x4_ge); break; } /* f64x2 comparison 
operations */ case SIMD_f64x2_eq: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_eq); /* f64x2 lanes, not f32x4 */ + break; + } case SIMD_f64x2_ne: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_ne); + break; + } case SIMD_f64x2_lt: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_lt); + break; + } case SIMD_f64x2_gt: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_gt); + break; + } case SIMD_f64x2_le: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_le); + break; + } case SIMD_f64x2_ge: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f64x2_ge); break; } @@ -5856,10 +6208,24 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, v1.i64x2[1] ^ v2.i64x2[1]); break; } + // TODO: Test case SIMD_v128_bitselect: { - wasm_set_exception(module, "unsupported SIMD opcode"); + V128 v3 = POP_V128(); /* mask: top of stack, popped first */ + V128 v2 = POP_V128(); + V128 v1 = POP_V128(); + addr_ret = GET_OFFSET(); + + simde_v128_t simde_result = simde_wasm_v128_bitselect( + SIMD_V128_TO_SIMDE_V128(v1), + SIMD_V128_TO_SIMDE_V128(v2), + SIMD_V128_TO_SIMDE_V128(v3)); + + V128 result; + SIMDE_V128_TO_SIMD_V128(simde_result, result); + + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); break; } case SIMD_v128_any_true: { @@ -5870,6 +6235,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } + // TODO: /* load lane operations */ case SIMD_v128_load8_lane: case SIMD_v128_load16_lane: @@ -5886,209 +6252,808 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, break; } +#define SIMD_SINGLE_OP(simde_func) \ + do { \ + V128 v1 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1)); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) + /* Float conversion */ case SIMD_f32x4_demote_f64x2_zero: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_demote_f64x2_zero); + break; + } case SIMD_f64x2_promote_low_f32x4_zero: { - wasm_set_exception(module, "unsupported SIMD opcode"); + 
SIMD_SINGLE_OP(simde_wasm_f64x2_promote_low_f32x4); break; } /* i8x16 operations */ case SIMD_i8x16_abs: + { + SIMD_SINGLE_OP(simde_wasm_i8x16_abs); + break; + } case SIMD_i8x16_neg: + { + SIMD_SINGLE_OP(simde_wasm_i8x16_neg); + break; + } case SIMD_i8x16_popcnt: + { + SIMD_SINGLE_OP(simde_wasm_i8x16_popcnt); + break; + } case SIMD_i8x16_all_true: { - V128 v = POP_V128(); - uint8_t *bytes = (uint8_t *)&v; - bool all_true = true; + V128 v1 = POP_V128(); - for (int i = 0; i < 16; i++) { - if (bytes[i] == 0) { - all_true = false; - break; - } - } + bool result = simde_wasm_i8x16_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); - PUSH_I32(all_true ? 1 : 0); + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; break; } case SIMD_i8x16_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i8x16_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i8x16_narrow_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8); + break; + } case SIMD_i8x16_narrow_i16x8_u: - case SIMD_f32x4_ceil: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_narrow_i16x8); /* saturate to unsigned range */ + break; + } + case SIMD_f32x4_ceil: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_ceil); + break; + } case SIMD_f32x4_floor: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_floor); + break; + } case SIMD_f32x4_trunc: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_trunc); + break; + } case SIMD_f32x4_nearest: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_nearest); + break; + } +// TODO: Check count? 
+#define SIMD_LANE_SHIFT(simde_func) \ + do { \ + int32 count = POP_I32(); \ + V128 v1 = POP_V128(); \ + addr_ret = GET_OFFSET(); \ + \ + simde_v128_t simde_result = \ + simde_func(SIMD_V128_TO_SIMDE_V128(v1), count); \ + \ + V128 result; \ + SIMDE_V128_TO_SIMD_V128(simde_result, result); \ + \ + PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \ + } while (0) case SIMD_i8x16_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shl); + break; + } case SIMD_i8x16_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i8x16_shr); + break; + } case SIMD_i8x16_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_u8x16_shr); /* logical shift for _u */ + break; + } case SIMD_i8x16_add: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add); + break; + } case SIMD_i8x16_add_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat); + break; + } case SIMD_i8x16_add_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_add_sat); + break; + } case SIMD_i8x16_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub); + break; + } case SIMD_i8x16_sub_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat); + break; + } case SIMD_i8x16_sub_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_sub_sat); + break; + } case SIMD_f64x2_ceil: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_ceil); + break; + } case SIMD_f64x2_floor: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_floor); + break; + } case SIMD_i8x16_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_min); + break; + } case SIMD_i8x16_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_min); + break; + } case SIMD_i8x16_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i8x16_max); + break; + } case SIMD_i8x16_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_max); + break; + } case SIMD_f64x2_trunc: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_trunc); + break; + } case SIMD_i8x16_avgr_u: + { + SIMD_DOUBLE_OP(simde_wasm_u8x16_avgr); + break; + } case SIMD_i16x8_extadd_pairwise_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16); + break; + } case SIMD_i16x8_extadd_pairwise_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_u16x8_extadd_pairwise_u8x16); + break; + } case 
SIMD_i32x4_extadd_pairwise_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8); + break; + } case SIMD_i32x4_extadd_pairwise_i16x8_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SINGLE_OP(simde_wasm_u32x4_extadd_pairwise_u16x8); /* _u: zero-extend lanes */ break; } /* i16x8 operations */ case SIMD_i16x8_abs: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_abs); + break; + } case SIMD_i16x8_neg: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_neg); + break; + } case SIMD_i16x8_q15mulr_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_q15mulr_sat); + break; + } case SIMD_i16x8_all_true: + { + V128 v1 = POP_V128(); + + bool result = simde_wasm_i16x8_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i16x8_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i16x8_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i16x8_narrow_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4); + break; + } case SIMD_i16x8_narrow_i32x4_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_narrow_i32x4); + break; + } case SIMD_i16x8_extend_low_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16); + break; + } case SIMD_i16x8_extend_high_i8x16_s: + { + SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16); + break; + } case SIMD_i16x8_extend_low_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_u16x8_extend_low_u8x16); + break; + } case SIMD_i16x8_extend_high_i8x16_u: + { + SIMD_SINGLE_OP(simde_wasm_u16x8_extend_high_u8x16); + break; + } case SIMD_i16x8_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shl); + break; + } case SIMD_i16x8_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i16x8_shr); + break; + } case SIMD_i16x8_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_u16x8_shr); + break; + } case SIMD_i16x8_add: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add); + break; + } case SIMD_i16x8_add_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat); + break; + } case 
SIMD_i16x8_add_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_add_sat); /* _u: unsigned saturation */ + break; + } case SIMD_i16x8_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub); + break; + } case SIMD_i16x8_sub_sat_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat); + break; + } case SIMD_i16x8_sub_sat_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_sub_sat); + break; + } case SIMD_f64x2_nearest: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_nearest); + break; + } case SIMD_i16x8_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_mul); + break; + } case SIMD_i16x8_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_min); + break; + } case SIMD_i16x8_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_min); + break; + } case SIMD_i16x8_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_max); + break; + } case SIMD_i16x8_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_max); + break; + } case SIMD_i16x8_avgr_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_avgr); + break; + } case SIMD_i16x8_extmul_low_i8x16_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16); + break; + } case SIMD_i16x8_extmul_high_i8x16_s: + { + SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16); + break; + } case SIMD_i16x8_extmul_low_i8x16_u: + { + SIMD_DOUBLE_OP(simde_wasm_u16x8_extmul_low_u8x16); + break; + } case SIMD_i16x8_extmul_high_i8x16_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_u16x8_extmul_high_u8x16); break; } /* i32x4 operations */ case SIMD_i32x4_abs: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_abs); + break; + } case SIMD_i32x4_neg: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_neg); + break; + } case SIMD_i32x4_all_true: + { + V128 v1 = POP_V128(); + + bool result = simde_wasm_i32x4_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i32x4_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i32x4_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case 
SIMD_i32x4_extend_low_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8); + break; + } case SIMD_i32x4_extend_high_i16x8_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8); + break; + } case SIMD_i32x4_extend_low_i16x8_u: + { + SIMD_SINGLE_OP(simde_wasm_u32x4_extend_low_u16x8); /* _u: zero-extend lanes */ + break; + } case SIMD_i32x4_extend_high_i16x8_u: + { + SIMD_SINGLE_OP(simde_wasm_u32x4_extend_high_u16x8); + break; + } case SIMD_i32x4_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shl); + break; + } case SIMD_i32x4_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i32x4_shr); + break; + } case SIMD_i32x4_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_u32x4_shr); + break; + } case SIMD_i32x4_add: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_add); + break; + } case SIMD_i32x4_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_sub); + break; + } case SIMD_i32x4_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_mul); + break; + } case SIMD_i32x4_min_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_min); + break; + } case SIMD_i32x4_min_u: + { + SIMD_DOUBLE_OP(simde_wasm_u32x4_min); + break; + } case SIMD_i32x4_max_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_max); + break; + } case SIMD_i32x4_max_u: + { + SIMD_DOUBLE_OP(simde_wasm_u32x4_max); + break; + } case SIMD_i32x4_dot_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_dot_i16x8); + break; + } case SIMD_i32x4_extmul_low_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8); + break; + } case SIMD_i32x4_extmul_high_i16x8_s: + { + SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8); + break; + } case SIMD_i32x4_extmul_low_i16x8_u: + { + SIMD_DOUBLE_OP(simde_wasm_u32x4_extmul_low_u16x8); + break; + } case SIMD_i32x4_extmul_high_i16x8_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_u32x4_extmul_high_u16x8); break; } /* i64x2 operations */ case SIMD_i64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_abs); + break; + } case SIMD_i64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_neg); + break; + } case SIMD_i64x2_all_true: + { + 
V128 v1 = POP_V128(); + + bool result = simde_wasm_i64x2_all_true( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i64x2_bitmask: + { + V128 v1 = POP_V128(); + + uint32_t result = simde_wasm_i64x2_bitmask( + SIMD_V128_TO_SIMDE_V128(v1)); + + addr_ret = GET_OFFSET(); + frame_lp[addr_ret] = result; + break; + } case SIMD_i64x2_extend_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4); + break; + } case SIMD_i64x2_extend_high_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4); + break; + } case SIMD_i64x2_extend_low_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_u64x2_extend_low_u32x4); /* _u: zero-extend lanes */ + break; + } case SIMD_i64x2_extend_high_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_u64x2_extend_high_u32x4); + break; + } + + // TODO: Verify count works case SIMD_i64x2_shl: + { + SIMD_LANE_SHIFT(simde_wasm_i64x2_shl); + break; + } case SIMD_i64x2_shr_s: + { + SIMD_LANE_SHIFT(simde_wasm_i64x2_shr); + break; + } case SIMD_i64x2_shr_u: + { + SIMD_LANE_SHIFT(simde_wasm_u64x2_shr); + break; + } case SIMD_i64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_add); + break; + } case SIMD_i64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_sub); + break; + } case SIMD_i64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_mul); + break; + } case SIMD_i64x2_eq: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_eq); + break; + } case SIMD_i64x2_ne: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_ne); + break; + } case SIMD_i64x2_lt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_lt); + break; + } case SIMD_i64x2_gt_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_gt); + break; + } case SIMD_i64x2_le_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_le); + break; + } case SIMD_i64x2_ge_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_ge); + break; + } case SIMD_i64x2_extmul_low_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4); + break; + } case SIMD_i64x2_extmul_high_i32x4_s: + { + SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4); + break; + } case 
SIMD_i64x2_extmul_low_i32x4_u: + { + SIMD_DOUBLE_OP(simde_wasm_u64x2_extmul_low_u32x4); /* _u: zero-extend before multiply */ + break; + } case SIMD_i64x2_extmul_high_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_u64x2_extmul_high_u32x4); break; } /* f32x4 opertions */ case SIMD_f32x4_abs: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_abs); + break; + } case SIMD_f32x4_neg: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_neg); + break; + } case SIMD_f32x4_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_sqrt); + break; + } case SIMD_f32x4_add: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_add); + break; + } case SIMD_f32x4_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_sub); + break; + } case SIMD_f32x4_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_mul); + break; + } case SIMD_f32x4_div: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_div); + break; + } case SIMD_f32x4_min: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_min); + break; + } case SIMD_f32x4_max: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_max); + break; + } case SIMD_f32x4_pmin: + { + SIMD_DOUBLE_OP(simde_wasm_f32x4_pmin); + break; + } case SIMD_f32x4_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f32x4_pmax); break; } /* f64x2 operations */ case SIMD_f64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_abs); + break; + } case SIMD_f64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_neg); + break; + } case SIMD_f64x2_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_sqrt); + break; + } case SIMD_f64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_add); + break; + } case SIMD_f64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_sub); + break; + } case SIMD_f64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_mul); + break; + } case SIMD_f64x2_div: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_div); + break; + } case SIMD_f64x2_min: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_min); + break; + } case SIMD_f64x2_max: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_max); + break; + } case SIMD_f64x2_pmin: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmin); + break; 
+ } case SIMD_f64x2_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmax); break; } /* Conversion operations */ case SIMD_i32x4_trunc_sat_f32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_i32x4_trunc_sat_f32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_u32x4_trunc_sat_f32x4); /* _u: saturate to [0, UINT32_MAX] */ + break; + } case SIMD_f32x4_convert_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_f32x4_convert_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_u32x4); + break; + } case SIMD_i32x4_trunc_sat_f64x2_s_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_i32x4_trunc_sat_f64x2_u_zero: + { + SIMD_SINGLE_OP(simde_wasm_u32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_f64x2_convert_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); + break; + } case SIMD_f64x2_convert_low_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_u32x4); break; } @@ -6098,6 +7063,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } #endif + HANDLE_OP(WASM_OP_CALL) { #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 5005fc63bf..0bacc31e76 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -14934,6 +14934,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_AND_PUSH(mem_offset_type, VALUE_TYPE_V128); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 func->has_memory_operations = true; @@ -14953,6 +14957,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP 
!= 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_V128(); POP_MEM_OFFSET(); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 @@ -14982,12 +14990,17 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, CHECK_BUF1(p, p_end, 16); mask = read_i8x16(p, error_buf, error_buf_size); - p += 16; if (!check_simd_shuffle_mask(mask, error_buf, error_buf_size)) { goto fail; } - +#if WASM_ENABLE_FAST_INTERP != 0 + uint64 high, low; + wasm_runtime_read_v128(p, &high, &low); + emit_uint64(loader_ctx, high); + emit_uint64(loader_ctx, low); +#endif + p += 16; POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; } @@ -15058,7 +15071,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf_size)) { goto fail; } - if (replace[opcode1 - SIMD_i8x16_extract_lane_s]) { if (!(wasm_loader_pop_frame_ref( loader_ctx, diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 1424840e79..75d30c9b31 100644 --- a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -779,10 +779,10 @@ typedef enum WASMAtomicEXTOpcode { #else #define DEF_DEBUG_BREAK_HANDLE() #endif - #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake new file mode 100644 index 0000000000..b36e356945 --- /dev/null +++ b/core/iwasm/libraries/simde/simde.cmake @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Amazon Inc. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# simde is a header only library + +set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) + +if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*") + add_definitions (-DWASM_ENABLE_SIMDE=1) +endif () + +include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) + +include(FetchContent) + +FetchContent_Declare( + simde + GIT_REPOSITORY https://github.com/simd-everywhere/simde + GIT_TAG v0.8.2 +) + +message("-- Fetching simde ..") +FetchContent_MakeAvailable(simde) +include_directories("${simde_SOURCE_DIR}")