diff --git a/include/nncase/codegen/stackvm/op_writer.h b/include/nncase/codegen/stackvm/op_writer.h index 873bf170d8..dcff068765 100644 --- a/include/nncase/codegen/stackvm/op_writer.h +++ b/include/nncase/codegen/stackvm/op_writer.h @@ -1,4 +1,4 @@ -/* This file is generated by tools/stackvm_gen/IsaGen at 2023/2/6 下午2:28:54 +08:00. +/* This file is generated by tools/stackvm_gen/IsaGen at 2023/5/9 下午5:18:43 +08:00. * * Copyright 2019-2021 Canaan Inc. * @@ -1543,6 +1543,19 @@ struct op_writer } }; +template <> +struct op_writer +{ + void operator()(const nncase::runtime::stackvm::tensor_instance_normalization_op_t &op, binary_writer &writer) const + { + writer.write(static_cast(op.opcode)); + writer.write(static_cast(op.funct)); + writer.write(static_cast(op.datatype)); + writer.write(op.input_shape); + writer.write(op.epsilon); + } +}; + class NNCASE_API op_builder { public: @@ -1684,6 +1697,7 @@ class NNCASE_API op_builder void tensor_layer_normalization_(datatype_t datatype, uint8_t input_shape, int32_t axis, float epsilon); void tensor_compress_(uint8_t input_shape_src, uint8_t condition_shape_src, float axis); void tensor_gather_elements_(uint8_t input_shape_src, uint8_t indices_shape_src, int32_t axis); + void tensor_instance_normalization_(datatype_t datatype, uint8_t input_shape, float epsilon); private: section_writer &writer_; diff --git a/include/nncase/ir/opcode.def b/include/nncase/ir/opcode.def index 97b8fc47e1..0fd5ccfb26 100644 --- a/include/nncase/ir/opcode.def +++ b/include/nncase/ir/opcode.def @@ -48,7 +48,8 @@ DEFINE_NEUTRAL_OPCODE(roi_align, RoiAlign, 0x126) DEFINE_NEUTRAL_OPCODE(compare, Compare, 0x127) DEFINE_NEUTRAL_OPCODE(softmax, Softmax, 0x128) DEFINE_NEUTRAL_OPCODE(gru, GRU, 0x129) -DEFINE_NEUTRAL_OPCODE(tflite_detection_postprocess, TfliteDetectionPostprocess, 0x12A) +DEFINE_NEUTRAL_OPCODE(tflite_detection_postprocess, TfliteDetectionPostprocess, 0x12A) DEFINE_NEUTRAL_OPCODE(layernorm, LayerNormalization, 0x12B) DEFINE_NEUTRAL_OPCODE(compress, Compress, 0x12C) DEFINE_NEUTRAL_OPCODE(gather_elements, GatherElements, 0x12D) +DEFINE_NEUTRAL_OPCODE(instancenorm, InstanceNormliaztion, 0x12E) diff --git a/include/nncase/ir/ops/instancenorm.h b/include/nncase/ir/ops/instancenorm.h new file mode 100644 index 0000000000..9902f99bc0 --- /dev/null +++ b/include/nncase/ir/ops/instancenorm.h @@ -0,0 +1,39 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "../node.h" +#include "nncase/ir/connectors.h" + +namespace nncase::ir +{ +class NNCASE_API instancenorm : public node +{ +public: + DEFINE_NODE_OPCODE(op_instancenorm); + + input_connector &input() { return input_at(0); } + input_connector &scale() { return input_at(1); } + input_connector &bias() { return input_at(2); } + output_connector &output() { return output_at(0); } + float epsilon() const noexcept { return epsilon_; } + instancenorm(datatype_t input_type, shape_t input_shape, float epsilon); + +protected: + bool properties_equal(node &other) const override; + +private: + float epsilon_; +}; +} diff --git a/include/nncase/kernels/cpu/optimized/tensor_compute.h b/include/nncase/kernels/cpu/optimized/tensor_compute.h index b483a6d60b..89b81f34f1 100644 --- a/include/nncase/kernels/cpu/optimized/tensor_compute.h +++ b/include/nncase/kernels/cpu/optimized/tensor_compute.h @@ -85,6 +85,9 @@ template NNCASE_API result sigmoid(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides) noexcept; +template +NNCASE_API result instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, float epsilon) noexcept; + template NNCASE_API result layernorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, int32_t axis, float epsilon) noexcept; diff --git a/include/nncase/kernels/cpu/reference/tensor_compute.h b/include/nncase/kernels/cpu/reference/tensor_compute.h index acac82cdab..bc5c3c14db 100644 --- a/include/nncase/kernels/cpu/reference/tensor_compute.h +++ b/include/nncase/kernels/cpu/reference/tensor_compute.h @@ -249,6 +249,11 @@ NNCASE_API result gather_elements(const TI *input, const TK *indices, TI *output, const runtime_shape_t &in_shape, const runtime_shape_t &indices_shape, const int axis) noexcept; +template +NNCASE_API result +instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, + float epsilon) noexcept; + template NNCASE_API result layernorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, int32_t axis, diff --git a/include/nncase/kernels/tensor_compute.h b/include/nncase/kernels/tensor_compute.h index e722e3e383..6f9c6c8a3d 100644 --- a/include/nncase/kernels/tensor_compute.h +++ b/include/nncase/kernels/tensor_compute.h @@ -170,6 +170,9 @@ template NNCASE_API result gather_elements(const TI *input, const TK *indices, TI *output, const runtime_shape_t &in_shape, const runtime_shape_t &indices_shape, const int axis) noexcept; +template +NNCASE_API result instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, float epsilon) noexcept; + template NNCASE_API result layernorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, int32_t axis, float epsilon) noexcept; diff --git a/include/nncase/runtime/stackvm/op_reader.h b/include/nncase/runtime/stackvm/op_reader.h index 63f4fe8250..5246ab4ea7 100644 --- a/include/nncase/runtime/stackvm/op_reader.h +++ b/include/nncase/runtime/stackvm/op_reader.h @@ -1,4 +1,4 @@ -/* This file is generated by tools/stackvm_gen/IsaGen at 2023/2/6 下午2:28:53 +08:00. +/* This file is generated by tools/stackvm_gen/IsaGen at 2023/5/9 下午5:18:43 +08:00. * * Copyright 2019-2021 Canaan Inc. * @@ -1813,6 +1813,21 @@ struct op_reader } }; +template <> +struct op_reader +{ + tensor_instance_normalization_op_t operator()(span_reader &reader) const + { + tensor_instance_normalization_op_t op(default_init); + op.opcode = static_cast(reader.read_unaligned()); + op.funct = static_cast(reader.read_unaligned()); + op.datatype = static_cast(reader.read_unaligned()); + op.input_shape = reader.read_unaligned(); + op.epsilon = reader.read_unaligned(); + return op; + } +}; + class NNCASE_API op_visitor { public: @@ -1959,6 +1974,7 @@ class NNCASE_API op_visitor virtual result visit(NNCASE_UNUSED const tensor_layer_normalization_op_t &op) noexcept { return ok(); } virtual result visit(NNCASE_UNUSED const tensor_compress_op_t &op) noexcept { return ok(); } virtual result visit(NNCASE_UNUSED const tensor_gather_elements_op_t &op) noexcept { return ok(); } + virtual result visit(NNCASE_UNUSED const tensor_instance_normalization_op_t &op) noexcept { return ok(); } protected: bool interrupted_; diff --git a/include/nncase/runtime/stackvm/opcode.h b/include/nncase/runtime/stackvm/opcode.h index 0707d72c28..3263a7f0af 100644 --- a/include/nncase/runtime/stackvm/opcode.h +++ b/include/nncase/runtime/stackvm/opcode.h @@ -1,4 +1,4 @@ -/* This file is generated by tools/stackvm_gen/IsaGen at 2023/2/6 下午2:28:53 +08:00. +/* This file is generated by tools/stackvm_gen/IsaGen at 2023/5/9 下午5:18:43 +08:00. * * Copyright 2019-2021 Canaan Inc. * @@ -166,6 +166,7 @@ enum class tensor_function_t LAYER_NORMALIZATION = 0x0029, COMPRESS = 0x002A, GATHER_ELEMENTS = 0x002B, + INSTANCE_NORMALIZATION = 0x002C, }; // Instructions @@ -1958,4 +1959,19 @@ struct tensor_gather_elements_op_t } }; +struct tensor_instance_normalization_op_t +{ + opcode_t opcode; + tensor_function_t funct; + datatype_t datatype; + uint8_t input_shape; + float epsilon; + + tensor_instance_normalization_op_t(default_init_t) noexcept { } + explicit tensor_instance_normalization_op_t(datatype_t datatype, uint8_t input_shape, float epsilon) noexcept + : opcode(opcode_t::TENSOR), funct(tensor_function_t::INSTANCE_NORMALIZATION), datatype(datatype), input_shape(input_shape), epsilon(epsilon) + { + } +}; + END_NS_NNCASE_RT_MODULE diff --git a/include/nncase/transforms/neutral/fold_instancenorm.h b/include/nncase/transforms/neutral/fold_instancenorm.h new file mode 100644 index 0000000000..f01ee2541c --- /dev/null +++ b/include/nncase/transforms/neutral/fold_instancenorm.h @@ -0,0 +1,29 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "../transform.h" + +namespace nncase::ir::transforms +{ +class NNCASE_API fold_instancenorm_transform : public transform +{ +public: + void process(transform_context &context) override; + +protected: + bool skip_self_contained_check() const noexcept override { return true; } + bool on_try_match(ir::node &node, transform_context &context) override; +}; +} diff --git a/src/codegen/stackvm/CMakeLists.txt b/src/codegen/stackvm/CMakeLists.txt index d0d58297ff..f99e2483f6 100644 --- a/src/codegen/stackvm/CMakeLists.txt +++ b/src/codegen/stackvm/CMakeLists.txt @@ -41,7 +41,8 @@ set(SRCS module_builder.cpp ops/trilu.cpp ops/tflite_detection_postprocess.cpp ops/unary.cpp - ops/layernorm.cpp) + ops/layernorm.cpp + ops/instancenorm.cpp) add_library(codegen_stackvm OBJECT ${SRCS}) target_link_libraries(codegen_stackvm PUBLIC ir schedule) diff --git a/src/codegen/stackvm/module_builder.h b/src/codegen/stackvm/module_builder.h index 1e0b52c075..5bf1d712b4 100644 --- a/src/codegen/stackvm/module_builder.h +++ b/src/codegen/stackvm/module_builder.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/src/codegen/stackvm/op_writer.cpp b/src/codegen/stackvm/op_writer.cpp index 8fa3f49b03..8a6a38dd9d 100644 --- a/src/codegen/stackvm/op_writer.cpp +++ b/src/codegen/stackvm/op_writer.cpp @@ -1,4 +1,4 @@ -/* This file is generated by tools/stackvm_gen/IsaGen at 2023/2/6 下午2:28:54 +08:00. +/* This file is generated by tools/stackvm_gen/IsaGen at 2023/5/9 下午5:18:43 +08:00. * * Copyright 2019-2021 Canaan Inc. * @@ -697,3 +697,8 @@ void op_builder::tensor_gather_elements_(uint8_t input_shape_src, uint8_t indice { op_writer()(tensor_gather_elements_op_t(input_shape_src, indices_shape_src, axis), writer_); } + +void op_builder::tensor_instance_normalization_(datatype_t datatype, uint8_t input_shape, float epsilon) +{ + op_writer()(tensor_instance_normalization_op_t(datatype, input_shape, epsilon), writer_); +} diff --git a/src/codegen/stackvm/ops.def b/src/codegen/stackvm/ops.def index 25a77ffb29..a30ed404bf 100644 --- a/src/codegen/stackvm/ops.def +++ b/src/codegen/stackvm/ops.def @@ -37,4 +37,5 @@ DEFINE_OP(transpose) DEFINE_OP(trilu) DEFINE_OP(tflite_detection_postprocess) DEFINE_OP(unary) -DEFINE_OP(layernorm) \ No newline at end of file +DEFINE_OP(layernorm) +DEFINE_OP(instancenorm) \ No newline at end of file diff --git a/src/codegen/stackvm/ops/instancenorm.cpp b/src/codegen/stackvm/ops/instancenorm.cpp new file mode 100644 index 0000000000..4f2c323410 --- /dev/null +++ b/src/codegen/stackvm/ops/instancenorm.cpp @@ -0,0 +1,37 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../module_builder.h" + +using namespace nncase; +using namespace nncase::codegen; +using namespace nncase::codegen::stackvm; +using namespace nncase::ir; + +void stackvm_module_builder::emit(instancenorm &node, stackvm_op_builder &builder) +{ + auto &input = allocation(node.input()); + auto &scale = allocation(node.scale()); + auto &bias = allocation(node.bias()); + auto &output = allocation(node.output()); + + builder.lea_buffer(input); + builder.lea_buffer(scale); + builder.lea_buffer(bias); + builder.lea_buffer(output); + + builder.stshape(0, input.shape); + + builder.tensor_instance_normalization_(node.output().type(), 0, node.epsilon()); +} diff --git a/src/evaluator/ops/neutral/neutral_ops.cpp b/src/evaluator/ops/neutral/neutral_ops.cpp index 4678983527..6a42b9d585 100644 --- a/src/evaluator/ops/neutral/neutral_ops.cpp +++ b/src/evaluator/ops/neutral/neutral_ops.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -886,6 +887,27 @@ void register_neutral_evaluators() } }); + register_evaluator(op_instancenorm, [](ir::node &node, function_evaluate_context &context) { + auto &rnode = static_cast(node); + + auto input = context.memory_at(rnode.input()); + auto scale = context.memory_at(rnode.scale()); + auto bias = context.memory_at(rnode.bias()); + auto output = context.memory_at(rnode.output()); + + auto output_type = rnode.output().type(); + switch (output_type) + { + case dt_float32: + kernels::instancenorm(input.buffer().as_span().data(), output.buffer().as_span().data(), + scale.buffer().as_span().data(), bias.buffer().as_span().data(), input.shape(), + rnode.epsilon()) + .unwrap_or_throw(); + break; + default: + std::cerr << "unsupported dtype for layernorm: " + std::string(datatype_names(output_type)); + } }); + register_evaluator(op_layernorm, [](ir::node &node, function_evaluate_context &context) { auto &rnode = static_cast(node); diff --git a/src/ir/ops/CMakeLists.txt b/src/ir/ops/CMakeLists.txt index 273ebd8cb1..f8125e4e57 100644 --- a/src/ir/ops/CMakeLists.txt +++ b/src/ir/ops/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.13) +cmake_minimum_required(VERSION 3.13) target_sources(ir PRIVATE call.cpp @@ -51,4 +51,5 @@ target_sources(ir PRIVATE gather_elements.cpp layernorm.cpp compress.cpp - ) + instancenorm.cpp +) diff --git a/src/ir/ops/instancenorm.cpp b/src/ir/ops/instancenorm.cpp new file mode 100644 index 0000000000..a6803fd246 --- /dev/null +++ b/src/ir/ops/instancenorm.cpp @@ -0,0 +1,35 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +using namespace nncase; +using namespace nncase::ir; + +instancenorm::instancenorm(datatype_t input_type, shape_t input_shape, float epsilon) + : epsilon_(epsilon) +{ + add_input("input", input_type, input_shape); + add_input("scale", input_type, shape_t { input_shape[1], 1, 1 }); + add_input("bias", input_type, shape_t { input_shape[1], 1, 1 }); + add_output("output", input_type, input_shape); +} + +bool instancenorm::properties_equal(node &other) const +{ + auto &r = static_cast(other); + return epsilon() == r.epsilon(); +} diff --git a/src/kernels/cpu/optimized/CMakeLists.txt b/src/kernels/cpu/optimized/CMakeLists.txt index e2f79bee71..e1c8569f8f 100644 --- a/src/kernels/cpu/optimized/CMakeLists.txt +++ b/src/kernels/cpu/optimized/CMakeLists.txt @@ -23,5 +23,6 @@ set(SRCS convolution.cpp ${ARCH}/softmax.cpp ${ARCH}/layernorm.cpp ${ARCH}/ternary.cpp - ${ARCH}/reduce.cpp) + ${ARCH}/reduce.cpp + ${ARCH}/instancenorm.cpp) target_sources(kernels PRIVATE ${SRCS}) diff --git a/src/kernels/cpu/optimized/riscv64/instancenorm.cpp b/src/kernels/cpu/optimized/riscv64/instancenorm.cpp new file mode 100644 index 0000000000..cd9c6a97a2 --- /dev/null +++ b/src/kernels/cpu/optimized/riscv64/instancenorm.cpp @@ -0,0 +1,172 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; +using namespace nncase::kernels; +using namespace nncase::kernels::cpu; +using namespace nncase::kernels::cpu::optimized; + +// #if __riscv_vector +// #define RVV_LMUL 8 +// #define _STR(x) #x +// #define STR(x) _STR(x) +// #define _CONNECT(a, b) a##b +// #define CONNECT(a, b) _CONNECT(a, b) +// #define RVVSETVLI2(evl, avl, elen) "vsetvli " STR(evl) "," STR(avl) "," STR(elen) "," STR(CONNECT(m, RVV_LMUL)) ";" + +// static float get_mean(const float *data, int n) +// { +// float ret; +// __asm volatile( +// "mv a0, %[avl];" +// "mv a1, %[input_ptr1];" RVVSETVLI2(t0, a0, e32) "vmv.s.x v0, x0;" +// "XXXXXX%=:;" RVVSETVLI2(t0, a0, e32) "vle32.v v8, (a1);" +// "sub a0,a0, t0;" +// "slli t1, t0, 2;" +// "vfredsum.vs v0,v8,v0;" + +// "add a1, a1, t1;" +// "bnez a0, XXXXXX%=;" +// "vfmv.f.s f0, v0;" +// "fcvt.s.w f1, %[avl];" +// "fdiv.s %[ret], f0, f1;" + +// : [ret] "=f"(ret) +// : [avl] "r"(n), [input_ptr1] "r"(data) +// : "t0", "t1", "a0", "a1", "f0", "f1", "v0", "v8"); +// return ret; +// } + +// static float get_var(const float *data, int n, float mean) +// { +// float ret; +// __asm volatile( + +// "mv a0, %[avl];" +// "mv a1, %[input_ptr1];" RVVSETVLI2(t0, a0, e32) "vmv.s.x v0, x0;" + +// "vle32.v v8, (a1);" +// "sub a0,a0, t0;" +// "slli t1, t0, 2;" +// "vfsub.vf v8, v8, %[mean];" +// "vfmul.vv v8, v8, v8;" +// "add a1, a1, t1;" +// "beqz a0, X1_END%=;" +// "X1_STRAT%=:;" RVVSETVLI2(t0, a0, e32) "vle32.v v16, (a1);" +// "sub a0,a0, t0;" +// "slli t1, t0, 2;" +// "vfsub.vf v16, v16, %[mean];" +// "vfmacc.vv v8, v16, v16;" + +// "add a1, a1, t1;" +// "bnez a0, X1_STRAT%=;" + +// "X1_END%=:" + +// "vfredsum.vs v0,v8,v0;" + +// "vfmv.f.s f0, v0;" +// "fcvt.s.w f1, %[avl];" +// "fdiv.s %[ret], f0, f1;" + +// : [ret] "=f"(ret) +// : [avl] "r"(n), [input_ptr1] "r"(data), [mean] "f"(mean) +// : "t0", "t1", "a0", "a1", "v0", "v8", "v16", "f0", "f1"); +// return ret; +// } + +// static void layer_norm_update1(const float *data, float *out, int len, float mean, float var, float *r1, float e, float *b) +// { +// float r_sqrt = 1.0f / sqrtf(var + e); +// __asm volatile( +// "mv a0, %[avl];" +// "mv a1, %[input_ptr1];" +// "mv a2, %[out];" +// "mv a3, %[scale];" +// "mv a4, %[b];" +// "layer_norm_update1%=:;" RVVSETVLI2(t0, a0, e32) "vle32.v v16, (a1);" +// "vle32.v v8, (a3);" +// "sub a0,a0, t0;" +// "slli t1, t0, 2;" +// "vfsub.vf v16, v16, %[mean];" +// "add a1, a1, t1;" +// "vfmul.vf v16, v16, %[r_sqrt];" + +// "add a3, a3, t1;" +// "vfmul.vv v16, v8, v16;" + +// "vle32.v v8, (a4);" +// "vfadd.vv v16, v16, v8;" +// "add a4, a4, t1;" + +// "vse32.v v16, (a2);" +// "add a2, a2, t1;" +// "bnez a0, layer_norm_update1%=;" + +// : +// : [avl] "r"(len), [input_ptr1] "r"(data), [mean] "f"(mean), [r_sqrt] "f"(r_sqrt), [b] "r"(b), [out] "r"(out), [scale] "r"(r1) +// : "t0", "t1", "a0", "a1", "a2", "v0", "v16", "a3", "a4", "v8"); +// } + +// result layernorm_impl(const float *input, float *output, float *scale, float *bias, const runtime_shape_t &in_shape, int32_t axis, float epsilon) +// { +// if (axis < 0) +// { +// axis = (int)in_shape.size() + axis; +// } +// auto outer_size = 1; +// auto inner_size = 1; +// for (auto i = 0; i < axis; i++) +// outer_size *= in_shape[i]; +// for (auto i = axis; i < static_cast(in_shape.size()); i++) +// inner_size *= in_shape[i]; + +// for (int32_t batch = 0; batch < outer_size; batch++) +// { +// const float *src = input + batch * inner_size; +// float *dest = output + batch * inner_size; + +// float mean = get_mean(src, inner_size); + +// float var_data = get_var(src, inner_size, mean); + +// layer_norm_update1(src, dest, inner_size, mean, var_data, scale, epsilon, bias); +// } +// return ok(); +// } +// #endif + +// template <> +result optimized::instancenorm(const float *input, float *output, float *scale, float *bias, const runtime_shape_t &in_shape, float epsilon) noexcept +{ + // #if __riscv_vector + // return instancenorm_impl(input, output, scale, bias, in_shape, epsilon); + // #else + return cpu::reference::instancenorm(input, output, scale, bias, in_shape, epsilon); + // #endif +} + +template +result optimized::instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, float epsilon) noexcept +{ + return cpu::reference::instancenorm(input, output, scale, bias, in_shape, epsilon); +} diff --git a/src/kernels/cpu/optimized/x86_64/instancenorm.cpp b/src/kernels/cpu/optimized/x86_64/instancenorm.cpp new file mode 100644 index 0000000000..b80b8d7f6b --- /dev/null +++ b/src/kernels/cpu/optimized/x86_64/instancenorm.cpp @@ -0,0 +1,34 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; +using namespace nncase::kernels; +using namespace nncase::kernels::cpu; +using namespace nncase::kernels::cpu::optimized; + +template result optimized::instancenorm(const float *input, float *output, float *scale, float *bias, const runtime_shape_t &in_shape, float epsilon) noexcept; + +template +result optimized::instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, float epsilon) noexcept +{ + return cpu::reference::instancenorm(input, output, scale, bias, in_shape, epsilon); +} \ No newline at end of file diff --git a/src/kernels/cpu/reference/CMakeLists.txt b/src/kernels/cpu/reference/CMakeLists.txt index 2742763ef1..0c14cde7c8 100644 --- a/src/kernels/cpu/reference/CMakeLists.txt +++ b/src/kernels/cpu/reference/CMakeLists.txt @@ -39,5 +39,6 @@ set(SRCS batch_to_space.cpp trilu.cpp tflite_detection_postprocess.cpp unary.cpp - layernorm.cpp) + layernorm.cpp + instancenorm.cpp) target_sources(kernels PRIVATE ${SRCS}) diff --git a/src/kernels/cpu/reference/instancenorm.cpp b/src/kernels/cpu/reference/instancenorm.cpp new file mode 100644 index 0000000000..c671a773b4 --- /dev/null +++ b/src/kernels/cpu/reference/instancenorm.cpp @@ -0,0 +1,73 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; +using namespace nncase::kernels; +using namespace nncase::kernels::cpu; +using namespace nncase::kernels::cpu::reference; + +template result reference::instancenorm(const float *input, float *output, float *scale, float *bias, const runtime_shape_t &in_shape, float epsilon) noexcept; + +template +result reference::instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, float epsilon) noexcept +{ + auto outer_size = in_shape[0]; + auto inner_size = 1; + for (auto i = 2; i < static_cast(in_shape.size()); i++) + inner_size *= in_shape[i]; + std::cout << "inner_size = " << inner_size << std::endl; + for (int32_t batch = 0; batch < outer_size; batch++) + { + for (int32_t c = 0; c < in_shape[1]; c++) + { + auto src = input + batch * inner_size * in_shape[1] + c * inner_size; + auto dest = output + batch * inner_size * in_shape[1] + c * inner_size; + + double mean1 = 0.f; + for (auto i = 0; i < inner_size; i++) + mean1 += src[i] / inner_size; + + std::vector sub(inner_size, 0.f); + for (auto i = 0; i < inner_size; i++) + sub[i] = (src[i] - mean1); + + std::vector pow(inner_size, 0.f); + for (auto i = 0; i < inner_size; i++) + pow[i] = sub[i] * sub[i]; + + double mean2 = 0.f; + for (auto i = 0; i < inner_size; i++) + mean2 += pow[i] / inner_size; + + double add = mean2 + epsilon; + double sqrt = std::sqrt(add); + + for (auto i = 0; i < inner_size; i++) + dest[i] = sub[i] * scale[c] / sqrt + bias[c]; + } + } + for (auto i = 0; i < 10; i++) + { + std::cout << "src[i] = " << input[i] << " --> dest[i] = " << output[i] << std::endl; + } + + return ok(); +} \ No newline at end of file diff --git a/src/kernels/tensor_compute.cpp b/src/kernels/tensor_compute.cpp index adea84c34e..9e29a234e7 100644 --- a/src/kernels/tensor_compute.cpp +++ b/src/kernels/tensor_compute.cpp @@ -521,6 +521,14 @@ result kernels::gather_elements(const TI *input, const TK *indices, TI *ou return cpu::reference::gather_elements(input, indices, output, in_shape, indices_shape, axis); } +template result kernels::instancenorm(const float *input, float *output, float *scale, float *bias, const runtime_shape_t &in_shape, float epsilon) noexcept; + +template +result kernels::instancenorm(const T *input, T *output, T *scale, T *bias, const runtime_shape_t &in_shape, float epsilon) noexcept +{ + return cpu::optimized::instancenorm(input, output, scale, bias, in_shape, epsilon); +} + template result kernels::layernorm(const float *input, float *output, float *scale, float *bias, const runtime_shape_t &in_shape, int32_t axis, float epsilon) noexcept; template diff --git a/src/runtime/stackvm/CMakeLists.txt b/src/runtime/stackvm/CMakeLists.txt index dc6d72f612..7aaeaf3755 100644 --- a/src/runtime/stackvm/CMakeLists.txt +++ b/src/runtime/stackvm/CMakeLists.txt @@ -48,7 +48,8 @@ set(SRCS runtime_module.cpp ops/tensor.trilu.cpp ops/tensor.tflite_detection_postprocess.cpp ops/tensor.unary.cpp - ops/tensor.layernorm.cpp) + ops/tensor.layernorm.cpp + ops/tensor.instancenorm.cpp) if (BUILDING_RUNTIME) add_library(runtime_stackvm OBJECT ${SRCS}) diff --git a/src/runtime/stackvm/op_reader.cpp b/src/runtime/stackvm/op_reader.cpp index cebf0a50e4..e36fcabf11 100644 --- a/src/runtime/stackvm/op_reader.cpp +++ b/src/runtime/stackvm/op_reader.cpp @@ -1,4 +1,4 @@ -/* This file is generated by tools/stackvm_gen/IsaGen at 2023/2/6 下午2:28:53 +08:00. +/* This file is generated by tools/stackvm_gen/IsaGen at 2023/5/9 下午5:18:43 +08:00. * * Copyright 2019-2021 Canaan Inc. * @@ -309,6 +309,13 @@ result op_visitor::next() noexcept #endif return visit(op_reader()(reader_)); } + case tensor_function_t::INSTANCE_NORMALIZATION: + { +#if defined ENABLE_OP_PROFILE + op_profile st("tensor_instance_normalization"); +#endif + return visit(op_reader()(reader_)); + } default: break; } diff --git a/src/runtime/stackvm/ops/tensor.instancenorm.cpp b/src/runtime/stackvm/ops/tensor.instancenorm.cpp new file mode 100644 index 0000000000..ebcaa1ced7 --- /dev/null +++ b/src/runtime/stackvm/ops/tensor.instancenorm.cpp @@ -0,0 +1,42 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../runtime_function.h" +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; +using namespace nncase::runtime::stackvm; + +result stackvm_runtime_function::visit(const tensor_instance_normalization_op_t &op) noexcept +{ + try_var(output, pop_addr()); + try_var(bias, pop_addr()); + try_var(scale, pop_addr()); + try_var(input, pop_addr()); + try_var(in_shape, module().shape_reg(op.input_shape)); + + switch (op.datatype) + { + case dt_float32: + return kernels::instancenorm(reinterpret_cast(input), reinterpret_cast(output), + reinterpret_cast(scale), reinterpret_cast(bias), in_shape, op.epsilon); + break; + default: + std::cerr << "unsupported dtype for instancenorm: " + std::string(datatype_names(op.datatype)); + return err(std::errc::invalid_argument); + } +} diff --git a/src/runtime/stackvm/runtime_function.h b/src/runtime/stackvm/runtime_function.h index 7b99b8fa35..0e10304d41 100644 --- a/src/runtime/stackvm/runtime_function.h +++ b/src/runtime/stackvm/runtime_function.h @@ -177,6 +177,7 @@ class stackvm_runtime_function : public runtime_function, private op_visitor result visit(const tensor_tflite_detection_postprocess_op_t &op) noexcept override; result visit(const tensor_unary_op_t &op) noexcept override; result visit(const tensor_layer_normalization_op_t &op) noexcept override; + result visit(const tensor_instance_normalization_op_t &op) noexcept override; private: uintptr_t pc() const noexcept; diff --git a/src/targets/neutral_target.cpp b/src/targets/neutral_target.cpp index 37e0cfdf40..e24a058528 100644 --- a/src/targets/neutral_target.cpp +++ b/src/targets/neutral_target.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -191,6 +192,11 @@ void neutral_target::register_target_independent_passes(const module_type_t &typ using namespace nncase::ir; using namespace nncase::ir::transforms; + { + transform_pass p("fold_instancenorm"); + p.emplace(); + pass_mgr.add_pass(std::move(p)); + } // fix tflite_detection_postprocess shape error in tflite { transform_pass p("fix_shape_tdp"); diff --git a/src/transforms/neutral/CMakeLists.txt b/src/transforms/neutral/CMakeLists.txt index 38c1422975..a6950b78c3 100644 --- a/src/transforms/neutral/CMakeLists.txt +++ b/src/transforms/neutral/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.13) +cmake_minimum_required(VERSION 3.13) target_sources(transforms PRIVATE add_quant_checkpoints.cpp @@ -50,4 +50,5 @@ target_sources(transforms PRIVATE squeeze_dims.cpp fix_output_shape.cpp fold_layernorm.cpp - ) + fold_instancenorm.cpp +) diff --git a/src/transforms/neutral/fold_instancenorm.cpp b/src/transforms/neutral/fold_instancenorm.cpp new file mode 100644 index 0000000000..f7e7e5556a --- /dev/null +++ b/src/transforms/neutral/fold_instancenorm.cpp @@ -0,0 +1,73 @@ +/* Copyright 2019-2021 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::ir; +using namespace nncase::ir::transforms; + +bool fold_instancenorm_transform::on_try_match(node &node, transform_context &context) +{ + binary *add_bias = nullptr, *mul_scale = nullptr, *div = nullptr, *add_e = nullptr, *sub_mean = nullptr, *sub_mean_cmp = nullptr; + unary *u_sqrt = nullptr, *u_square = nullptr; + reduce *reduce_mean0 = nullptr, *reduce_mean1 = nullptr; + constant *scale = nullptr, *bias = nullptr, *eps = nullptr; + if (((add_bias = node_cast(node)) && (bias = try_get_direct_parent(*add_bias))) && add_bias->binary_op() == binary_add + && (div = try_get_direct_parent(*add_bias)) && div->binary_op() == binary_div + && (mul_scale = try_get_direct_parent(*div)) && (scale = try_get_direct_parent(*mul_scale)) && mul_scale->binary_op() == binary_mul + && (u_sqrt = try_get_direct_parent(*div)) && u_sqrt->unary_op() == unary_sqrt + && (add_e = try_get_direct_parent(*u_sqrt)) && (eps = try_get_direct_parent(*add_e)) && add_e->binary_op() == binary_add + && (reduce_mean0 = try_get_direct_parent(*add_e)) && reduce_mean0->reduce_op() == reduce_mean + && (u_square = try_get_direct_parent(*reduce_mean0)) && u_square->unary_op() == unary_square + && ((sub_mean = try_get_direct_parent(*u_square)) && (sub_mean_cmp = try_get_direct_parent(*mul_scale)) + && (sub_mean == sub_mean_cmp) && sub_mean->binary_op() == binary_sub) + && (reduce_mean1 = try_get_direct_parent(*sub_mean)) && reduce_mean1->reduce_op() == reduce_mean) + { + context.inputs.emplace_back(&reduce_mean1->input()); + context.outputs.emplace_back(&add_bias->output()); + context.matched_nodes.emplace_back(scale); + context.matched_nodes.emplace_back(bias); + context.matched_nodes.emplace_back(eps); + return true; + } + + return false; +} + +void fold_instancenorm_transform::process(transform_context &context) +{ + auto &output = *context.inputs[0]->connection(); + auto inputs = context.outputs[0]->connections(); + + auto scale = node_cast(*context.matched_nodes[0]); + auto bias = node_cast(*context.matched_nodes[1]); + auto eps = node_cast(*context.matched_nodes[2]); + + auto instancenorm_ = context.graph.emplace(output.type(), output.shape(), *reinterpret_cast(eps->data().data())); + instancenorm_->name(scale->name()); + instancenorm_->input().connect(output); + instancenorm_->scale().connect(scale->output()); + instancenorm_->bias().connect(bias->output()); + + for (auto &in : dup(inputs)) + in->connect(instancenorm_->output()); +} \ No newline at end of file diff --git a/tools/stackvm_gen/IsaGen/Instructions.cs b/tools/stackvm_gen/IsaGen/Instructions.cs index 535a06db1e..1ce60f6596 100644 --- a/tools/stackvm_gen/IsaGen/Instructions.cs +++ b/tools/stackvm_gen/IsaGen/Instructions.cs @@ -187,7 +187,8 @@ public enum TensorFunction TFLITE_DETECTION_POSTPROCESS, LAYER_NORMALIZATION, COMPRESS, - GATHER_ELEMENTS + GATHER_ELEMENTS, + INSTANCE_NORMALIZATION } [BitLength(8)] @@ -2438,5 +2439,25 @@ public class Gather_ElementsInstruction : TensorInstruction [Description("Axis")] public int Axis { get; set; } } + + [DisplayName("TENSOR.INSTANCE_NORMALIZATION")] + [Category("Tensor Instructions")] + [Description("INSTANCE_NORMALIZATION")] + public class InstanceNormInstruction : TensorInstruction + { + public override TensorFunction Function => TensorFunction.INSTANCE_NORMALIZATION; + + [DisplayName("datatype")] + [Description("Datatype")] + public DataType DataType { get; set; } + + [DisplayName("input_shape")] + [Description("input_shape")] + public byte input_shape { get; set; } + + [DisplayName("epsilon")] + [Description("epsilon")] + public float epsilon { get; set; } + } } }