diff --git a/tests/lib/cmsis_nn/CMakeLists.txt b/tests/lib/cmsis_nn/CMakeLists.txt new file mode 100644 index 00000000000..141fcdd6dfd --- /dev/null +++ b/tests/lib/cmsis_nn/CMakeLists.txt @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.20.0) +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(cmsis_nn) + +target_sources(app PRIVATE + src/main.c +) diff --git a/tests/lib/cmsis_nn/prj.conf b/tests/lib/cmsis_nn/prj.conf new file mode 100644 index 00000000000..b8dea680868 --- /dev/null +++ b/tests/lib/cmsis_nn/prj.conf @@ -0,0 +1,14 @@ +CONFIG_ZTEST=y +CONFIG_NEWLIB_LIBC=y +CONFIG_CMSIS_DSP=y +CONFIG_CMSIS_NN=y +CONFIG_CMSIS_NN_ACTIVATION=y +CONFIG_CMSIS_NN_BASICMATH=y +CONFIG_CMSIS_NN_CONCATENATION=y +CONFIG_CMSIS_NN_CONVOLUTION=y +CONFIG_CMSIS_NN_FULLYCONNECTED=y +CONFIG_CMSIS_NN_NNSUPPORT=y +CONFIG_CMSIS_NN_POOLING=y +CONFIG_CMSIS_NN_RESHAPE=y +CONFIG_CMSIS_NN_SOFTMAX=y +CONFIG_CMSIS_NN_SVD=y diff --git a/tests/lib/cmsis_nn/src/main.c b/tests/lib/cmsis_nn/src/main.c new file mode 100644 index 00000000000..350b49ba417 --- /dev/null +++ b/tests/lib/cmsis_nn/src/main.c @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2021, Commonwealth Scientific and Industrial Research + * Organisation (CSIRO) ABN 41 687 119 230. + * + * SPDX-License-Identifier: Apache-2.0 + * + * This is not exhaustive functional testing of the CMSIS-NN library. 
+ *
+ * Individual tests have been pulled from CMSIS/NN/Tests/UnitTest to
+ * validate the integration of CMSIS-NN and Zephyr
+ */
+
+#include <ztest.h>
+#include <zephyr.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "arm_nnfunctions.h"
+
+#define REPEAT_NUM 3
+
+#define AVGPOOLING_2_OUT_CH 5
+#define AVGPOOLING_2_IN_CH 5
+#define AVGPOOLING_2_INPUT_W 12
+#define AVGPOOLING_2_INPUT_H 1
+#define AVGPOOLING_2_DST_SIZE 60
+#define AVGPOOLING_2_INPUT_SIZE 60
+#define AVGPOOLING_2_OUT_ACTIVATION_MIN -128
+#define AVGPOOLING_2_OUT_ACTIVATION_MAX 127
+#define AVGPOOLING_2_INPUT_BATCHES 1
+#define AVGPOOLING_2_FILTER_X 3
+#define AVGPOOLING_2_FILTER_Y 1
+#define AVGPOOLING_2_STRIDE_X 1
+#define AVGPOOLING_2_STRIDE_Y 2
+#define AVGPOOLING_2_PAD_X 1
+#define AVGPOOLING_2_PAD_Y 0
+#define AVGPOOLING_2_OUTPUT_W 12
+#define AVGPOOLING_2_OUTPUT_H 1
+
+const int8_t avgpooling_2_input[60] = {
+	80, 16, -80, -96, 96, -64, -112, -112, 48, 16, -80, -80, 80, 64, -80,
+	16, 48, -112, 0, 48, 96, -80, -112, -64, -32, -16, -112, -64, -64, 80,
+	-96, -112, -16, -80, -80, -112, -64, -48, 16, 64, 32, 48, 16, 64, 16,
+	-48, -64, -32, -80, 64, -48, -32, -32, -112, 32, 32, -112, -96, -96, 48
+};
+
+const int8_t avgpooling_2_output_ref[60] = {
+	8, -48, -96, -24, 56, -21, -59, -37, 5, 11, -43, -48, -48, 37, -5,
+	11, -37, -48, 0, -21, 32, -48, -96, -43, 32, -5, -101, -64, -69, -11,
+	-75, -96, -43, -43, 21, -59, -43, -16, 0, 0, -43, -27, -21, 0, 48,
+	-21, -16, -16, -43, 37, -21, -69, -53, -96, 48, -8, -72, -64, -104, 40
+};
+
+/*
+ * Run arm_avgpool_s8() on the AVGPOOLING_2 vector and compare the result
+ * with avgpooling_2_output_ref.
+ */
+void test_avgpool(void)
+{
+	q7_t output[AVGPOOLING_2_DST_SIZE] = { 0 };
+
+	cmsis_nn_context ctx;
+	cmsis_nn_pool_params pool_params;
+	/* Zero-fill so members this test never assigns are not indeterminate */
+	cmsis_nn_dims input_dims = { 0 };
+	cmsis_nn_dims filter_dims = { 0 };
+	cmsis_nn_dims output_dims = { 0 };
+
+	input_dims.n = AVGPOOLING_2_INPUT_BATCHES;
+	input_dims.w = AVGPOOLING_2_INPUT_W;
+	input_dims.h = AVGPOOLING_2_INPUT_H;
+	input_dims.c = AVGPOOLING_2_IN_CH;
+	filter_dims.w = AVGPOOLING_2_FILTER_X;
+	filter_dims.h = AVGPOOLING_2_FILTER_Y;
+	output_dims.w = AVGPOOLING_2_OUTPUT_W;
+	output_dims.h = AVGPOOLING_2_OUTPUT_H;
+	output_dims.c = AVGPOOLING_2_OUT_CH;
+
+	pool_params.padding.w = AVGPOOLING_2_PAD_X;
+	pool_params.padding.h = AVGPOOLING_2_PAD_Y;
+	pool_params.stride.w = AVGPOOLING_2_STRIDE_X;
+	pool_params.stride.h = AVGPOOLING_2_STRIDE_Y;
+
+	pool_params.activation.min = AVGPOOLING_2_OUT_ACTIVATION_MIN;
+	pool_params.activation.max = AVGPOOLING_2_OUT_ACTIVATION_MAX;
+
+	ctx.size = arm_avgpool_s8_get_buffer_size(AVGPOOLING_2_OUTPUT_W, AVGPOOLING_2_IN_CH);
+	ctx.buf = malloc(ctx.size);
+	/* malloc(0) may legitimately return NULL; only a real request must succeed */
+	if (ctx.size > 0) {
+		zassert_not_null(ctx.buf, "scratch buffer allocation failed");
+	}
+
+	arm_status result = arm_avgpool_s8(&ctx, &pool_params, &input_dims, avgpooling_2_input,
+					   &filter_dims, &output_dims, output);
+
+	free(ctx.buf);
+
+	zassert_equal(ARM_MATH_SUCCESS, result, "");
+	zassert_mem_equal(avgpooling_2_output_ref, output, sizeof(output), "");
+}
+
+#define CONV_4_OUT_CH 3
+#define CONV_4_IN_CH 3
+#define CONV_4_INPUT_W 5
+#define CONV_4_INPUT_H 5
+#define CONV_4_DST_SIZE 36
+#define CONV_4_INPUT_SIZE 75
+#define CONV_4_OUT_ACTIVATION_MIN -128
+#define CONV_4_OUT_ACTIVATION_MAX 127
+#define CONV_4_INPUT_BATCHES 3
+#define CONV_4_INPUT_OFFSET 0
+#define CONV_4_OUTPUT_OFFSET 0
+#define CONV_4_FILTER_X 2
+#define CONV_4_FILTER_Y 3
+#define CONV_4_STRIDE_X 2
+#define CONV_4_STRIDE_Y 2
+#define CONV_4_PAD_X 0
+#define CONV_4_PAD_Y 0
+#define CONV_4_OUTPUT_W 2
+#define CONV_4_OUTPUT_H 2
+
+const int32_t conv_4_biases[3] = { 2699, -5398, -2699 };
+
+const q7_t conv_4_weights[54] = {
+	-127, 64, 64, -64, 0, 0, 64, -64, 0, -64, 64, 64, 64, -127,
+	64, 0, -127, -64, 64, 64, -64, -64, -64, -64, -64, 0, 0, 64,
+	64, 64, 0, 0, 0, -127, -64, -127, -127, 0, 0, 0, 0, -127,
+	-127, -127, -127, 64, -127, 64, 64, 0, 0, -64, -127, 64
+};
+
+const q7_t conv_4_input[225] = {
+	42, -85, -85, 0, 42, 42, -42, -42, -42, -85, 42, 42, -42, -42, -85,
+	0, -85, 0, 42, -42, 0, -42, 42, -42, -42, 42, -42, 42, -85, -42,
+	-85, -42, 0, -42, -42, -42, 42, -85, -42, -42, -42, 0, -42, 0, 0,
+	0, 42, -42, 42, 0, -42, 0, 0, -85, 0, 42, 42, 0, 42, 42, -85, 42,
+	42, -85, -42, 0, -85, 42, -42, -85, -42, -85, 42, 42, -85, -85, 42,
+	42, 42, -85, 42, -85, -42, -42, 0, -42, -85, -85, 42, -85, 0, -85,
+	42, 42, 0, 42, 42, 42, 42, -85, 42, -85, -42, 0, 42, 0, 0, -85, -42,
+	0, -85, 0, 42, -85, -42, 0, -42, 0, 42, -42, -42, -85, 0, -85, -42,
+	-85, 0, 42, -85, -85, -85, -85, 0, -85, 42, 42, 0, -42, -85, -85, 0,
+	-42, 0, 0, -85, -85, -42, 42, -85, -42, -42, 42, -85, 0, 42, 0, -85,
+	0, 0, 42, 42, -85, -85, -85, 0, 42, 0, 0, 42, -85, -85, 42, -85, -42,
+	-42, 0, -85, -85, 42, -85, 0, -85, -42, -85, 42, 0, 42, 42, 0, -85,
+	0, 0, 0, 0, 0, -42, -85, 42, 0, -85, -42, 0, -42, 42, 42, -85, 0,
+	42, 42, 0, -42, -85, -42, -85, 0, 42, -85, -85, -42, 42, -42, -42,
+	-42, -42, 42
+};
+
+const int32_t conv_4_output_mult[3] = { 1629660588, 1629660588, 1629660588 };
+
+const int32_t conv_4_output_shift[3] = { -11, -11, -11 };
+
+const q7_t conv_4_output_ref[36] = {
+	-2, 2, 2, 8, 0, 1, 1, 3, 7, -2, 11, 0, 8, 4, 4, 1, -1, -5,
+	4, 5, 14, 2, 5, 7, -1, -2, 2, 5, -4, 11, -1, -2, 8, 4, 2, 0
+};
+
+/*
+ * Run the CONV_4 vector through arm_convolve_s8() and then through
+ * arm_convolve_wrapper_s8(); each call must return ARM_MATH_SUCCESS and
+ * reproduce conv_4_output_ref.
+ */
+void test_convolve(void)
+{
+	q7_t output[CONV_4_DST_SIZE] = { 0 };
+
+	cmsis_nn_context ctx;
+	cmsis_nn_conv_params conv_params;
+	cmsis_nn_per_channel_quant_params quant_params;
+	/* Zero-fill so members this test never assigns are not indeterminate */
+	cmsis_nn_dims input_dims = { 0 };
+	cmsis_nn_dims filter_dims = { 0 };
+	cmsis_nn_dims bias_dims = { 0 };
+	cmsis_nn_dims output_dims = { 0 };
+
+	const q31_t *bias_data = conv_4_biases;
+	const q7_t *kernel_data = conv_4_weights;
+	const q7_t *input_data = conv_4_input;
+
+	input_dims.n = CONV_4_INPUT_BATCHES;
+	input_dims.w = CONV_4_INPUT_W;
+	input_dims.h = CONV_4_INPUT_H;
+	input_dims.c = CONV_4_IN_CH;
+	filter_dims.w = CONV_4_FILTER_X;
+	filter_dims.h = CONV_4_FILTER_Y;
+	output_dims.w = CONV_4_OUTPUT_W;
+	output_dims.h = CONV_4_OUTPUT_H;
+	output_dims.c = CONV_4_OUT_CH;
+
+	conv_params.padding.w = CONV_4_PAD_X;
+	conv_params.padding.h = CONV_4_PAD_Y;
+	conv_params.stride.w = CONV_4_STRIDE_X;
+	conv_params.stride.h = CONV_4_STRIDE_Y;
+
+	conv_params.input_offset = CONV_4_INPUT_OFFSET;
+	conv_params.output_offset = CONV_4_OUTPUT_OFFSET;
+	conv_params.activation.min = CONV_4_OUT_ACTIVATION_MIN;
+	conv_params.activation.max = CONV_4_OUT_ACTIVATION_MAX;
+	quant_params.multiplier = (int32_t *)conv_4_output_mult;
+	quant_params.shift = (int32_t *)conv_4_output_shift;
+
+	int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+
+	ctx.buf = malloc(buf_size);
+	/* Report the real scratch size, as test_fully_connected does */
+	ctx.size = buf_size;
+	/* malloc(0) may legitimately return NULL; only a real request must succeed */
+	if (buf_size > 0) {
+		zassert_not_null(ctx.buf, "scratch buffer allocation failed");
+	}
+
+	arm_status result = arm_convolve_s8(&ctx,
+					    &conv_params,
+					    &quant_params,
+					    &input_dims,
+					    input_data,
+					    &filter_dims,
+					    kernel_data,
+					    &bias_dims,
+					    bias_data,
+					    &output_dims,
+					    output);
+
+	free(ctx.buf);
+	zassert_equal(ARM_MATH_SUCCESS, result, "");
+	zassert_mem_equal(conv_4_output_ref, output, sizeof(output), "");
+
+	/*
+	 * The buffer still holds the reference result from the first run.
+	 * Clear it so the wrapper is only credited for data it writes itself.
+	 */
+	memset(output, 0, sizeof(output));
+
+	buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+							   &filter_dims, &output_dims);
+	ctx.buf = malloc(buf_size);
+	ctx.size = buf_size;
+	if (buf_size > 0) {
+		zassert_not_null(ctx.buf, "scratch buffer allocation failed");
+	}
+
+	result = arm_convolve_wrapper_s8(&ctx,
+					 &conv_params,
+					 &quant_params,
+					 &input_dims,
+					 input_data,
+					 &filter_dims,
+					 kernel_data,
+					 &bias_dims,
+					 bias_data,
+					 &output_dims,
+					 output);
+
+	free(ctx.buf);
+	zassert_equal(ARM_MATH_SUCCESS, result, "");
+	zassert_mem_equal(conv_4_output_ref, output, sizeof(output), "");
+}
+
+#define STRIDE2PAD1_OUT_CH 1
+#define STRIDE2PAD1_IN_CH 1
+#define STRIDE2PAD1_INPUT_W 7
+#define STRIDE2PAD1_INPUT_H 7
+#define STRIDE2PAD1_DST_SIZE 16
+#define STRIDE2PAD1_INPUT_SIZE 49
+#define STRIDE2PAD1_OUT_ACTIVATION_MIN -128
+#define STRIDE2PAD1_OUT_ACTIVATION_MAX 127
+#define STRIDE2PAD1_INPUT_BATCHES 1
+#define STRIDE2PAD1_INPUT_OFFSET 128
+#define STRIDE2PAD1_OUTPUT_OFFSET 0
+#define STRIDE2PAD1_FILTER_X 3
+#define STRIDE2PAD1_FILTER_Y 3
+#define STRIDE2PAD1_STRIDE_X 2
+#define STRIDE2PAD1_STRIDE_Y 2
+#define STRIDE2PAD1_PAD_X 1
+#define STRIDE2PAD1_PAD_Y 1
+#define STRIDE2PAD1_OUTPUT_W 4
+#define STRIDE2PAD1_OUTPUT_H 4
+
+const int32_t stride2pad1_biases[1] = { 4318 };
+
+const q7_t stride2pad1_weights[9] = { 42, 127, 127, 127, 42, 127, 85, 42, 85 };
+
+const q7_t stride2pad1_input[49] = {
+	-26, -77, -26, -26, 25, -77, -77, -26, 25, -26, -77, -26, -26, -77, 25, -77, -26,
+	-26, -77, -26, -77, -26, -77, -26, 25, -77, -26, -26, -26, 25, -26, -77, -77, -77,
+	-26, 25, 25, -26, -77, -26, -26, -26, -26, -26, -77, -26, 25, -77, -26
+};
+
+const int32_t stride2pad1_output_mult[1] = { 2037075735 };
+
+const int32_t stride2pad1_output_shift[1] = { -11 };
+
+const q7_t stride2pad1_output_ref[16] = {
+	15, 23, 22, 11, 27, 35, 39, 20, 31, 42, 29, 21, 28, 27, 27, 15
+};
+
+/*
+ * Run arm_depthwise_conv_s8() on the STRIDE2PAD1 vector and compare the
+ * result with stride2pad1_output_ref.
+ */
+void test_depthwise_convolve(void)
+{
+	q7_t output[STRIDE2PAD1_DST_SIZE] = { 0 };
+
+	cmsis_nn_context ctx;
+	cmsis_nn_dw_conv_params dw_conv_params;
+	cmsis_nn_per_channel_quant_params quant_params;
+	/* Zero-fill so members this test never assigns are not indeterminate */
+	cmsis_nn_dims input_dims = { 0 };
+	cmsis_nn_dims filter_dims = { 0 };
+	cmsis_nn_dims bias_dims = { 0 };
+	cmsis_nn_dims output_dims = { 0 };
+
+	const q31_t *bias_data = stride2pad1_biases;
+	const q7_t *kernel_data = stride2pad1_weights;
+	const q7_t *input_data = stride2pad1_input;
+
+	input_dims.n = STRIDE2PAD1_INPUT_BATCHES;
+	input_dims.w = STRIDE2PAD1_INPUT_W;
+	input_dims.h = STRIDE2PAD1_INPUT_H;
+	input_dims.c = STRIDE2PAD1_IN_CH;
+	filter_dims.w = STRIDE2PAD1_FILTER_X;
+	filter_dims.h = STRIDE2PAD1_FILTER_Y;
+	output_dims.w = STRIDE2PAD1_OUTPUT_W;
+	output_dims.h = STRIDE2PAD1_OUTPUT_H;
+	output_dims.c = STRIDE2PAD1_OUT_CH;
+
+	dw_conv_params.padding.w = STRIDE2PAD1_PAD_X;
+	dw_conv_params.padding.h = STRIDE2PAD1_PAD_Y;
+	dw_conv_params.stride.w = STRIDE2PAD1_STRIDE_X;
+	dw_conv_params.stride.h = STRIDE2PAD1_STRIDE_Y;
+	dw_conv_params.ch_mult = 1;
+
+	dw_conv_params.input_offset = STRIDE2PAD1_INPUT_OFFSET;
+	dw_conv_params.output_offset = STRIDE2PAD1_OUTPUT_OFFSET;
+	dw_conv_params.activation.min = STRIDE2PAD1_OUT_ACTIVATION_MIN;
+	dw_conv_params.activation.max = STRIDE2PAD1_OUT_ACTIVATION_MAX;
+	quant_params.multiplier = (int32_t *)stride2pad1_output_mult;
+	quant_params.shift = (int32_t *)stride2pad1_output_shift;
+
+	/* No scratch buffer is handed to this call, so nothing is freed later */
+	ctx.buf = NULL;
+	ctx.size = 0;
+
+	arm_status result = arm_depthwise_conv_s8(&ctx,
+						  &dw_conv_params,
+						  &quant_params,
+						  &input_dims,
+						  input_data,
+						  &filter_dims,
+						  kernel_data,
+						  &bias_dims,
+						  bias_data,
+						  &output_dims,
+						  output);
+
+	zassert_equal(ARM_MATH_SUCCESS, result, "");
+	zassert_mem_equal(stride2pad1_output_ref, output, sizeof(output), "");
+}
+
+#define FULLY_CONNECTED_MVE_0_OUT_CH 9
+#define FULLY_CONNECTED_MVE_0_IN_CH 16
+#define FULLY_CONNECTED_MVE_0_INPUT_W 1
+#define FULLY_CONNECTED_MVE_0_INPUT_H 1
+#define FULLY_CONNECTED_MVE_0_DST_SIZE 9
+#define FULLY_CONNECTED_MVE_0_INPUT_SIZE 16
+#define FULLY_CONNECTED_MVE_0_OUT_ACTIVATION_MIN -128
+#define FULLY_CONNECTED_MVE_0_OUT_ACTIVATION_MAX 127
+#define FULLY_CONNECTED_MVE_0_INPUT_BATCHES 1
+#define FULLY_CONNECTED_MVE_0_INPUT_OFFSET 3
+#define FULLY_CONNECTED_MVE_0_OUTPUT_OFFSET -2
+#define FULLY_CONNECTED_MVE_0_OUTPUT_MULTIPLIER 1073741824
+#define FULLY_CONNECTED_MVE_0_OUTPUT_SHIFT 1
+#define FULLY_CONNECTED_MVE_0_ACCUMULATION_DEPTH 16
+
+const int32_t fully_connected_mve_0_biases[9] = { -1, 0, 0, 2, -1, -1, 1, -3, -4 };
+
+const q7_t fully_connected_mve_0_input[16] = {
+	-5, -3, -5, -3, -3, -6, -1, -5, -4, -3, -2, 0, -2, -1, -2, -6
+};
+
+const q7_t fully_connected_mve_0_output_ref[9] = { 0, -29, 33, -5, 28, -5, 19, -7, 16 };
+
+const q7_t fully_connected_mve_0_weights[144] = {
+	1, 0, -1, -3, -4, -3, 3, -2, 3, 3, 1, 2, -2, -4, -4, 2, 3, 2, 3, -1, -2, 2,
+	-4, 0, 1, -3, -3, -3, 1, 1, -3, -4, -3, 3, 2, 3, 1, -4, 3, -3, -1, 3, 1, -2,
+	2, 3, -4, -3, 2, -4, 0, 3, 0, -2, 0, -1, -2, 0, 3, -3, -1, -2, -3, -1, -4,
+	1, 2, -1, -4, -4, 1, -3, -3, 2, 3, 1, -3, -2, -4, -3, -2, 2, 1, 1, 1, -2, 0,
+	3, -3, -2, -1, -4, -2, 2, 1, -1, -4, 2, 2, 3, 3, 2, 0, -3, 2, 3, 0, 3, 3, -1,
+	-4, -4, 0, 1, -4, -1, -3, 3, 2, 3, 2, -3, -1, -3, 0, 3, -2, -3, -2, 3, -4, 3,
+	-1, -4, 2, 2, 3, 1, -1, 1, 0, -4, -2, -3
+};
+
+/*
+ * Run arm_fully_connected_s8() on the FULLY_CONNECTED_MVE_0 vector and
+ * compare the result with fully_connected_mve_0_output_ref.
+ */
+void test_fully_connected(void)
+{
+	q7_t output[FULLY_CONNECTED_MVE_0_DST_SIZE] = { 0 };
+
+	cmsis_nn_context ctx;
+	cmsis_nn_fc_params fc_params;
+	cmsis_nn_per_tensor_quant_params quant_params;
+	/* Zero-fill so members this test never assigns are not indeterminate */
+	cmsis_nn_dims input_dims = { 0 };
+	cmsis_nn_dims filter_dims = { 0 };
+	cmsis_nn_dims bias_dims = { 0 };
+	cmsis_nn_dims output_dims = { 0 };
+
+	const q31_t *bias_data = fully_connected_mve_0_biases;
+	const q7_t *kernel_data = fully_connected_mve_0_weights;
+	const q7_t *input_data = fully_connected_mve_0_input;
+
+	input_dims.n = FULLY_CONNECTED_MVE_0_INPUT_BATCHES;
+	input_dims.w = FULLY_CONNECTED_MVE_0_INPUT_W;
+	input_dims.h = FULLY_CONNECTED_MVE_0_INPUT_H;
+	input_dims.c = FULLY_CONNECTED_MVE_0_IN_CH;
+	filter_dims.n = FULLY_CONNECTED_MVE_0_ACCUMULATION_DEPTH;
+	filter_dims.c = FULLY_CONNECTED_MVE_0_OUT_CH;
+	output_dims.n = FULLY_CONNECTED_MVE_0_INPUT_BATCHES;
+	output_dims.c = FULLY_CONNECTED_MVE_0_OUT_CH;
+
+	fc_params.input_offset = FULLY_CONNECTED_MVE_0_INPUT_OFFSET;
+	fc_params.filter_offset = 0;
+	fc_params.output_offset = FULLY_CONNECTED_MVE_0_OUTPUT_OFFSET;
+	fc_params.activation.min = FULLY_CONNECTED_MVE_0_OUT_ACTIVATION_MIN;
+	fc_params.activation.max = FULLY_CONNECTED_MVE_0_OUT_ACTIVATION_MAX;
+
+	quant_params.multiplier = FULLY_CONNECTED_MVE_0_OUTPUT_MULTIPLIER;
+	quant_params.shift = FULLY_CONNECTED_MVE_0_OUTPUT_SHIFT;
+
+	int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+
+	ctx.buf = malloc(buf_size);
+	ctx.size = buf_size;
+	/* malloc(0) may legitimately return NULL; only a real request must succeed */
+	if (buf_size > 0) {
+		zassert_not_null(ctx.buf, "scratch buffer allocation failed");
+	}
+
+	arm_status result = arm_fully_connected_s8(&ctx,
+						   &fc_params,
+						   &quant_params,
+						   &input_dims,
+						   input_data,
+						   &filter_dims,
+						   kernel_data,
+						   &bias_dims,
+						   bias_data,
+						   &output_dims,
+						   output);
+
+	free(ctx.buf);
+	zassert_equal(ARM_MATH_SUCCESS, result, "");
+	zassert_mem_equal(fully_connected_mve_0_output_ref, output, sizeof(output), "");
+}
+
+#define MAXPOOLING_2_OUT_CH 5
+#define MAXPOOLING_2_IN_CH 5
+#define MAXPOOLING_2_INPUT_W 12
+#define MAXPOOLING_2_INPUT_H 1
+#define MAXPOOLING_2_DST_SIZE 60
+#define MAXPOOLING_2_INPUT_SIZE 60
+#define MAXPOOLING_2_OUT_ACTIVATION_MIN -128
+#define MAXPOOLING_2_OUT_ACTIVATION_MAX 127
+#define MAXPOOLING_2_INPUT_BATCHES 1
+#define MAXPOOLING_2_FILTER_X 3
+#define MAXPOOLING_2_FILTER_Y 1
+#define MAXPOOLING_2_STRIDE_X 1
+#define MAXPOOLING_2_STRIDE_Y 2
+#define MAXPOOLING_2_PAD_X 1
+#define MAXPOOLING_2_PAD_Y 0
+#define MAXPOOLING_2_OUTPUT_W 12
+#define MAXPOOLING_2_OUTPUT_H 1
+
+const int8_t maxpooling_2_input[60] = {
+	-16, 32, -16, -48, -16, 16, 64, 0, -112, 80, -64, 48, -64, 80, -16,
+	-80, -96, 48, 32, 96, 64, 80, 16, -96, 32, -112, -16, -80, -48, 32,
+	-64, -32, -16, 80, 48, -80, 96, -96, 64, -64, -112, 32, 96, -16, -16,
+	96, 0, -16, -16, -32, 64, -96, 96, 96, -48, -64, -16, 32, 16, 64
+};
+
+const int8_t maxpooling_2_output_ref[60] = {
+	16, 64, 0, -48, 80, 16, 64, 0, 80, 80, 16, 64, 48, 80, 96,
+	64, 80, 48, 80, 96, 64, 80, 48, 32, 96, 64, 80, 16, 80, 48,
+	-64, 96, -16, 80, 48, -64, 96, 96, 80, 48, 96, 96, 96, 64, -16,
+	96, 32, 96, 96, -16, 96, 0, 96, 96, 64, 64, -16, 96, 96, 64
+};
+
+/*
+ * Run arm_max_pool_s8() on the MAXPOOLING_2 vector REPEAT_NUM times; every
+ * run must return ARM_MATH_SUCCESS and reproduce maxpooling_2_output_ref.
+ */
+void test_max_pool(void)
+{
+	q7_t output[MAXPOOLING_2_DST_SIZE] = { 0 };
+
+	/* No scratch memory is supplied; give the context defined contents */
+	cmsis_nn_context ctx = { .buf = NULL, .size = 0 };
+	cmsis_nn_pool_params pool_params;
+	/* Zero-fill so members this test never assigns are not indeterminate */
+	cmsis_nn_dims input_dims = { 0 };
+	cmsis_nn_dims filter_dims = { 0 };
+	cmsis_nn_dims output_dims = { 0 };
+
+	const q7_t *input_data = maxpooling_2_input;
+
+	input_dims.n = MAXPOOLING_2_INPUT_BATCHES;
+	input_dims.w = MAXPOOLING_2_INPUT_W;
+	input_dims.h = MAXPOOLING_2_INPUT_H;
+	input_dims.c = MAXPOOLING_2_IN_CH;
+	filter_dims.w = MAXPOOLING_2_FILTER_X;
+	filter_dims.h = MAXPOOLING_2_FILTER_Y;
+	output_dims.w = MAXPOOLING_2_OUTPUT_W;
+	output_dims.h = MAXPOOLING_2_OUTPUT_H;
+	output_dims.c = MAXPOOLING_2_OUT_CH;
+
+	pool_params.padding.w = MAXPOOLING_2_PAD_X;
+	pool_params.padding.h = MAXPOOLING_2_PAD_Y;
+	pool_params.stride.w = MAXPOOLING_2_STRIDE_X;
+	pool_params.stride.h = MAXPOOLING_2_STRIDE_Y;
+
+	pool_params.activation.min = MAXPOOLING_2_OUT_ACTIVATION_MIN;
+	pool_params.activation.max = MAXPOOLING_2_OUT_ACTIVATION_MAX;
+
+	for (int i = 0; i < REPEAT_NUM; i++) {
+		/* Drop the previous run's result so each repeat is checked on its own */
+		memset(output, 0, sizeof(output));
+
+		arm_status result = arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data,
+						    &filter_dims, &output_dims, output);
+
+		zassert_equal(ARM_MATH_SUCCESS, result, "");
+		zassert_mem_equal(maxpooling_2_output_ref, output, sizeof(output), "");
+	}
+}
+
+#define SOFTMAX_NUM_ROWS 1
+#define SOFTMAX_ROW_SIZE 5
+#define SOFTMAX_INPUT_MULT 1077952576
+#define SOFTMAX_INPUT_LEFT_SHIFT 23
+#define SOFTMAX_DIFF_MIN -248
+#define SOFTMAX_DST_SIZE 5
+
+const q7_t softmax_input[5] = { -80, -48, 16, 0, -96 };
+
+const q7_t softmax_output_ref[5] = { -128, -125, 56, -60, -128 };
+
+/*
+ * Run arm_softmax_s8() on softmax_input REPEAT_NUM times and compare every
+ * result with softmax_output_ref.
+ */
+void test_softmax(void)
+{
+	const int32_t num_rows = SOFTMAX_NUM_ROWS;
+	const int32_t row_size = SOFTMAX_ROW_SIZE;
+	const int32_t mult = SOFTMAX_INPUT_MULT;
+	const int32_t shift = SOFTMAX_INPUT_LEFT_SHIFT;
+	const int32_t diff_min = SOFTMAX_DIFF_MIN;
+	const q7_t *input_data = softmax_input;
+	int8_t output[SOFTMAX_DST_SIZE];
+
+	for (int i = 0; i < REPEAT_NUM; i++) {
+		/* Drop the previous run's result so each repeat is checked on its own */
+		memset(output, 0, sizeof(output));
+
+		arm_softmax_s8(input_data, num_rows, row_size, mult, shift, diff_min, output);
+		zassert_mem_equal(softmax_output_ref, output, sizeof(output), "");
+	}
+}
+
+#define SVDF_2_INPUT_OFFSET 0
+#define SVDF_2_OUTPUT_OFFSET 0
+#define SVDF_2_MULTIPLIER_IN 1347440720
+#define SVDF_2_MULTIPLIER_OUT 1073741824
+#define SVDF_2_SHIFT_1 -4
+#define SVDF_2_SHIFT_2 1
+#define SVDF_2_IN_ACTIVATION_MIN -32767
+#define SVDF_2_IN_ACTIVATION_MAX 32767
+#define SVDF_2_RANK 2
+#define SVDF_2_FEATURE_BATCHES 10
+#define SVDF_2_TIME_BATCHES 2
+#define SVDF_2_INPUT_SIZE 7
+#define SVDF_2_DST_SIZE 15
+#define SVDF_2_OUT_ACTIVATION_MIN -128
+#define SVDF_2_OUT_ACTIVATION_MAX 127
+#define SVDF_2_INPUT_BATCHES 3
+
+const int32_t svdf_2_biases[5] = { 0, 0, 0, 0, 0 };
+
+
+const q15_t svdf_2_state[60] = {
+	3, 1, -1, 2, 1, 4, 3, 2, 2, 1, 4, -1, -3, 3, 4, 3, 1, -1, 3, 2,
+	0, -2, -1, -2, -1, -3, 0, -3, 4, 3, -1, 4, -4, -1, 2, 3, -4, -3, -2, 1,
+	1, 4, 3, -2, -3, -2, 4, 0, -2, 1, -2, -3, -4, 2, 0, -2, -3, 0, -1, 0
+};
+
+const q7_t svdf_2_weights_feature[70] = {
+	-4, 0, 2, -2, 1, 1, -1, 0, -1, 2, -1, 1, 1, 3, -3, -2, -2, 3,
+	3, -3, 1, 2, 1, -4, 0, 2, -2, -1, 3, 1, 0, 0, 1, -2, 0, 2,
+	1, 0, -1, 2, 3, -1, 3, -1, -1, -2, -4, -3, 1, 1, 2, -3, 3, -3,
+	0, 0, 2, 0, 2, -1, -1, -3, -3, 1, 2, 2, 3, -2, 3, 1
+};
+
+const q15_t svdf_2_weights_time[20] = {
+	-4, 3, 0, -3, -2, 0, 3, 0, -3, -2, 2, 1, -4, 3, 1, 0, 3, -2, 1, 1
+};
+
+const q7_t svdf_2_input_sequence[42] = {
+	-51, 0, -26, 76, -102, -102, -76, 0, -51, -26, -51, -26, 51, 0,
+	51, -102, 51, -102, -76, 51, 76, -26, 26, -51, -76, -26, -102, -76,
+	-26, 26, 0, 51, 76, 0, 0, 26, -26, 76, -26, 76, 76, 26
+};
+
+const q7_t svdf_2_output_ref[15] = {
+	80, -19, -61, 17, -17, -3, 6, 30, -84, -4, -24, -11, 35, -128, 19
+};
+
+/* Return true when the first 'size' entries of 'bias' are all zero. */
+static bool check_null_bias(const int32_t *bias, int32_t size)
+{
+	for (int32_t i = 0; i < size; i++) {
+		if (bias[i] != 0) {
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Feed svdf_2_input_sequence to arm_svdf_s8() one round at a time, starting
+ * from a fresh copy of svdf_2_state, and compare the output left by the last
+ * round with svdf_2_output_ref. The whole sequence is repeated REPEAT_NUM
+ * times.
+ */
+void test_svdf(void)
+{
+	cmsis_nn_context input_ctx;
+	cmsis_nn_context output_ctx;
+	cmsis_nn_svdf_params svdf_2_params;
+	/* Zero-fill so members this test never assigns are not indeterminate */
+	cmsis_nn_dims input_dims = { 0 };
+	cmsis_nn_dims weights_feature_dims = { 0 };
+	cmsis_nn_dims weights_time_dims = { 0 };
+	cmsis_nn_dims state_dims = { 0 };
+	cmsis_nn_dims output_dims = { 0 };
+	cmsis_nn_dims bias_dims = { 0 };
+	cmsis_nn_per_tensor_quant_params input_quant_params;
+	cmsis_nn_per_tensor_quant_params output_quant_params;
+	int8_t output_data[SVDF_2_DST_SIZE];
+
+	const q7_t *weights_feature_data = svdf_2_weights_feature;
+	const q15_t *weights_time_data = svdf_2_weights_time;
+
+	input_dims.n = SVDF_2_INPUT_BATCHES;
+	input_dims.h = SVDF_2_INPUT_SIZE;
+	weights_feature_dims.n = SVDF_2_FEATURE_BATCHES;
+	weights_time_dims.h = SVDF_2_TIME_BATCHES;
+
+	input_quant_params.multiplier = SVDF_2_MULTIPLIER_IN;
+	input_quant_params.shift = SVDF_2_SHIFT_1;
+	output_quant_params.multiplier = SVDF_2_MULTIPLIER_OUT;
+	output_quant_params.shift = SVDF_2_SHIFT_2;
+
+	svdf_2_params.input_activation.min = SVDF_2_IN_ACTIVATION_MIN;
+	svdf_2_params.input_activation.max = SVDF_2_IN_ACTIVATION_MAX;
+	svdf_2_params.output_activation.min = SVDF_2_OUT_ACTIVATION_MIN;
+	svdf_2_params.output_activation.max = SVDF_2_OUT_ACTIVATION_MAX;
+	svdf_2_params.input_offset = SVDF_2_INPUT_OFFSET;
+	svdf_2_params.output_offset = SVDF_2_OUTPUT_OFFSET;
+	svdf_2_params.rank = SVDF_2_RANK;
+
+	const int input_round_size = SVDF_2_INPUT_BATCHES * SVDF_2_INPUT_SIZE;
+	const int number_inputs = sizeof(svdf_2_input_sequence) / input_round_size;
+	const int32_t number_units = SVDF_2_FEATURE_BATCHES / SVDF_2_RANK;
+	const int scratch_size = SVDF_2_INPUT_BATCHES * SVDF_2_FEATURE_BATCHES * sizeof(int32_t);
+	const int scratch_size_out = SVDF_2_INPUT_BATCHES * number_units * sizeof(int32_t);
+
+	input_ctx.buf = malloc(scratch_size);
+	input_ctx.size = scratch_size;
+	output_ctx.buf = malloc(scratch_size_out);
+	output_ctx.size = scratch_size_out;
+
+	int8_t *input_data = malloc(input_round_size);
+	q15_t *state_data = malloc(sizeof(svdf_2_state));
+
+	/* All four requests are non-empty, so a NULL here is a real failure */
+	zassert_not_null(input_ctx.buf, "input scratch allocation failed");
+	zassert_not_null(output_ctx.buf, "output scratch allocation failed");
+	zassert_not_null(input_data, "input buffer allocation failed");
+	zassert_not_null(state_data, "state buffer allocation failed");
+
+	const bool null_bias = check_null_bias(svdf_2_biases,
+					       SVDF_2_DST_SIZE / SVDF_2_INPUT_BATCHES);
+
+	for (int i = 0; i < REPEAT_NUM; i++) {
+		/* Start every repeat from the initial state and a cleared output */
+		memcpy(state_data, svdf_2_state, sizeof(svdf_2_state));
+		memset(output_data, 0, sizeof(output_data));
+
+		for (int j = 0; j < number_inputs; j++) {
+			memcpy(input_data, svdf_2_input_sequence + j * input_round_size,
+			       input_round_size);
+			arm_status result = arm_svdf_s8(&input_ctx,
+							&output_ctx,
+							&svdf_2_params,
+							&input_quant_params,
+							&output_quant_params,
+							&input_dims,
+							input_data,
+							&state_dims,
+							state_data,
+							&weights_feature_dims,
+							weights_feature_data,
+							&weights_time_dims,
+							weights_time_data,
+							&bias_dims,
+							null_bias ? NULL : svdf_2_biases,
+							&output_dims,
+							output_data);
+			zassert_equal(ARM_MATH_SUCCESS, result, "");
+		}
+
+		zassert_mem_equal(svdf_2_output_ref, output_data, sizeof(output_data), "");
+	}
+	free(state_data);
+	free(input_data);
+	free(input_ctx.buf);
+	free(output_ctx.buf);
+}
+
+/* Register every CMSIS-NN test case in one ztest suite and run it. */
+void test_main(void)
+{
+	ztest_test_suite(test_cmsis_nn,
+			 ztest_unit_test(test_avgpool),
+			 ztest_unit_test(test_convolve),
+			 ztest_unit_test(test_depthwise_convolve),
+			 ztest_unit_test(test_fully_connected),
+			 ztest_unit_test(test_max_pool),
+			 ztest_unit_test(test_softmax),
+			 ztest_unit_test(test_svdf)
+	);
+	ztest_run_test_suite(test_cmsis_nn);
+}
diff --git a/tests/lib/cmsis_nn/testcase.yaml b/tests/lib/cmsis_nn/testcase.yaml
new file mode 100644
index 00000000000..5b5cdeb88ed
--- /dev/null
+++ b/tests/lib/cmsis_nn/testcase.yaml
@@ -0,0 +1,14 @@
+common:
+  # TODO: Remove when QEMU 6.2 is released with MVE emulation (see #37694).
+  platform_exclude: mps3_an547
+
+tests:
+  libraries.cmsis_nn:
+    filter: CPU_CORTEX_M and TOOLCHAIN_HAS_NEWLIB == 1
+    integration_platforms:
+      - frdm_k64f
+      - sam_e70_xplained
+      - mps2_an521
+    tags: cmsis_nn
+    min_flash: 64
+    min_ram: 32