Initial commit
This commit is contained in:
		| @@ -0,0 +1,20 @@ | ||||
|  | ||||
| /* | ||||
|  * Auto generated Run-Time-Environment Component Configuration File | ||||
|  *      *** Do not modify ! *** | ||||
|  * | ||||
|  * Project: 'arm_nnexamples_cifar10'  | ||||
|  * Target:  'ARMCM0'  | ||||
|  */ | ||||
|  | ||||
| #ifndef RTE_COMPONENTS_H | ||||
| #define RTE_COMPONENTS_H | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Define the Device Header File:  | ||||
|  */ | ||||
| #define CMSIS_device_header "ARMCM0.h" | ||||
|  | ||||
|  | ||||
| #endif /* RTE_COMPONENTS_H */ | ||||
| @@ -0,0 +1,26 @@ | ||||
|  | ||||
| /* | ||||
|  * Auto generated Run-Time-Environment Component Configuration File | ||||
|  *      *** Do not modify ! *** | ||||
|  * | ||||
|  * Project: 'arm_nnexamples_nn_test'  | ||||
|  * Target:  'ARMCM3'  | ||||
|  */ | ||||
|  | ||||
| #ifndef RTE_COMPONENTS_H | ||||
| #define RTE_COMPONENTS_H | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Define the Device Header File:  | ||||
|  */ | ||||
| #define CMSIS_device_header "ARMCM3.h" | ||||
|  | ||||
| #define RTE_Compiler_IO_STDERR          /* Compiler I/O: STDERR */ | ||||
|           #define RTE_Compiler_IO_STDERR_ITM      /* Compiler I/O: STDERR ITM */ | ||||
| #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */ | ||||
|           #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */ | ||||
| #define RTE_Compiler_IO_TTY             /* Compiler I/O: TTY */ | ||||
|           #define RTE_Compiler_IO_TTY_ITM         /* Compiler I/O: TTY ITM */ | ||||
|  | ||||
| #endif /* RTE_COMPONENTS_H */ | ||||
| @@ -0,0 +1,26 @@ | ||||
|  | ||||
| /* | ||||
|  * Auto generated Run-Time-Environment Component Configuration File | ||||
|  *      *** Do not modify ! *** | ||||
|  * | ||||
|  * Project: 'arm_nnexamples_nn_test'  | ||||
|  * Target:  'ARMCM4_FP'  | ||||
|  */ | ||||
|  | ||||
| #ifndef RTE_COMPONENTS_H | ||||
| #define RTE_COMPONENTS_H | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Define the Device Header File:  | ||||
|  */ | ||||
| #define CMSIS_device_header "ARMCM4_FP.h" | ||||
|  | ||||
| #define RTE_Compiler_IO_STDERR          /* Compiler I/O: STDERR */ | ||||
|           #define RTE_Compiler_IO_STDERR_ITM      /* Compiler I/O: STDERR ITM */ | ||||
| #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */ | ||||
|           #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */ | ||||
| #define RTE_Compiler_IO_TTY             /* Compiler I/O: TTY */ | ||||
|           #define RTE_Compiler_IO_TTY_ITM         /* Compiler I/O: TTY ITM */ | ||||
|  | ||||
| #endif /* RTE_COMPONENTS_H */ | ||||
| @@ -0,0 +1,26 @@ | ||||
|  | ||||
| /* | ||||
|  * Auto generated Run-Time-Environment Component Configuration File | ||||
|  *      *** Do not modify ! *** | ||||
|  * | ||||
|  * Project: 'arm_nnexamples_nn_test'  | ||||
|  * Target:  'ARMCM7_SP'  | ||||
|  */ | ||||
|  | ||||
| #ifndef RTE_COMPONENTS_H | ||||
| #define RTE_COMPONENTS_H | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Define the Device Header File:  | ||||
|  */ | ||||
| #define CMSIS_device_header "ARMCM7_SP.h" | ||||
|  | ||||
| #define RTE_Compiler_IO_STDERR          /* Compiler I/O: STDERR */ | ||||
|           #define RTE_Compiler_IO_STDERR_ITM      /* Compiler I/O: STDERR ITM */ | ||||
| #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */ | ||||
|           #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */ | ||||
| #define RTE_Compiler_IO_TTY             /* Compiler I/O: TTY */ | ||||
|           #define RTE_Compiler_IO_TTY_ITM         /* Compiler I/O: TTY ITM */ | ||||
|  | ||||
| #endif /* RTE_COMPONENTS_H */ | ||||
| @@ -0,0 +1,71 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void arm_convolve_HWC_q15_ref(const q15_t * Im_in,  // input image | ||||
|                               const uint16_t dim_im_in, // input image dimention | ||||
|                               const uint16_t ch_im_in,  // number of input image channels | ||||
|                               const q15_t * wt, // kernel weights  | ||||
|                               const uint16_t ch_im_out, // number of filters, i.e., output image channels | ||||
|                               const uint16_t dim_kernel,    // filter kernel size | ||||
|                               const uint16_t padding,   // padding sizes | ||||
|                               const uint16_t stride,    // stride | ||||
|                               const q15_t * bias,   // bias | ||||
|                               const uint16_t bias_shift, const uint16_t out_shift, q15_t * Im_out,  // output image | ||||
|                               const uint16_t dim_im_out,    // output image dimension | ||||
|                               q15_t * bufferA,  //buffer space for input | ||||
|                               q7_t * bufferB    //buffer space for output | ||||
|     ) | ||||
| { | ||||
|     int       i, j, k, l, m, n; | ||||
|     int       conv_out; | ||||
|     int       in_row, in_col; | ||||
|  | ||||
|     for (i = 0; i < ch_im_out; i++) | ||||
|     { | ||||
|         for (j = 0; j < dim_im_out; j++) | ||||
|         { | ||||
|             for (k = 0; k < dim_im_out; k++) | ||||
|             { | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|                 conv_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|                 conv_out = bias[i] << bias_shift; | ||||
| #endif | ||||
|                 for (m = 0; m < dim_kernel; m++) | ||||
|                 { | ||||
|                     for (n = 0; n < dim_kernel; n++) | ||||
|                     { | ||||
|                         in_row = stride * j + m - padding; | ||||
|                         in_col = stride * k + n - padding; | ||||
|                         if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) | ||||
|                         { | ||||
|                             for (l = 0; l < ch_im_in; l++) | ||||
|                             { | ||||
|                                 conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * | ||||
|                                     wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,83 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void | ||||
| arm_convolve_HWC_q15_nonsquare_ref(const q15_t * Im_in, | ||||
|                           const uint16_t dim_im_in_x, | ||||
|                           const uint16_t dim_im_in_y, | ||||
|                           const uint16_t ch_im_in, | ||||
|                           const q15_t * wt, | ||||
|                           const uint16_t ch_im_out, | ||||
|                           const uint16_t dim_kernel_x, | ||||
|                           const uint16_t dim_kernel_y, | ||||
|                           const uint16_t padding_x, | ||||
|                           const uint16_t padding_y, | ||||
|                           const uint16_t stride_x, | ||||
|                           const uint16_t stride_y, | ||||
|                           const q15_t * bias, | ||||
|                           const uint16_t bias_shift, | ||||
|                           const uint16_t out_shift, | ||||
|                           q15_t * Im_out, | ||||
|                           const uint16_t dim_im_out_x, | ||||
|                           const uint16_t dim_im_out_y,  | ||||
|                           q15_t * bufferA,  | ||||
|                           q7_t * bufferB) | ||||
|  | ||||
| {	 | ||||
|     uint16_t  i, j, k, l, m, n; | ||||
|     int       conv_out; | ||||
|     signed char in_row, in_col; | ||||
|  | ||||
|     for (i = 0; i < ch_im_out; i++) | ||||
|     { | ||||
|         for (j = 0; j < dim_im_out_y; j++) | ||||
|         { | ||||
|             for (k = 0; k < dim_im_out_x; k++) | ||||
|             { | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|                 conv_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|                 conv_out = bias[i] << bias_shift; | ||||
| #endif | ||||
|                 for (m = 0; m < dim_kernel_y; m++) | ||||
|                 { | ||||
|                     for (n = 0; n < dim_kernel_x; n++) | ||||
|                     { | ||||
|                         in_row = stride_y * j + m - padding_y; | ||||
|                         in_col = stride_x * k + n - padding_x; | ||||
|                         if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) | ||||
|                         { | ||||
|                             for (l = 0; l < ch_im_in; l++) | ||||
|                             { | ||||
|                                 conv_out += | ||||
|                                     Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + | ||||
|                                           l] * wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x + | ||||
|                                                                                             n) * ch_im_in + l]; | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| }	 | ||||
|  | ||||
| 	 | ||||
| @@ -0,0 +1,72 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void arm_convolve_HWC_q7_ref(const q7_t * Im_in,    // input image | ||||
|                              const uint16_t dim_im_in,  // input image dimention | ||||
|                              const uint16_t ch_im_in,   // number of input image channels | ||||
|                              const q7_t * wt,   // kernel weights  | ||||
|                              const uint16_t ch_im_out,  // number of filters, i.e., output image channels | ||||
|                              const uint16_t dim_kernel, // filter kernel size | ||||
|                              const uint16_t padding,    // padding sizes | ||||
|                              const uint16_t stride, // stride | ||||
|                              const q7_t * bias, // bias | ||||
|                              const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out,    // output image | ||||
|                              const uint16_t dim_im_out, // output image dimension | ||||
|                              q15_t * bufferA,   //buffer space for input | ||||
|                              q7_t * bufferB //buffer space for output | ||||
|     ) | ||||
| { | ||||
|     int       i, j, k, l, m, n; | ||||
|     int       conv_out; | ||||
|     int       in_row, in_col; | ||||
|  | ||||
|     for (i = 0; i < ch_im_out; i++) | ||||
|     { | ||||
|         for (j = 0; j < dim_im_out; j++) | ||||
|         { | ||||
|             for (k = 0; k < dim_im_out; k++) | ||||
|             { | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|                 conv_out = ((q31_t) (bias[i]) << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|                 conv_out = bias[i] << bias_shift; | ||||
| #endif | ||||
|                 for (m = 0; m < dim_kernel; m++) | ||||
|                 { | ||||
|                     for (n = 0; n < dim_kernel; n++) | ||||
|                     { | ||||
|                         // if-for implementation | ||||
|                         in_row = stride * j + m - padding; | ||||
|                         in_col = stride * k + n - padding; | ||||
|                         if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) | ||||
|                         { | ||||
|                             for (l = 0; l < ch_im_in; l++) | ||||
|                             { | ||||
|                                 conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * | ||||
|                                     wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,78 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void arm_convolve_HWC_q7_ref_nonsquare(const q7_t * Im_in,  // input image | ||||
|                                        const uint16_t dim_im_in_x,  // input image dimention x | ||||
|                                        const uint16_t dim_im_in_y,  // input image dimention y | ||||
|                                        const uint16_t ch_im_in, // number of input image channels | ||||
|                                        const q7_t * wt, // kernel weights  | ||||
|                                        const uint16_t ch_im_out,    // number of filters, i.e., output image channels | ||||
|                                        const uint16_t dim_kernel_x, // filter kernel size x | ||||
|                                        const uint16_t dim_kernel_y, // filter kernel size y | ||||
|                                        const uint16_t padding_x,    // padding sizes x | ||||
|                                        const uint16_t padding_y,    // padding sizes y | ||||
|                                        const uint16_t stride_x, // stride x | ||||
|                                        const uint16_t stride_y, // stride y | ||||
|                                        const q7_t * bias,   // bias | ||||
|                                        const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out,  // output image | ||||
|                                        const uint16_t dim_im_out_x, // output image dimension x | ||||
|                                        const uint16_t dim_im_out_y, // output image dimension y | ||||
|                                        q15_t * bufferA, //buffer space for input | ||||
|                                        q7_t * bufferB   //buffer space for output | ||||
|     ) | ||||
| { | ||||
|     int       i, j, k, l, m, n; | ||||
|     int       conv_out; | ||||
|     int       in_row, in_col; | ||||
|  | ||||
|     for (i = 0; i < ch_im_out; i++) | ||||
|     { | ||||
|         for (j = 0; j < dim_im_out_y; j++) | ||||
|         { | ||||
|             for (k = 0; k < dim_im_out_x; k++) | ||||
|             { | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|                 conv_out = ((q31_t) (bias[i]) << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|                 conv_out = bias[i] << bias_shift; | ||||
| #endif | ||||
|                 for (m = 0; m < dim_kernel_y; m++) | ||||
|                 { | ||||
|                     for (n = 0; n < dim_kernel_x; n++) | ||||
|                     { | ||||
|                         // if-for implementation | ||||
|                         in_row = stride_y * j + m - padding_y; | ||||
|                         in_col = stride_x * k + n - padding_x; | ||||
|                         if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) | ||||
|                         { | ||||
|                             for (l = 0; l < ch_im_in; l++) | ||||
|                             { | ||||
|                                 conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * | ||||
|                                     wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in + | ||||
|                                        l]; | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,70 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void arm_depthwise_separable_conv_HWC_q7_ref(const q7_t * Im_in,    // input image | ||||
|                                              const uint16_t dim_im_in,  // input image dimention | ||||
|                                              const uint16_t ch_im_in,   // number of input image channels | ||||
|                                              const q7_t * wt,   // kernel weights  | ||||
|                                              const uint16_t ch_im_out,  // number of filters, i.e., output image channels | ||||
|                                              const uint16_t dim_kernel, // filter kernel size | ||||
|                                              const uint16_t padding,    // padding sizes | ||||
|                                              const uint16_t stride, // stride | ||||
|                                              const q7_t * bias, // bias | ||||
|                                              const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                              const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                              q7_t * Im_out, // output image | ||||
|                                              const uint16_t dim_im_out, // output image dimension | ||||
|                                              q15_t * bufferA,   //buffer space for input | ||||
|                                              q7_t * bufferB //buffer space for output | ||||
|     ) | ||||
| { | ||||
|     int       i_out_y, i_out_x, i_ch_out; | ||||
|     int       i_ker_y, i_ker_x; | ||||
|     for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) | ||||
|     { | ||||
|         for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) | ||||
|         { | ||||
|             for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) | ||||
|             { | ||||
|                 // for each output | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|                 int       conv_out = (bias[i_ch_out] << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|                 int       conv_out = bias[i_ch_out] << bias_shift; | ||||
| #endif | ||||
|                 for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++) | ||||
|                 { | ||||
|                     for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++) | ||||
|                     { | ||||
|                         int       in_row = stride * i_out_y + i_ker_y - padding; | ||||
|                         int       in_col = stride * i_out_x + i_ker_x - padding; | ||||
|                         if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) | ||||
|                         { | ||||
|                             conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + i_ch_out] * | ||||
|                                 wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out]; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[(i_out_y * dim_im_out + i_out_x) * ch_im_out + i_ch_out] = | ||||
|                     (q7_t) __SSAT((conv_out >> out_shift), 8); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,75 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in,  // input image | ||||
|                                                        const uint16_t dim_im_in_x,  // input image dimention x | ||||
|                                                        const uint16_t dim_im_in_y,  // input image dimention y | ||||
|                                                        const uint16_t ch_im_in, // number of input image channels | ||||
|                                                        const q7_t * wt, // kernel weights  | ||||
|                                                        const uint16_t ch_im_out,    // number of filters, i.e., output image channels | ||||
|                                                        const uint16_t dim_kernel_x, // filter kernel size x | ||||
|                                                        const uint16_t dim_kernel_y, // filter kernel size y | ||||
|                                                        const uint16_t padding_x,    // padding sizes x | ||||
|                                                        const uint16_t padding_y,    // padding sizes y | ||||
|                                                        const uint16_t stride_x, // stride x | ||||
|                                                        const uint16_t stride_y, // stride y | ||||
|                                                        const q7_t * bias,   // bias | ||||
|                                                        const uint16_t bias_shift,   // amount of left-shift for bias | ||||
|                                                        const uint16_t out_shift,    // amount of right-shift for output | ||||
|                                                        q7_t * Im_out,   // output image | ||||
|                                                        const uint16_t dim_im_out_x, // output image dimension x | ||||
|                                                        const uint16_t dim_im_out_y, // output image dimension y | ||||
|                                                        q15_t * bufferA, //buffer space for input | ||||
|                                                        q7_t * bufferB   //buffer space for output | ||||
|     ) | ||||
| { | ||||
|     int       i_out_y, i_out_x, i_ch_out; | ||||
|     int       i_ker_y, i_ker_x; | ||||
|     for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) | ||||
|     { | ||||
|         for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) | ||||
|         { | ||||
|             for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) | ||||
|             { | ||||
|                 // for each output | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|                 int       conv_out = (bias[i_ch_out] << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|                 int       conv_out = bias[i_ch_out] << bias_shift; | ||||
| #endif | ||||
|                 for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++) | ||||
|                 { | ||||
|                     for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++) | ||||
|                     { | ||||
|                         int       in_row = stride_y * i_out_y + i_ker_y - padding_y; | ||||
|                         int       in_col = stride_x * i_out_x + i_ker_x - padding_x; | ||||
|                         if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) | ||||
|                         { | ||||
|                             conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] * | ||||
|                                 wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out]; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] = | ||||
|                     (q7_t) __SSAT((conv_out >> out_shift), 8); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,120 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| void arm_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV,   // pointer to vector | ||||
|                                                 const q7_t * pM,    // pointer to matrix | ||||
|                                                 const uint16_t dim_vec, // length of the vector | ||||
|                                                 const uint16_t num_of_rows, // numCol of A | ||||
|                                                 const uint16_t bias_shift,  // amount of left-shift for bias | ||||
|                                                 const uint16_t out_shift,   // amount of right-shift for output | ||||
|                                                 const q7_t * bias, q15_t * pOut,    // output operand | ||||
|                                                 q15_t * vec_buffer) | ||||
| { | ||||
|  | ||||
|     uint16_t  rowCnt = num_of_rows >> 2; | ||||
|     const q7_t *pB = pM; | ||||
|     const q15_t *pA; | ||||
|     q15_t    *pO = pOut; | ||||
|     const q7_t *pBias = bias; | ||||
|  | ||||
|     while (rowCnt) | ||||
|     { | ||||
|         pA = pV; | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|         q31_t     sum = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1)); | ||||
|         q31_t     sum2 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1)); | ||||
|         q31_t     sum3 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1)); | ||||
|         q31_t     sum4 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|         q31_t     sum = *pBias++ << bias_shift; | ||||
|         q31_t     sum2 = *pBias++ << bias_shift; | ||||
|         q31_t     sum3 = *pBias++ << bias_shift; | ||||
|         q31_t     sum4 = *pBias++ << bias_shift; | ||||
| #endif | ||||
|  | ||||
|         uint16_t  colCnt = dim_vec >> 1; | ||||
|  | ||||
|         while (colCnt) | ||||
|         { | ||||
|             q15_t     inA1 = *pA++; | ||||
|             q15_t     inA2 = *pA++; | ||||
|  | ||||
|             q7_t      inB1 = *pB++; | ||||
|             q7_t      inB3 = *pB++; | ||||
|             q7_t      inB2 = *pB++; | ||||
|             q7_t      inB4 = *pB++; | ||||
|  | ||||
|             sum += inA1 * inB1 + inA2 * inB2; | ||||
|             sum2 += inA1 * inB3 + inA2 * inB4; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB3 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             inB4 = *pB++; | ||||
|  | ||||
|             sum3 += inA1 * inB1 + inA2 * inB2; | ||||
|             sum4 += inA1 * inB3 + inA2 * inB4; | ||||
|  | ||||
|             colCnt--; | ||||
|         } | ||||
|         colCnt = dim_vec & 0x1; | ||||
|         while (colCnt) | ||||
|         { | ||||
|             q15_t     inA = *pA++; | ||||
|             q7_t      inB = *pB++; | ||||
|             sum += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum2 += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum3 += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum4 += inA * inB; | ||||
|  | ||||
|             colCnt--; | ||||
|         } | ||||
|         *pO++ = (q15_t) __SSAT((sum >> out_shift), 16); | ||||
|         *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16); | ||||
|         *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16); | ||||
|         *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16); | ||||
|  | ||||
|         rowCnt--; | ||||
|     } | ||||
|  | ||||
|     rowCnt = num_of_rows & 0x3; | ||||
|  | ||||
|     while (rowCnt) | ||||
|     { | ||||
|         pA = pV; | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|         int       ip_out = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1)); | ||||
| #else | ||||
|         int       ip_out = *pBias++ << bias_shift; | ||||
| #endif | ||||
|         for (int j = 0; j < dim_vec; j++) | ||||
|         { | ||||
|             q15_t     inA = *pA++; | ||||
|             q7_t      inB = *pB++; | ||||
|             ip_out += inA * inB; | ||||
|         } | ||||
|         *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16); | ||||
|  | ||||
|         rowCnt--; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,43 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference fully-connected layer: q15 input vector, q7 weights, q15 output. | ||||
|  * | ||||
|  * For each row i the accumulator starts at bias[i] << bias_shift, receives | ||||
|  * the dot product of pV with row i of pM (rows stored contiguously, dim_vec | ||||
|  * weights per row), is shifted right by out_shift and saturated to 16 bits. | ||||
|  * Unless ARM_NN_TRUNCATE is defined, half an output LSB is added before the | ||||
|  * shift so the result is rounded rather than truncated. | ||||
|  * | ||||
|  * pV           input vector, dim_vec elements | ||||
|  * pM           weight matrix, num_of_rows * dim_vec elements | ||||
|  * dim_vec      length of the input vector | ||||
|  * num_of_rows  number of matrix rows, i.e. number of outputs | ||||
|  * bias_shift   left shift applied to each bias value | ||||
|  * out_shift    right shift applied to each accumulator (0 is allowed) | ||||
|  * bias         bias values, num_of_rows elements | ||||
|  * pOut         output vector, num_of_rows elements | ||||
|  * vec_buffer   not used by this implementation | ||||
|  */ | ||||
| void arm_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV,   // pointer to vector | ||||
|                                             const q7_t * pM,    // pointer to matrix | ||||
|                                             const uint16_t dim_vec, // length of the vector | ||||
|                                             const uint16_t num_of_rows, // numCol of A | ||||
|                                             const uint16_t bias_shift,  // amount of left-shift for bias | ||||
|                                             const uint16_t out_shift,   // amount of right-shift for output | ||||
|                                             const q7_t * bias, q15_t * pOut,    // output operand | ||||
|                                             q15_t * vec_buffer) | ||||
| { | ||||
|     (void) vec_buffer; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const int rounding = (int) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const int rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     for (int i = 0; i < num_of_rows; i++) | ||||
|     { | ||||
|         int       ip_out = (bias[i] << bias_shift) + rounding; | ||||
|  | ||||
|         for (int j = 0; j < dim_vec; j++) | ||||
|         { | ||||
|             ip_out += pV[j] * pM[i * dim_vec + j]; | ||||
|         } | ||||
|         pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,119 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference model of the "opt" fully-connected q15 layer. | ||||
|  * | ||||
|  * Rows are processed four at a time. Within such a group the weights are | ||||
|  * read sequentially from pM in this order: for every pair of columns, two | ||||
|  * weights of row 0, two of row 1, two of row 2 and two of row 3; a trailing | ||||
|  * odd column contributes one weight per row. The remaining | ||||
|  * (num_of_rows & 3) rows are read as plain contiguous rows of dim_vec | ||||
|  * weights. | ||||
|  * | ||||
|  * Each accumulator starts at bias << bias_shift (plus half an output LSB | ||||
|  * unless ARM_NN_TRUNCATE is defined), is shifted right by out_shift and | ||||
|  * saturated to 16 bits before being stored to pOut. | ||||
|  * | ||||
|  * out_shift == 0 is allowed. vec_buffer is not used by this implementation. | ||||
|  */ | ||||
| void arm_fully_connected_q15_opt_ref(const q15_t * pV,  // pointer to vector | ||||
|                                      const q15_t * pM,  // pointer to matrix | ||||
|                                      const uint16_t dim_vec,    // length of the vector | ||||
|                                      const uint16_t num_of_rows,    // numCol of A | ||||
|                                      const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                      const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                      const q15_t * bias, q15_t * pOut,  // output operand | ||||
|                                      q15_t * vec_buffer) | ||||
| { | ||||
|  | ||||
|     uint16_t  rowCnt = num_of_rows >> 2; | ||||
|     const q15_t *pB = pM; | ||||
|     const q15_t *pA; | ||||
|     q15_t    *pO = pOut; | ||||
|     const q15_t *pBias = bias; | ||||
|  | ||||
|     (void) vec_buffer; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const q31_t rounding = (q31_t) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const q31_t rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     /* Groups of four rows, weights interleaved as described above. */ | ||||
|     while (rowCnt) | ||||
|     { | ||||
|         pA = pV; | ||||
|         q31_t     sum = (*pBias++ << bias_shift) + rounding; | ||||
|         q31_t     sum2 = (*pBias++ << bias_shift) + rounding; | ||||
|         q31_t     sum3 = (*pBias++ << bias_shift) + rounding; | ||||
|         q31_t     sum4 = (*pBias++ << bias_shift) + rounding; | ||||
|  | ||||
|         uint16_t  colCnt = dim_vec >> 1; | ||||
|  | ||||
|         /* Two input columns per iteration, eight weights consumed. */ | ||||
|         while (colCnt) | ||||
|         { | ||||
|             q15_t     inA1 = *pA++; | ||||
|             q15_t     inA2 = *pA++; | ||||
|  | ||||
|             q15_t     inB1 = *pB++; | ||||
|             q15_t     inB2 = *pB++; | ||||
|             sum += inA1 * inB1 + inA2 * inB2; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             sum2 += inA1 * inB1 + inA2 * inB2; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             sum3 += inA1 * inB1 + inA2 * inB2; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             sum4 += inA1 * inB1 + inA2 * inB2; | ||||
|  | ||||
|             colCnt--; | ||||
|         } | ||||
|  | ||||
|         /* Odd trailing column: one weight per row. */ | ||||
|         colCnt = dim_vec & 0x1; | ||||
|         while (colCnt) | ||||
|         { | ||||
|             q15_t     inA = *pA++; | ||||
|             q15_t     inB = *pB++; | ||||
|             sum += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum2 += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum3 += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum4 += inA * inB; | ||||
|             colCnt--; | ||||
|         } | ||||
|         *pO++ = (q15_t) __SSAT((sum >> out_shift), 16); | ||||
|         *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16); | ||||
|         *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16); | ||||
|         *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16); | ||||
|  | ||||
|         rowCnt--; | ||||
|     } | ||||
|  | ||||
|     /* Leftover rows are stored as ordinary contiguous rows. */ | ||||
|     rowCnt = num_of_rows & 0x3; | ||||
|  | ||||
|     while (rowCnt) | ||||
|     { | ||||
|         pA = pV; | ||||
|         int       ip_out = (*pBias++ << bias_shift) + rounding; | ||||
|  | ||||
|         for (int j = 0; j < dim_vec; j++) | ||||
|         { | ||||
|             q15_t     inA = *pA++; | ||||
|             q15_t     inB = *pB++; | ||||
|             ip_out += inA * inB; | ||||
|         } | ||||
|         *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16); | ||||
|  | ||||
|         rowCnt--; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,43 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference fully-connected layer for q15 inputs, weights and outputs. | ||||
|  * | ||||
|  * For each row i the accumulator starts at bias[i] << bias_shift, receives | ||||
|  * the dot product of pV with row i of pM (rows stored contiguously, dim_vec | ||||
|  * weights per row), is shifted right by out_shift and saturated to 16 bits. | ||||
|  * Unless ARM_NN_TRUNCATE is defined, half an output LSB is added before the | ||||
|  * shift so the result is rounded rather than truncated. | ||||
|  * | ||||
|  * pV           input vector, dim_vec elements | ||||
|  * pM           weight matrix, num_of_rows * dim_vec elements | ||||
|  * dim_vec      length of the input vector | ||||
|  * num_of_rows  number of matrix rows, i.e. number of outputs | ||||
|  * bias_shift   left shift applied to each bias value | ||||
|  * out_shift    right shift applied to each accumulator (0 is allowed) | ||||
|  * bias         bias values, num_of_rows elements | ||||
|  * pOut         output vector, num_of_rows elements | ||||
|  * vec_buffer   not used by this implementation | ||||
|  */ | ||||
| void arm_fully_connected_q15_ref(const q15_t * pV,  // pointer to vector | ||||
|                                  const q15_t * pM,  // pointer to matrix | ||||
|                                  const uint16_t dim_vec,    // length of the vector | ||||
|                                  const uint16_t num_of_rows,    // numCol of A | ||||
|                                  const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                  const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                  const q15_t * bias, q15_t * pOut,  // output operand | ||||
|                                  q15_t * vec_buffer) | ||||
| { | ||||
|     (void) vec_buffer; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const int rounding = (int) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const int rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     for (int i = 0; i < num_of_rows; i++) | ||||
|     { | ||||
|         int       ip_out = (bias[i] << bias_shift) + rounding; | ||||
|  | ||||
|         for (int j = 0; j < dim_vec; j++) | ||||
|         { | ||||
|             ip_out += pV[j] * pM[i * dim_vec + j]; | ||||
|         } | ||||
|         pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,138 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference model of the "opt" fully-connected q7 layer. | ||||
|  * | ||||
|  * Rows are processed four at a time (r0..r3) and columns four at a time | ||||
|  * (c0..c3). For each such 4x4 tile the sixteen weights are read | ||||
|  * sequentially from pM in this order: | ||||
|  *   r0c0 r1c0 r0c2 r1c2,  r2c0 r3c0 r2c2 r3c2, | ||||
|  *   r0c1 r1c1 r0c3 r1c3,  r2c1 r3c1 r2c3 r3c3. | ||||
|  * Each of the (dim_vec & 3) trailing columns contributes one weight per row | ||||
|  * (r0, r1, r2, r3). The remaining (num_of_rows & 3) rows are read as plain | ||||
|  * contiguous rows of dim_vec weights. | ||||
|  * | ||||
|  * Each accumulator starts at bias << bias_shift (plus half an output LSB | ||||
|  * unless ARM_NN_TRUNCATE is defined), is shifted right by out_shift and | ||||
|  * saturated to 8 bits before being stored to pOut. | ||||
|  * | ||||
|  * out_shift == 0 is allowed. vec_buffer is not used by this implementation. | ||||
|  */ | ||||
| void arm_fully_connected_q7_opt_ref(const q7_t * pV,    // pointer to vector | ||||
|                                     const q7_t * pM,    // pointer to matrix | ||||
|                                     const uint16_t dim_vec, // length of the vector | ||||
|                                     const uint16_t num_of_rows, // numCol of A | ||||
|                                     const uint16_t bias_shift,  // amount of left-shift for bias | ||||
|                                     const uint16_t out_shift,   // amount of right-shift for output | ||||
|                                     const q7_t * bias, q7_t * pOut, // output operand | ||||
|                                     q15_t * vec_buffer) | ||||
| { | ||||
|  | ||||
|     uint16_t  rowCnt = num_of_rows >> 2; | ||||
|     const q7_t *pB = pM; | ||||
|     const q7_t *pA; | ||||
|     q7_t     *pO = pOut; | ||||
|     const q7_t *pBias = bias; | ||||
|  | ||||
|     (void) vec_buffer; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const q31_t rounding = (q31_t) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const q31_t rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     /* Groups of four rows, weights interleaved as described above. */ | ||||
|     while (rowCnt) | ||||
|     { | ||||
|         pA = pV; | ||||
|         q31_t     sum = (*pBias++ << bias_shift) + rounding; | ||||
|         q31_t     sum2 = (*pBias++ << bias_shift) + rounding; | ||||
|         q31_t     sum3 = (*pBias++ << bias_shift) + rounding; | ||||
|         q31_t     sum4 = (*pBias++ << bias_shift) + rounding; | ||||
|  | ||||
|         uint16_t  colCnt = dim_vec >> 2; | ||||
|  | ||||
|         while (colCnt) | ||||
|         { | ||||
|             /* inA1/inA2 hold columns 0 and 2, inA3/inA4 columns 1 and 3. */ | ||||
|             q7_t      inA1 = *pA++; | ||||
|             q7_t      inA3 = *pA++; | ||||
|             q7_t      inA2 = *pA++; | ||||
|             q7_t      inA4 = *pA++; | ||||
|  | ||||
|             q7_t      inB1 = *pB++; | ||||
|             q7_t      inB3 = *pB++; | ||||
|             q7_t      inB2 = *pB++; | ||||
|             q7_t      inB4 = *pB++; | ||||
|  | ||||
|             sum += inA1 * inB1 + inA2 * inB2; | ||||
|             sum2 += inA1 * inB3 + inA2 * inB4; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB3 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             inB4 = *pB++; | ||||
|  | ||||
|             sum3 += inA1 * inB1 + inA2 * inB2; | ||||
|             sum4 += inA1 * inB3 + inA2 * inB4; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB3 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             inB4 = *pB++; | ||||
|  | ||||
|             sum += inA3 * inB1 + inA4 * inB2; | ||||
|             sum2 += inA3 * inB3 + inA4 * inB4; | ||||
|  | ||||
|             inB1 = *pB++; | ||||
|             inB3 = *pB++; | ||||
|             inB2 = *pB++; | ||||
|             inB4 = *pB++; | ||||
|  | ||||
|             sum3 += inA3 * inB1 + inA4 * inB2; | ||||
|             sum4 += inA3 * inB3 + inA4 * inB4; | ||||
|  | ||||
|             colCnt--; | ||||
|         } | ||||
|  | ||||
|         /* Trailing columns: one weight per row for each of them. */ | ||||
|         colCnt = dim_vec & 0x3; | ||||
|         while (colCnt) | ||||
|         { | ||||
|             q7_t      inA = *pA++; | ||||
|             q7_t      inB = *pB++; | ||||
|             sum += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum2 += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum3 += inA * inB; | ||||
|             inB = *pB++; | ||||
|             sum4 += inA * inB; | ||||
|  | ||||
|             colCnt--; | ||||
|         } | ||||
|         *pO++ = (q7_t) __SSAT((sum >> out_shift), 8); | ||||
|         *pO++ = (q7_t) __SSAT((sum2 >> out_shift), 8); | ||||
|         *pO++ = (q7_t) __SSAT((sum3 >> out_shift), 8); | ||||
|         *pO++ = (q7_t) __SSAT((sum4 >> out_shift), 8); | ||||
|  | ||||
|         rowCnt--; | ||||
|     } | ||||
|  | ||||
|     /* Leftover rows are stored as ordinary contiguous rows. */ | ||||
|     rowCnt = num_of_rows & 0x3; | ||||
|  | ||||
|     while (rowCnt) | ||||
|     { | ||||
|         pA = pV; | ||||
|         int       ip_out = (*pBias++ << bias_shift) + rounding; | ||||
|  | ||||
|         for (int j = 0; j < dim_vec; j++) | ||||
|         { | ||||
|             q7_t      inA = *pA++; | ||||
|             q7_t      inB = *pB++; | ||||
|             ip_out += inA * inB; | ||||
|         } | ||||
|         *pO++ = (q7_t) __SSAT((ip_out >> out_shift), 8); | ||||
|  | ||||
|         rowCnt--; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,43 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference fully-connected layer for q7 inputs, weights and outputs. | ||||
|  * | ||||
|  * For each row i the accumulator starts at bias[i] << bias_shift, receives | ||||
|  * the dot product of pV with row i of pM (rows stored contiguously, dim_vec | ||||
|  * weights per row), is shifted right by out_shift and saturated to 8 bits. | ||||
|  * Unless ARM_NN_TRUNCATE is defined, half an output LSB is added before the | ||||
|  * shift so the result is rounded rather than truncated. | ||||
|  * | ||||
|  * pV           input vector, dim_vec elements | ||||
|  * pM           weight matrix, num_of_rows * dim_vec elements | ||||
|  * dim_vec      length of the input vector | ||||
|  * num_of_rows  number of matrix rows, i.e. number of outputs | ||||
|  * bias_shift   left shift applied to each bias value | ||||
|  * out_shift    right shift applied to each accumulator (0 is allowed) | ||||
|  * bias         bias values, num_of_rows elements | ||||
|  * pOut         output vector, num_of_rows elements | ||||
|  * vec_buffer   not used by this implementation | ||||
|  */ | ||||
| void arm_fully_connected_q7_ref(const q7_t * pV,    // pointer to vector | ||||
|                                 const q7_t * pM,    // pointer to matrix | ||||
|                                 const uint16_t dim_vec, // length of the vector | ||||
|                                 const uint16_t num_of_rows, // numCol of A | ||||
|                                 const uint16_t bias_shift,  // amount of left-shift for bias | ||||
|                                 const uint16_t out_shift,   // amount of right-shift for output | ||||
|                                 const q7_t * bias, q7_t * pOut, // output operand | ||||
|                                 q15_t * vec_buffer) | ||||
| { | ||||
|     (void) vec_buffer; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const int rounding = (int) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const int rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     for (int i = 0; i < num_of_rows; i++) | ||||
|     { | ||||
|         int       ip_out = (bias[i] << bias_shift) + rounding; | ||||
|  | ||||
|         for (int j = 0; j < dim_vec; j++) | ||||
|         { | ||||
|             ip_out += pV[j] * pM[i * dim_vec + j]; | ||||
|         } | ||||
|         pOut[i] = (q7_t) __SSAT((ip_out >> out_shift), 8); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,58 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "arm_math.h" | ||||
| #include "arm_nnfunctions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference element-wise multiplication of two q7 vectors. | ||||
|  * | ||||
|  * Each product pSrcA[i] * pSrcB[i] is shifted right by out_shift and | ||||
|  * saturated to 8 bits. Unless ARM_NN_TRUNCATE is defined, half an output | ||||
|  * LSB is added before the shift so the result is rounded. | ||||
|  * | ||||
|  * pSrcA, pSrcB  input vectors, blockSize elements each | ||||
|  * pDst          output vector, blockSize elements | ||||
|  * out_shift     right shift applied to each product (0 is allowed) | ||||
|  * blockSize     number of elements to process | ||||
|  */ | ||||
| void      arm_nn_mult_q7_ref(q7_t * pSrcA, | ||||
|                              q7_t * pSrcB, | ||||
|                              q7_t * pDst, | ||||
|                              const uint16_t out_shift, | ||||
|                              uint32_t blockSize) { | ||||
|     /* Same width as blockSize: a 16-bit counter wraps before reaching any | ||||
|      * blockSize above 65535 and the loop would never terminate. */ | ||||
|     uint32_t  i; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const q31_t rounding = (q31_t) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const q31_t rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     for (i = 0; i < blockSize; i++) | ||||
|     { | ||||
|         q31_t product = pSrcA[i] * pSrcB[i]; | ||||
|         pDst[i] = (q7_t)__SSAT((product + rounding) >> out_shift, 8); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Reference element-wise multiplication of two q15 vectors. | ||||
|  * | ||||
|  * Each product pSrcA[i] * pSrcB[i] is shifted right by out_shift and | ||||
|  * saturated to 16 bits. Unless ARM_NN_TRUNCATE is defined, half an output | ||||
|  * LSB is added before the shift so the result is rounded. | ||||
|  * | ||||
|  * pSrcA, pSrcB  input vectors, blockSize elements each | ||||
|  * pDst          output vector, blockSize elements | ||||
|  * out_shift     right shift applied to each product (0 is allowed) | ||||
|  * blockSize     number of elements to process | ||||
|  */ | ||||
| void     arm_nn_mult_q15_ref(q15_t * pSrcA, | ||||
|                              q15_t * pSrcB, | ||||
|                              q15_t * pDst, | ||||
|                              const uint16_t out_shift, | ||||
|                              uint32_t blockSize) { | ||||
|     /* Same width as blockSize: a 16-bit counter wraps before reaching any | ||||
|      * blockSize above 65535 and the loop would never terminate. */ | ||||
|     uint32_t  i; | ||||
|  | ||||
| #ifndef ARM_NN_TRUNCATE | ||||
|     /* Same value as 1 << (out_shift - 1) for out_shift >= 1, and 0 for | ||||
|      * out_shift == 0, where the former expression shifted by -1. */ | ||||
|     const q31_t rounding = (q31_t) ((0x1u << out_shift) >> 1); | ||||
| #else | ||||
|     const q31_t rounding = 0; | ||||
| #endif | ||||
|  | ||||
|     for (i = 0; i < blockSize; i++) | ||||
|     { | ||||
|         q31_t product = pSrcA[i] * pSrcB[i]; | ||||
|         pDst[i] = (q15_t)__SSAT((product + rounding) >> out_shift, 16); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,96 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference average pooling on a q7 image. | ||||
|  * | ||||
|  * Pixels are addressed as Im_in[ch + ch_im_in * (x + y * dim_im_in)]. For | ||||
|  * every output position the dim_kernel x dim_kernel window starting at | ||||
|  * (i_x * stride - padding, i_y * stride - padding) is scanned; positions | ||||
|  * outside the input are skipped and do not count towards the divisor. The | ||||
|  * stored value is sum / count using integer division (truncation towards | ||||
|  * zero). | ||||
|  * | ||||
|  * A window that does not overlap the input at all produces 0 instead of | ||||
|  * dividing by zero. | ||||
|  * | ||||
|  * bufferA is not used by this implementation. | ||||
|  */ | ||||
| void arm_avepool_q7_HWC_ref(const q7_t * Im_in, // input image | ||||
|                             const uint16_t dim_im_in,   // input image dimension | ||||
|                             const uint16_t ch_im_in,    // number of input image channels | ||||
|                             const uint16_t dim_kernel,  // window kernel size | ||||
|                             const uint16_t padding, // padding sizes | ||||
|                             const uint16_t stride,  // stride | ||||
|                             const uint16_t dim_im_out,  // output image dimension | ||||
|                             q7_t * bufferA, // a buffer for local storage | ||||
|                             q7_t * Im_out) | ||||
| { | ||||
|     int16_t   i_ch_in, i_x, i_y; | ||||
|     int16_t   k_x, k_y; | ||||
|  | ||||
|     (void) bufferA; | ||||
|  | ||||
|     for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++) | ||||
|     { | ||||
|         for (i_y = 0; i_y < dim_im_out; i_y++) | ||||
|         { | ||||
|             for (i_x = 0; i_x < dim_im_out; i_x++) | ||||
|             { | ||||
|                 int       sum = 0; | ||||
|                 int       count = 0; | ||||
|                 for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++) | ||||
|                 { | ||||
|                     for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++) | ||||
|                     { | ||||
|                         /* Only positions inside the input contribute. */ | ||||
|                         if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in) | ||||
|                         { | ||||
|                             sum += Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)]; | ||||
|                             count++; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 /* count stays 0 when the whole window lies in the padding. */ | ||||
|                 Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = (count > 0) ? (sum / count) : 0; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Reference max pooling on a q7 image. | ||||
|  * | ||||
|  * Pixels are addressed as Im_in[ch + ch_im_in * (x + y * dim_im_in)]. For | ||||
|  * every output position the dim_kernel x dim_kernel window starting at | ||||
|  * (x * stride - padding, y * stride - padding) is scanned, positions outside | ||||
|  * the input are skipped, and the largest value seen is stored. The running | ||||
|  * maximum starts at -129, below every q7 value. | ||||
|  * | ||||
|  * bufferA is not referenced. | ||||
|  */ | ||||
| void arm_maxpool_q7_HWC_ref(const q7_t * Im_in, // input image | ||||
|                             const uint16_t dim_im_in,   // input image dimension | ||||
|                             const uint16_t ch_im_in,    // number of input image channels | ||||
|                             const uint16_t dim_kernel,  // window kernel size | ||||
|                             const uint16_t padding, // padding sizes | ||||
|                             const uint16_t stride,  // stride | ||||
|                             const uint16_t dim_im_out,  // output image dimension | ||||
|                             q7_t * bufferA, // a buffer for local storage | ||||
|                             q7_t * Im_out) | ||||
| { | ||||
|     int16_t   ch, out_x, out_y; | ||||
|     int16_t   win_x, win_y; | ||||
|  | ||||
|     for (ch = 0; ch < ch_im_in; ch++) | ||||
|     { | ||||
|         for (out_y = 0; out_y < dim_im_out; out_y++) | ||||
|         { | ||||
|             /* One past the last window row for this output row. */ | ||||
|             const int win_y_end = out_y * stride - padding + dim_kernel; | ||||
|  | ||||
|             for (out_x = 0; out_x < dim_im_out; out_x++) | ||||
|             { | ||||
|                 /* One past the last window column for this output column. */ | ||||
|                 const int win_x_end = out_x * stride - padding + dim_kernel; | ||||
|                 int       best = -129; | ||||
|  | ||||
|                 for (win_y = out_y * stride - padding; win_y < win_y_end; win_y++) | ||||
|                 { | ||||
|                     for (win_x = out_x * stride - padding; win_x < win_x_end; win_x++) | ||||
|                     { | ||||
|                         /* Skip positions that fall outside the input. */ | ||||
|                         if (win_y < 0 || win_x < 0 || win_y >= dim_im_in || win_x >= dim_im_in) | ||||
|                         { | ||||
|                             continue; | ||||
|                         } | ||||
|  | ||||
|                         const q7_t sample = Im_in[ch + ch_im_in * (win_x + win_y * dim_im_in)]; | ||||
|  | ||||
|                         if (sample > best) | ||||
|                         { | ||||
|                             best = sample; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Im_out[ch + ch_im_in * (out_x + out_y * dim_im_out)] = best; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,42 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "arm_math.h" | ||||
| #include "arm_nnfunctions.h" | ||||
|  | ||||
| /* | ||||
|  * Reference in-place ReLU for q7 data: every negative element among the | ||||
|  * first `size` entries of `data` is replaced by 0, the rest are untouched. | ||||
|  */ | ||||
| void arm_relu_q7_ref(q7_t * data, uint16_t size) | ||||
| { | ||||
|     q7_t     *cursor = data; | ||||
|     uint16_t  remaining = size; | ||||
|  | ||||
|     while (remaining > 0) | ||||
|     { | ||||
|         if (*cursor < 0) | ||||
|         { | ||||
|             *cursor = 0; | ||||
|         } | ||||
|         cursor++; | ||||
|         remaining--; | ||||
|     } | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Reference in-place ReLU for q15 data: every negative element among the | ||||
|  * first `size` entries of `data` is replaced by 0, the rest are untouched. | ||||
|  */ | ||||
| void arm_relu_q15_ref(q15_t * data, uint16_t size) | ||||
| { | ||||
|     q15_t    *cursor = data; | ||||
|     uint16_t  remaining = size; | ||||
|  | ||||
|     while (remaining > 0) | ||||
|     { | ||||
|         if (*cursor < 0) | ||||
|         { | ||||
|             *cursor = 0; | ||||
|         } | ||||
|         cursor++; | ||||
|         remaining--; | ||||
|     } | ||||
| } | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -0,0 +1,250 @@ | ||||
| /* | ||||
|  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. | ||||
|  * | ||||
|  * SPDX-License-Identifier: Apache-2.0 | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the License); you may | ||||
|  * not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||||
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #ifndef _REF_FUNCTIONS_H_ | ||||
| #define _REF_FUNCTIONS_H_ | ||||
|  | ||||
| #include "arm_math.h" | ||||
| #include "arm_nnfunctions.h" | ||||
| //#include "arm_nnsupportfunctions.h" | ||||
| #include "fully_connected_testing_weights.h" | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern    "C" | ||||
| { | ||||
| #endif | ||||
|  | ||||
| /* | ||||
|  * | ||||
|  * Convolution reference implementation | ||||
|  * | ||||
|  */ | ||||
|  | ||||
|     /* | ||||
|      * Prototype of the q7 HWC convolution reference routine; the definition | ||||
|      * is not part of this header. A single dimension value is passed for the | ||||
|      * input, kernel and output - presumably square shapes; confirm against | ||||
|      * the definition. | ||||
|      */ | ||||
|     void      arm_convolve_HWC_q7_ref(const q7_t * Im_in,   // input image | ||||
|                                       const uint16_t dim_im_in, // input image dimension | ||||
|                                       const uint16_t ch_im_in,  // number of input image channels | ||||
|                                       const q7_t * wt,  // kernel weights | ||||
|                                       const uint16_t ch_im_out, // number of filters, i.e., output image channels | ||||
|                                       const uint16_t dim_kernel,    // filter kernel size | ||||
|                                       const uint16_t padding,   // padding sizes | ||||
|                                       const uint16_t stride,    // stride | ||||
|                                       const q7_t * bias,    // bias | ||||
|                                       const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out,   // bias left-shift, output right-shift (presumably, as in the fully-connected refs), output image | ||||
|                                       const uint16_t dim_im_out,    // output image dimension | ||||
|                                       q15_t * bufferA,  // buffer space for input | ||||
|                                       q7_t * bufferB    // buffer space for output | ||||
|         ); | ||||
|  | ||||
|     /* | ||||
|      * Prototype of the q7 HWC convolution reference routine that takes | ||||
|      * separate x and y values for the input size, kernel size, padding, | ||||
|      * stride and output size; the definition is not part of this header. | ||||
|      */ | ||||
|     void      arm_convolve_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image | ||||
|                                                 const uint16_t dim_im_in_x, // input image dimension x | ||||
|                                                 const uint16_t dim_im_in_y, // input image dimension y | ||||
|                                                 const uint16_t ch_im_in,    // number of input image channels | ||||
|                                                 const q7_t * wt,    // kernel weights | ||||
|                                                 const uint16_t ch_im_out,   // number of filters, i.e., output image channels | ||||
|                                                 const uint16_t dim_kernel_x,    // filter kernel size x | ||||
|                                                 const uint16_t dim_kernel_y,    // filter kernel size y | ||||
|                                                 const uint16_t padding_x,   // padding sizes x | ||||
|                                                 const uint16_t padding_y,   // padding sizes y | ||||
|                                                 const uint16_t stride_x,    // stride x | ||||
|                                                 const uint16_t stride_y,    // stride y | ||||
|                                                 const q7_t * bias,  // bias | ||||
|                                                 const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // bias left-shift, output right-shift (presumably, as in the fully-connected refs), output image | ||||
|                                                 const uint16_t dim_im_out_x,    // output image dimension x | ||||
|                                                 const uint16_t dim_im_out_y,    // output image dimension y | ||||
|                                                 q15_t * bufferA,    // buffer space for input | ||||
|                                                 q7_t * bufferB  // buffer space for output | ||||
|         ); | ||||
|  | ||||
|     /* Reference q15 HWC convolution for square images and kernels (one size per dimension argument). | ||||
|      */ void      arm_convolve_HWC_q15_ref(const q15_t * Im_in, // input image | ||||
|                                        const uint16_t dim_im_in,    // input image dimension | ||||
|                                        const uint16_t ch_im_in, // number of input image channels | ||||
|                                        const q15_t * wt,    // kernel weights | ||||
|                                        const uint16_t ch_im_out,    // number of filters, i.e., output image channels | ||||
|                                        const uint16_t dim_kernel,   // filter kernel size | ||||
|                                        const uint16_t padding,  // padding size | ||||
|                                        const uint16_t stride,   // stride | ||||
|                                        const q15_t * bias,  // bias | ||||
|                                        const uint16_t bias_shift, const uint16_t out_shift, q15_t * Im_out, // amount of left-shift for bias; amount of right-shift for output; output image | ||||
|                                        const uint16_t dim_im_out,   // output image dimension | ||||
|                                        q15_t * bufferA, // buffer space for input | ||||
|                                        q7_t * bufferB   // buffer space for output | ||||
|         ); | ||||
|     /* Reference q15 HWC convolution taking separate x/y sizes for image, kernel, padding and stride. | ||||
|      * Parameter notes mirror the commented q7 non-square declaration in this header. | ||||
|      */ void      arm_convolve_HWC_q15_nonsquare_ref(const q15_t * Im_in, /* input image */ | ||||
|                                                       const uint16_t dim_im_in_x, /* input image dimension x */ | ||||
|                                                       const uint16_t dim_im_in_y, /* input image dimension y */ | ||||
|                                                       const uint16_t ch_im_in, /* number of input image channels */ | ||||
|                                                       const q15_t * wt, /* kernel weights */ | ||||
|                                                       const uint16_t ch_im_out, /* number of filters, i.e., output image channels */ | ||||
|                                                       const uint16_t dim_kernel_x, /* filter kernel size x */ | ||||
|                                                       const uint16_t dim_kernel_y, /* filter kernel size y */ | ||||
|                                                       const uint16_t padding_x, /* padding size x */ | ||||
|                                                       const uint16_t padding_y, /* padding size y */ | ||||
|                                                       const uint16_t stride_x, /* stride x */ | ||||
|                                                       const uint16_t stride_y, /* stride y */ | ||||
|                                                       const q15_t * bias, /* bias */ | ||||
|                                                       const uint16_t bias_shift, /* amount of left-shift for bias */ | ||||
|                                                       const uint16_t out_shift, /* amount of right-shift for output */ | ||||
|                                                       q15_t * Im_out, /* output image */ | ||||
|                                                       const uint16_t dim_im_out_x, /* output image dimension x */ | ||||
|                                                       const uint16_t dim_im_out_y, /* output image dimension y */ | ||||
|                                                       q15_t * bufferA, /* buffer space for input */ | ||||
|                                                       q7_t * bufferB); /* buffer space for output */ | ||||
| 													   | ||||
|     /* Reference q7 HWC depthwise-separable convolution for square images and kernels. | ||||
|      */ void      arm_depthwise_separable_conv_HWC_q7_ref(const q7_t * Im_in,   // input image | ||||
|                                                       const uint16_t dim_im_in, // input image dimension | ||||
|                                                       const uint16_t ch_im_in,  // number of input image channels | ||||
|                                                       const q7_t * wt,  // kernel weights | ||||
|                                                       const uint16_t ch_im_out, // number of filters, i.e., output image channels | ||||
|                                                       const uint16_t dim_kernel,    // filter kernel size | ||||
|                                                       const uint16_t padding,   // padding size | ||||
|                                                       const uint16_t stride,    // stride | ||||
|                                                       const q7_t * bias,    // bias | ||||
|                                                       const uint16_t bias_shift,    // amount of left-shift for bias | ||||
|                                                       const uint16_t out_shift, // amount of right-shift for output | ||||
|                                                       q7_t * Im_out,    // output image | ||||
|                                                       const uint16_t dim_im_out,    // output image dimension | ||||
|                                                       q15_t * bufferA,  // buffer space for input | ||||
|                                                       q7_t * bufferB    // buffer space for output | ||||
|         ); | ||||
|     /* Reference q7 HWC depthwise-separable convolution taking separate x/y sizes for image, | ||||
|      * kernel, padding and stride. | ||||
|      */ void      arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image | ||||
|                                                                 const uint16_t dim_im_in_x, // input image dimension x | ||||
|                                                                 const uint16_t dim_im_in_y, // input image dimension y | ||||
|                                                                 const uint16_t ch_im_in,    // number of input image channels | ||||
|                                                                 const q7_t * wt,    // kernel weights | ||||
|                                                                 const uint16_t ch_im_out,   // number of filters, i.e., output image channels | ||||
|                                                                 const uint16_t dim_kernel_x,    // filter kernel size x | ||||
|                                                                 const uint16_t dim_kernel_y,    // filter kernel size y | ||||
|                                                                 const uint16_t padding_x,   // padding size x | ||||
|                                                                 const uint16_t padding_y,   // padding size y | ||||
|                                                                 const uint16_t stride_x,    // stride x | ||||
|                                                                 const uint16_t stride_y,    // stride y | ||||
|                                                                 const q7_t * bias,  // bias | ||||
|                                                                 const uint16_t bias_shift,  // amount of left-shift for bias | ||||
|                                                                 const uint16_t out_shift,   // amount of right-shift for output | ||||
|                                                                 q7_t * Im_out,  // output image | ||||
|                                                                 const uint16_t dim_im_out_x,    // output image dimension x | ||||
|                                                                 const uint16_t dim_im_out_y,    // output image dimension y | ||||
|                                                                 q15_t * bufferA,    // buffer space for input | ||||
|                                                                 q7_t * bufferB  // buffer space for output | ||||
|         ); | ||||
|  | ||||
| /* | ||||
|  * | ||||
|  * Fully-connected reference implementation | ||||
|  * | ||||
|  */ | ||||
|  | ||||
|     /* Reference fully-connected layer: q7 vector, q7 matrix, q7 bias, q7 output. | ||||
|      */ void      arm_fully_connected_q7_ref(const q7_t * pV,   // pointer to vector | ||||
|                                          const q7_t * pM,   // pointer to matrix | ||||
|                                          const uint16_t dim_vec,    // length of the vector | ||||
|                                          const uint16_t num_of_rows,    // number of rows of the matrix | ||||
|                                          const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                          const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                          const q7_t * bias, q7_t * pOut,    // bias; output operand | ||||
|                                          q15_t * vec_buffer); /* presumably scratch space for the input vector -- confirm against the definition */ | ||||
|  | ||||
|     /* Reference fully-connected layer: q15 vector, q15 matrix, q15 bias, q15 output. | ||||
|      */ void      arm_fully_connected_q15_ref(const q15_t * pV, // pointer to vector | ||||
|                                           const q15_t * pM, // pointer to matrix | ||||
|                                           const uint16_t dim_vec,   // length of the vector | ||||
|                                           const uint16_t num_of_rows,   // number of rows of the matrix | ||||
|                                           const uint16_t bias_shift,    // amount of left-shift for bias | ||||
|                                           const uint16_t out_shift, // amount of right-shift for output | ||||
|                                           const q15_t * bias, q15_t * pOut, // bias; output operand | ||||
|                                           q15_t * vec_buffer); /* presumably scratch space for the input vector -- confirm against the definition */ | ||||
|  | ||||
|     /* Reference mixed-precision fully-connected layer: q15 vector, q7 matrix, q7 bias, q15 output. | ||||
|      */ void      arm_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV,  // pointer to vector | ||||
|                                                      const q7_t * pM,   // pointer to matrix | ||||
|                                                      const uint16_t dim_vec,    // length of the vector | ||||
|                                                      const uint16_t num_of_rows,    // number of rows of the matrix | ||||
|                                                      const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                                      const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                                      const q7_t * bias, q15_t * pOut,   // bias; output operand | ||||
|                                                      q15_t * vec_buffer); /* presumably scratch space for the input vector -- confirm against the definition */ | ||||
|  | ||||
|     /* Reference for the "_opt" q7 fully-connected variant: q7 vector, q7 matrix, q7 bias, q7 output. | ||||
|      * NOTE(review): presumably pM must use the weight ordering expected by the _opt kernel -- confirm. | ||||
|      */ void      arm_fully_connected_q7_opt_ref(const q7_t * pV,   // pointer to vector | ||||
|                                              const q7_t * pM,   // pointer to matrix | ||||
|                                              const uint16_t dim_vec,    // length of the vector | ||||
|                                              const uint16_t num_of_rows,    // number of rows of the matrix | ||||
|                                              const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                              const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                              const q7_t * bias, q7_t * pOut,    // bias; output operand | ||||
|                                              q15_t * vec_buffer); /* presumably scratch space for the input vector -- confirm against the definition */ | ||||
|  | ||||
|     /* Reference for the "_opt" q15 fully-connected variant: q15 vector, q15 matrix, q15 bias, q15 output. | ||||
|      * NOTE(review): presumably pM must use the weight ordering expected by the _opt kernel -- confirm. | ||||
|      */ void      arm_fully_connected_q15_opt_ref(const q15_t * pV, // pointer to vector | ||||
|                                               const q15_t * pM, // pointer to matrix | ||||
|                                               const uint16_t dim_vec,   // length of the vector | ||||
|                                               const uint16_t num_of_rows,   // number of rows of the matrix | ||||
|                                               const uint16_t bias_shift,    // amount of left-shift for bias | ||||
|                                               const uint16_t out_shift, // amount of right-shift for output | ||||
|                                               const q15_t * bias, q15_t * pOut, // bias; output operand | ||||
|                                               q15_t * vec_buffer); /* presumably scratch space for the input vector -- confirm against the definition */ | ||||
|  | ||||
|     /* Reference for the "_opt" mixed-precision fully-connected variant: q15 vector, q7 matrix, | ||||
|      * q7 bias, q15 output. | ||||
|      * NOTE(review): presumably pM must use the weight ordering expected by the _opt kernel -- confirm. | ||||
|      */ void      arm_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV,  // pointer to vector | ||||
|                                                          const q7_t * pM,   // pointer to matrix | ||||
|                                                          const uint16_t dim_vec,    // length of the vector | ||||
|                                                          const uint16_t num_of_rows,    // number of rows of the matrix | ||||
|                                                          const uint16_t bias_shift, // amount of left-shift for bias | ||||
|                                                          const uint16_t out_shift,  // amount of right-shift for output | ||||
|                                                          const q7_t * bias, q15_t * pOut,   // bias; output operand | ||||
|                                                          q15_t * vec_buffer); /* presumably scratch space for the input vector -- confirm against the definition */ | ||||
|  | ||||
| /* | ||||
|  * | ||||
|  * Pooling reference implementation | ||||
|  * | ||||
|  */ | ||||
|  | ||||
|     /* Reference q7 HWC average pooling over a square window. | ||||
|      */ void      arm_avepool_q7_HWC_ref(const q7_t * Im_in,    // input image | ||||
|                                      const uint16_t dim_im_in,  // input image dimension | ||||
|                                      const uint16_t ch_im_in,   // number of input image channels | ||||
|                                      const uint16_t dim_kernel, // window kernel size | ||||
|                                      const uint16_t padding,    // padding size | ||||
|                                      const uint16_t stride, // stride | ||||
|                                      const uint16_t dim_im_out, // output image dimension | ||||
|                                      q7_t * bufferA,    // a buffer for local storage | ||||
|                                      q7_t * Im_out); /* output image */ | ||||
|  | ||||
|     /* Reference q7 HWC max pooling over a square window. | ||||
|      */ void      arm_maxpool_q7_HWC_ref(const q7_t * Im_in,    // input image | ||||
|                                      const uint16_t dim_im_in,  // input image dimension | ||||
|                                      const uint16_t ch_im_in,   // number of input image channels | ||||
|                                      const uint16_t dim_kernel, // window kernel size | ||||
|                                      const uint16_t padding,    // padding size | ||||
|                                      const uint16_t stride, // stride | ||||
|                                      const uint16_t dim_im_out, // output image dimension | ||||
|                                      q7_t * bufferA,    // a buffer for local storage | ||||
|                                      q7_t * Im_out); /* output image */ | ||||
|  | ||||
| /* | ||||
|  * | ||||
|  * Other reference implementations | ||||
|  * | ||||
|  */ | ||||
|  | ||||
|     /* Reference ReLU for a q7 array: `data` holds `size` elements and is the only buffer passed, | ||||
|      * so the result is presumably written back into it -- confirm against the definition. | ||||
|      */ void      arm_relu_q7_ref(q7_t * data, uint16_t size); | ||||
|  | ||||
|     /* Reference ReLU for a q15 array: `data` holds `size` elements and is the only buffer passed, | ||||
|      * so the result is presumably written back into it -- confirm against the definition. | ||||
|      */ void      arm_relu_q15_ref(q15_t * data, uint16_t size); | ||||
|  | ||||
|     /* Reference q7 multiply: sources pSrcA and pSrcB, destination pDst, blockSize elements. | ||||
|      * out_shift is presumably a right shift applied to each product -- confirm against the definition. | ||||
|      */ void      arm_nn_mult_q7_ref(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize); | ||||
|  | ||||
|     /* Reference q15 multiply: sources pSrcA and pSrcB, destination pDst, blockSize elements. | ||||
|      * out_shift is presumably a right shift applied to each product -- confirm against the definition. | ||||
|      */ void      arm_nn_mult_q15_ref(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize); | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										801
									
								
								Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										801
									
								
								Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,801 @@ | ||||
| /* ---------------------------------------------------------------------- | ||||
| * Copyright (C) 2010-2018 Arm Limited. All rights reserved. | ||||
| * | ||||
| * | ||||
| * Project:       CMSIS NN Library | ||||
| * Title:         arm_nnexamples_nn_test.cpp | ||||
| * | ||||
| * Description:   Example code for NN kernel testing. | ||||
| * | ||||
| * Target Processor: Cortex-M cores | ||||
| * | ||||
| * Redistribution and use in source and binary forms, with or without | ||||
| * modification, are permitted provided that the following conditions | ||||
| * are met: | ||||
| *   - Redistributions of source code must retain the above copyright | ||||
| *     notice, this list of conditions and the following disclaimer. | ||||
| *   - Redistributions in binary form must reproduce the above copyright | ||||
| *     notice, this list of conditions and the following disclaimer in | ||||
| *     the documentation and/or other materials provided with the | ||||
| *     distribution. | ||||
| *   - Neither the name of ARM LIMITED nor the names of its contributors | ||||
| *     may be used to endorse or promote products derived from this | ||||
| *     software without specific prior written permission. | ||||
| * | ||||
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||||
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||||
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||||
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | ||||
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||||
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||||
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||||
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN | ||||
| * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||
| * POSSIBILITY OF SUCH DAMAGE. | ||||
| * -------------------------------------------------------------------- */ | ||||
|  | ||||
| #include "arm_nnexamples_nn_test.h" | ||||
|  | ||||
| //#define TEST_SIGMOID   -- disabled: would enable the "#ifdef TEST_SIGMOID" section of main() | ||||
| //#define TEST_TANH      -- disabled: would enable the "#ifdef TEST_TANH" section of main() | ||||
| #define TEST_POOL /* enables the max/average pooling section of main() */ | ||||
| #define TEST_RELU /* enables the q7/q15 ReLU section of main() */ | ||||
| #define TEST_IP /* enables the fully-connected section of main() */ | ||||
| #define TEST_CONV /* NOTE(review): guarded section not visible in this part of the file */ | ||||
| #define TEST_NONSQUARE /* NOTE(review): guarded section not visible in this part of the file */ | ||||
| #define TEST_NNMULT /* enables the arm_nn_mult q7/q15 section of main() */ | ||||
|  | ||||
| int test_index = 0; /* used by main() as the loop index when filling test_flags, then reset to 0 */ | ||||
| q7_t test_flags[50]; /* 50 flag slots; main() sets every slot to -1 before running any test */ | ||||
| bool test_pass; /* NOTE(review): not referenced in the visible part of main() -- presumably set by the verify helpers; confirm */ | ||||
|  | ||||
| int main() | ||||
| { | ||||
|     printf("start tests\n"); | ||||
|  | ||||
|     srand(1); | ||||
|  | ||||
|     // common pointers for testing data | ||||
|     q7_t     *test1; | ||||
|     q15_t    *test2; | ||||
|     q7_t     *test3; | ||||
|     q15_t    *test4; | ||||
|  | ||||
|     for (test_index = 0; test_index<50; test_index++) { | ||||
|         test_flags[test_index] = -1; | ||||
|     } | ||||
|     test_index = 0; | ||||
|  | ||||
| #ifdef TEST_NNMULT | ||||
| #define NNMULT_DIM 128 | ||||
|     test1 = new q7_t[NNMULT_DIM*2]; | ||||
|     test2 = new q15_t[NNMULT_DIM*2]; | ||||
|     test3 = new q7_t[NNMULT_DIM*2]; | ||||
|     test4 = new q15_t[NNMULT_DIM*2]; | ||||
|  | ||||
|     q7_t * mult_out_q7 = test3; | ||||
|     q7_t * mult_ref_q7 = test3 + NNMULT_DIM; | ||||
|     q15_t * mult_out_q15 = test4; | ||||
|     q15_t * mult_ref_q15 = test4 + NNMULT_DIM; | ||||
|  | ||||
|     for (int i=0;i<NNMULT_DIM*2;i++) { | ||||
|         test1[i] = (rand() % 256 - 128); | ||||
|         test2[i] = (rand() % 65536 - 32768); | ||||
|     } | ||||
|  | ||||
|     // Test q7 | ||||
|     arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 5, NNMULT_DIM); | ||||
|  | ||||
|     arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 5, NNMULT_DIM); | ||||
|  | ||||
|     verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM); | ||||
|  | ||||
|     arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 9, NNMULT_DIM); | ||||
|  | ||||
|     arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 9, NNMULT_DIM); | ||||
|  | ||||
|     verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM); | ||||
|  | ||||
|     // Test q15 | ||||
|     arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 13, NNMULT_DIM); | ||||
|  | ||||
|     arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 13, NNMULT_DIM); | ||||
|  | ||||
|     verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM); | ||||
|  | ||||
|     arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 18, NNMULT_DIM); | ||||
|  | ||||
|     arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 18, NNMULT_DIM); | ||||
|  | ||||
|     verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM); | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_SIGMOID | ||||
|  | ||||
| #define SIGMOID_DIM 128 | ||||
|  | ||||
|     /* This part tests the running of sigmoid functions */ | ||||
|  | ||||
|     test1 = new q7_t[SIGMOID_DIM]; | ||||
|     test2 = new q15_t[SIGMOID_DIM]; | ||||
|     test3 = new q7_t[SIGMOID_DIM]; | ||||
|     test4 = new q15_t[SIGMOID_DIM]; | ||||
|  | ||||
|     srand(1); | ||||
|  | ||||
|     for (int i = 0; i < SIGMOID_DIM; i++) | ||||
|     { | ||||
|         test1[i] = (rand() % 256 - 128); | ||||
|         test2[i] = (rand() % 65536 - 32768); | ||||
|         test3[i] = test1[i]; | ||||
|         test4[i] = test2[i]; | ||||
|     } | ||||
|  | ||||
|     arm_nn_activations_direct_q7(test3, SIGMOID_DIM, 3, ARM_SIGMOID); | ||||
|  | ||||
|     for (int i = 0; i < SIGMOID_DIM; i++) | ||||
|     { | ||||
|         printf("in: %d  out: %d\n", test1[i], test3[i]); | ||||
|     } | ||||
|  | ||||
|     printf("start testing q15_t sigmoid\n\n"); | ||||
|  | ||||
|     arm_nn_activations_direct_q15(test4, SIGMOID_DIM, 3, ARM_SIGMOID); | ||||
|  | ||||
|     for (int i = 0; i < SIGMOID_DIM; i++) | ||||
|     { | ||||
|         printf("in: %d  out: %d\n", test2[i], test4[i]); | ||||
|     } | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
|     delete[]test4; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_TANH | ||||
|  | ||||
| #define TANH_DIM 128 | ||||
|  | ||||
|     /* This part tests the running of tanh functions */ | ||||
|  | ||||
|     test1 = new q7_t[TANH_DIM]; | ||||
|     test2 = new q15_t[TANH_DIM]; | ||||
|     test3 = new q7_t[TANH_DIM]; | ||||
|     test4 = new q15_t[TANH_DIM]; | ||||
|  | ||||
|     srand(1); | ||||
|  | ||||
|     for (int i = 0; i < TANH_DIM; i++) | ||||
|     { | ||||
|         test1[i] = (rand() % 256 - 128); | ||||
|         test2[i] = (rand() % 65536 - 32768); | ||||
|         test3[i] = test1[i]; | ||||
|         test4[i] = test2[i]; | ||||
|     } | ||||
|  | ||||
|     arm_nn_activations_direct_q7(test3, TANH_DIM, 3, ARM_TANH); | ||||
|  | ||||
|     printf("start testing q7_t tanh\n\n"); | ||||
|  | ||||
|     for (int i = 0; i < TANH_DIM; i++) | ||||
|     { | ||||
|         printf("in: %d  out: %d\n", test1[i], test3[i]); | ||||
|     } | ||||
|  | ||||
|     printf("start testing q15_t tanh\n\n"); | ||||
|  | ||||
|     arm_nn_activations_direct_q15(test4, TANH_DIM, 3, ARM_TANH); | ||||
|  | ||||
|     for (int i = 0; i < TANH_DIM; i++) | ||||
|     { | ||||
|         printf("in: %d  out: %d\n", test2[i], test4[i]); | ||||
|     } | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
|     delete[]test4; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_POOL | ||||
|  | ||||
| #define POOL_IM_DIM 32 | ||||
| #define POOL_IM_CH 8 | ||||
|  | ||||
|     test1 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH * 2]; | ||||
|     test2 = new q15_t[POOL_IM_DIM * POOL_IM_CH]; | ||||
|     test3 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH]; | ||||
|  | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         test1[i] = (rand() % 256 - 128); | ||||
|     } | ||||
|  | ||||
|     q7_t     *img_in = test1 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; | ||||
|     q7_t     *pool_out_ref = test3; | ||||
|     q7_t     *pool_out_opt = test3 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH / 2; | ||||
|  | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         test3[i] = 0; | ||||
|     } | ||||
|  | ||||
|     // copy over the img input | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         img_in[i] = test1[i]; | ||||
|     } | ||||
|  | ||||
|     initialize_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH); | ||||
|  | ||||
|     printf("Start maxpool reference implementation\n"); | ||||
|  | ||||
|     arm_maxpool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref); | ||||
|  | ||||
|     // copy over the img input | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         img_in[i] = test1[i]; | ||||
|     } | ||||
|  | ||||
|     printf("Start maxpool opt implementation\n"); | ||||
|  | ||||
|     arm_maxpool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt); | ||||
|  | ||||
|     verify_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH); | ||||
|  | ||||
|     // copy over the img input | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         img_in[i] = test1[i]; | ||||
|     } | ||||
|  | ||||
|     // copy over the img input | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         img_in[i] = test1[i]; | ||||
|     } | ||||
|  | ||||
|     printf("Start avepool ref implementation\n"); | ||||
|  | ||||
|     arm_avepool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref); | ||||
|  | ||||
|     // copy over the img input | ||||
|     for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++) | ||||
|     { | ||||
|         img_in[i] = test1[i]; | ||||
|     } | ||||
|  | ||||
|     printf("Start avepool opt implementation\n"); | ||||
|  | ||||
|     arm_avepool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt); | ||||
|  | ||||
|     // special check here | ||||
|     bool      if_ave_pool_match = true; | ||||
|     for (int i = 0; i < POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH; i++) | ||||
|     { | ||||
|         // we tolerate at most difference of 1 here because of rounding errors | ||||
|         if (pool_out_ref[i] - pool_out_opt[i] >= 2 || pool_out_opt[i] - pool_out_ref[i] >= 2) | ||||
|         { | ||||
|             printf("Output mismatch at %d, expected %d, actual %d\n", i, pool_out_ref[i], pool_out_opt[i]); | ||||
|             if_ave_pool_match = false; | ||||
|         } | ||||
|     } | ||||
|     if (if_ave_pool_match == true) | ||||
|     { | ||||
|         printf("Outputs match.\n"); | ||||
|     } | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_RELU | ||||
|  | ||||
| #define RELU_DIM 127 | ||||
|  | ||||
|     test1 = new q7_t[RELU_DIM]; | ||||
|     test2 = new q15_t[RELU_DIM]; | ||||
|     test3 = new q7_t[RELU_DIM]; | ||||
|     test4 = new q15_t[RELU_DIM]; | ||||
|  | ||||
|     for (int i = 0; i < RELU_DIM; i++) | ||||
|     { | ||||
|         test1[i] = (rand() % 256 - 128); | ||||
|         test2[i] = (rand() % 65536 - 32768); | ||||
|         test3[i] = test1[i]; | ||||
|         test4[i] = test2[i]; | ||||
|     } | ||||
|  | ||||
|     q7_t     *relu_ref_data_q7 = test1; | ||||
|     q7_t     *relu_opt_data_q7 = test3; | ||||
|     q15_t    *relu_ref_data_q15 = test2; | ||||
|     q15_t    *relu_opt_data_q15 = test4; | ||||
|  | ||||
|     printf("Start ref relu q7 implementation\n"); | ||||
|  | ||||
|     arm_relu_q7_ref(relu_ref_data_q7, RELU_DIM); | ||||
|  | ||||
|     printf("Start opt relu q7 implementation\n"); | ||||
|  | ||||
|     arm_relu_q7(relu_opt_data_q7, RELU_DIM); | ||||
|  | ||||
|     verify_results_q7(relu_ref_data_q7, relu_opt_data_q7, RELU_DIM); | ||||
|  | ||||
|     printf("Start ref relu q15 implementation\n"); | ||||
|  | ||||
|     arm_relu_q15_ref(relu_ref_data_q15, RELU_DIM); | ||||
|  | ||||
|     printf("Start opt relu q15 implementation\n"); | ||||
|  | ||||
|     arm_relu_q15(relu_opt_data_q15, RELU_DIM); | ||||
|  | ||||
|     verify_results_q15(relu_ref_data_q15, relu_opt_data_q15, RELU_DIM); | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
|     delete[]test4; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_IP | ||||
|  | ||||
| #define IP_ROW_DIM 127 | ||||
| #define IP_COL_DIM 127 | ||||
|  | ||||
|     q7_t      ip_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT; | ||||
|     q7_t      ip_q7_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT; | ||||
|     q7_t      ip_q7_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_q7_q15_WEIGHT; | ||||
|     q15_t     ip_q15_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT; | ||||
|     q15_t     ip_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT_Q15; | ||||
|  | ||||
|     test1 = new q7_t[IP_COL_DIM + IP_ROW_DIM]; | ||||
|     test2 = new q15_t[IP_COL_DIM]; | ||||
|     test3 = new q7_t[IP_ROW_DIM * 3]; | ||||
|     test4 = new q15_t[IP_COL_DIM + IP_ROW_DIM * 2]; | ||||
|  | ||||
|     for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++) | ||||
|     { | ||||
|         test1[i] = rand() % 256 - 100; | ||||
|     } | ||||
|     for (int i = 0; i < IP_ROW_DIM * 3; i++) | ||||
|     { | ||||
|         test3[i] = 0; | ||||
|     } | ||||
|  | ||||
|     q7_t     *ip_bias_q7 = test1 + IP_COL_DIM; | ||||
|  | ||||
|     q7_t     *ip_out_q7_ref = test3; | ||||
|     q7_t     *ip_out_q7_opt = test3 + IP_ROW_DIM; | ||||
|     q7_t     *ip_out_q7_opt_fast = test3 + 2 * IP_ROW_DIM; | ||||
|     q15_t    *ip_out_q15_ref = test4 + IP_COL_DIM; | ||||
|     q15_t    *ip_out_q15_opt = test4 + IP_COL_DIM + IP_ROW_DIM; | ||||
|  | ||||
|     initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM); | ||||
|     initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); | ||||
|     initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start ref q7 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q7_ref(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_ref, test2); | ||||
|  | ||||
|     printf("Start q7 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q7(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt, test2); | ||||
|  | ||||
|     verify_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start q7 ref opt implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q7_opt_ref(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, | ||||
|                                    ip_out_q7_opt_fast, test2); | ||||
|  | ||||
|     verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start q7 opt implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q7_opt(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt_fast, | ||||
|                                test2); | ||||
|  | ||||
|     verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); | ||||
|  | ||||
|     for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++) | ||||
|     { | ||||
|         test4[i] = (rand() % 65536 - 32768); | ||||
|     } | ||||
|  | ||||
|     initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start ref q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q15_ref(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_ref, NULL); | ||||
|  | ||||
|     printf("Start q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q15(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL); | ||||
|  | ||||
|     verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start ref opt q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q15_opt_ref(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, | ||||
|                                     NULL); | ||||
|  | ||||
|     verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start opt q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_q15_opt(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL); | ||||
|  | ||||
|     verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start ref q7_q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_mat_q7_vec_q15_ref(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_ref, | ||||
|                                            test2); | ||||
|  | ||||
|     printf("Start q7_q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_mat_q7_vec_q15(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_opt, | ||||
|                                        test2); | ||||
|  | ||||
|     verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start ref opt q7_q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_mat_q7_vec_q15_opt_ref(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, | ||||
|                                                ip_out_q15_opt, test2); | ||||
|  | ||||
|     verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     printf("Start opt q7_q15 implementation\n"); | ||||
|  | ||||
|     arm_fully_connected_mat_q7_vec_q15_opt(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, | ||||
|                                            ip_out_q15_opt, test2); | ||||
|  | ||||
|     verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
|     delete[]test4; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_NONSQUARE | ||||
|  | ||||
| /* Use the RCONV prefix to differentiate these from the square CONV definitions */ | ||||
|  | ||||
| #define RCONV_IM_DIM_X 10 | ||||
| #define RCONV_IM_DIM_Y 8 | ||||
| #define RCONV_IM_CH 4 | ||||
| #define RCONV_KER_DIM_X 5 | ||||
| #define RCONV_KER_DIM_Y 3 | ||||
| #define RCONV_STRIDE_X 1 | ||||
| #define RCONV_STRIDE_Y 1 | ||||
| #define RCONV_PADDING_X 2 | ||||
| #define RCONV_PADDING_Y 1 | ||||
| #define RCONV_OUT_CH 4 | ||||
| #define RCONV_OUT_DIM_X 10 | ||||
| #define RCONV_OUT_DIM_Y 8 | ||||
|  | ||||
|     test1 = new q7_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; | ||||
|     test2 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH]; | ||||
|     test3 = | ||||
|         new q7_t[RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; | ||||
|  | ||||
|     for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++) | ||||
|     { | ||||
|         test1[i] = rand() % 256 - 100; | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; | ||||
|          i < RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; i++) | ||||
|     { | ||||
|         test3[i] = rand() % 256 - 100; | ||||
|     } | ||||
|  | ||||
|     q7_t     *rconv_weight_q7 = test1; | ||||
|     q7_t     *rconv_bias_q7 = test1 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH; | ||||
|  | ||||
|     q15_t    *rconv_buf = test2; | ||||
|  | ||||
|     q7_t     *rconv_im_in_q7 = test3; | ||||
|     q7_t     *rconv_im_out_ref_q7 = test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH; | ||||
|     q7_t     *rconv_im_out_opt_q7 = | ||||
|         test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; | ||||
|  | ||||
|     initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     printf("start conv q7 nonsquare ref implementation\n"); | ||||
|     arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, | ||||
|                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, | ||||
|                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, | ||||
|                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     printf("start conv q7 nonsquare opt implementation\n"); | ||||
|     arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, | ||||
|                                        RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, | ||||
|                                        RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, | ||||
|                                        RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     printf("start conv q7 nonsquare ref implementation\n"); | ||||
|     arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, | ||||
|                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, | ||||
|                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, | ||||
|                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     printf("start conv q7 nonsquare basic implementation\n"); | ||||
|     arm_convolve_HWC_q7_basic_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, | ||||
|                                        RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, | ||||
|                                        RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, | ||||
|                                        RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     printf("start 1x1 conv q7 nonsquare fast implementation\n"); | ||||
|     arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, | ||||
|                                        RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X, | ||||
|                                        RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X, | ||||
|                                        RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     printf("start 1x1 conv q7 nonsquare dedicated function implementation\n"); | ||||
|     arm_convolve_1x1_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, | ||||
|                                            RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X, | ||||
|                                            RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X, | ||||
|                                            RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     printf("start depthwise separable conv q7 nonsquare ref implementation\n"); | ||||
|     arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, | ||||
|                                                       rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, | ||||
|                                                       RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y, | ||||
|                                                       rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X, | ||||
|                                                       RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     printf("start depthwise separable conv q7 nonsquare opt implementation\n"); | ||||
|     arm_depthwise_separable_conv_HWC_q7_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, | ||||
|                                                   rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, | ||||
|                                                   RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y, | ||||
|                                                   rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X, | ||||
|                                                   RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
| 	 | ||||
| 	test2 = new q15_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; // weights + bias | ||||
| 	test4 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH   //buffer | ||||
| 	         + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; // i/o | ||||
|  | ||||
|     for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++) | ||||
|     { | ||||
|         test2[i] = rand() % 256 - 100; | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; | ||||
|          i < 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH | ||||
|          + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; | ||||
|         i++) | ||||
|     { | ||||
|         test4[i] = rand() % 256 - 100; | ||||
|     } | ||||
|  | ||||
|     q15_t     *rconv_weight_q15 = test2; | ||||
|     q15_t     *rconv_bias_q15 = test2 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH; | ||||
|  | ||||
|     rconv_buf = test4; | ||||
|  | ||||
|     q15_t     *rconv_im_in_q15 = test4 + 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH; | ||||
|     q15_t     *rconv_im_out_ref_q15 = rconv_im_in_q15 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH; | ||||
|     q15_t     *rconv_im_out_opt_q15 = rconv_im_out_ref_q15 + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; | ||||
|  | ||||
|     initialize_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
|  | ||||
|     printf("start conv q15 nonsquare ref implementation\n"); | ||||
|     arm_convolve_HWC_q15_nonsquare_ref(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15, | ||||
|                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, | ||||
|                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_ref_q15, | ||||
|                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     printf("start conv q5 nonsquare opt implementation\n"); | ||||
|     arm_convolve_HWC_q15_fast_nonsquare(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15, | ||||
|                                        RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, | ||||
|                                        RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_opt_q15, | ||||
|                                        RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); | ||||
|  | ||||
|     verify_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); | ||||
| 	 | ||||
|     delete [] test2; | ||||
|     delete [] test4; | ||||
| #endif | ||||
|  | ||||
| #ifdef TEST_CONV | ||||
|  | ||||
| #define CONV_IM_DIM 16 | ||||
| #define CONV_IM_CH 16 | ||||
| #define CONV_KER_DIM 5 | ||||
| #define CONV_OUT_CH 16 | ||||
| #define CONV_OUT_DIM 16 | ||||
|  | ||||
|     test1 = new q7_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH]; | ||||
|     test2 = | ||||
|         new q15_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + | ||||
|                   2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH]; | ||||
|     test3 = new q7_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH]; | ||||
|     test4 = new q15_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH]; | ||||
|  | ||||
|     for (int i = 0; i < CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++) | ||||
|     { | ||||
|         test1[i] = rand() % 256 - 100; | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; | ||||
|          i < | ||||
|          CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + | ||||
|          2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++) | ||||
|     { | ||||
|         test2[i] = (rand() % 65536 - 32768); | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++) | ||||
|     { | ||||
|         test3[i] = rand() % 256 - 100; | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++) | ||||
|     { | ||||
|         test4[i] = (rand() % 65536 - 32768); | ||||
|     } | ||||
|  | ||||
|     q7_t     *conv_weight_q7 = test1; | ||||
|     q7_t     *conv_bias_q7 = test1 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH; | ||||
|  | ||||
|     q15_t    *conv_weight_q15 = test2; | ||||
|     q15_t    *conv_buf = test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH; | ||||
|     q15_t    *conv_bias_q15 = | ||||
|         test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + | ||||
|         2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH; | ||||
|  | ||||
|     q7_t     *conv_im_in_q7 = test3; | ||||
|     q7_t     *conv_im_out_ref_q7 = test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH; | ||||
|     q7_t     *conv_im_out_opt_q7 = | ||||
|         test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; | ||||
|  | ||||
|     q15_t    *conv_im_in_q15 = test4; | ||||
|     q15_t    *conv_im_out_ref_q15 = test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH; | ||||
|     q15_t    *conv_im_out_opt_q15 = | ||||
|         test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; | ||||
|  | ||||
|     initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     printf("start q7 ref implementation\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, | ||||
|                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7, | ||||
|                             CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     printf("start q7 basic implementation\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, | ||||
|                               CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, | ||||
|                               CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     printf("start q7 fast implementation\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q7_fast(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, | ||||
|                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, | ||||
|                              CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     // testing with RGB | ||||
|     printf("start q7 ref implementation for RGB\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7, | ||||
|                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7, | ||||
|                             CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     printf("start q7 basic implementation for RGB\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7, | ||||
|                               CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, | ||||
|                               CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     printf("start q7 RGB implementation for RGB\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q7_RGB(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7, | ||||
|                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, | ||||
|                             CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     // testing q15 | ||||
|     initialize_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     printf("start q15 ref implementation\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q15_ref(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15, | ||||
|                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_ref_q15, | ||||
|                              CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     printf("start q15 basic implementation\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q15_basic(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15, | ||||
|                                CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15, | ||||
|                                CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     printf("start q15 fast implementation\n"); | ||||
|  | ||||
|     arm_convolve_HWC_q15_fast(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15, | ||||
|                               CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15, | ||||
|                               CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     // depthwise separable conv | ||||
|     initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     printf("start q7 depthwise_separable_conv ref implementation\n"); | ||||
|  | ||||
|     arm_depthwise_separable_conv_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, | ||||
|                                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7, | ||||
|                                             CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     printf("start q7 depthwise_separable_conv implementation\n"); | ||||
|  | ||||
|     arm_depthwise_separable_conv_HWC_q7(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, | ||||
|                                         CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, | ||||
|                                         CONV_OUT_DIM, conv_buf, NULL); | ||||
|  | ||||
|     verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); | ||||
|  | ||||
|     delete[]test1; | ||||
|     delete[]test2; | ||||
|     delete[]test3; | ||||
|     delete[]test4; | ||||
|  | ||||
| #endif | ||||
|  | ||||
|     test_pass = true; | ||||
|     test_index = 0; | ||||
|     while (test_flags[test_index] != -1) { | ||||
|         if (test_flags[test_index]) { | ||||
|              test_pass = false; | ||||
|         } | ||||
|         test_index ++; | ||||
|     } | ||||
|     if (test_pass) { | ||||
|         printf("All tests passed\n"); | ||||
|     } else { | ||||
|         printf("Test failed passed\n"); | ||||
|     } | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
| @@ -0,0 +1,78 @@ | ||||
| #ifndef _MAIN_H_ | ||||
| #define _MAIN_H_ | ||||
|  | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <math.h> | ||||
|  | ||||
| #include "arm_math.h" | ||||
|  | ||||
| #include "arm_nnfunctions.h" | ||||
| #include "ref_functions.h" | ||||
|  | ||||
| extern int test_index; | ||||
| extern q7_t test_flags[50]; | ||||
|  | ||||
| /* | ||||
|  * Prepare a pair of q7 result buffers before a comparison run. | ||||
|  * | ||||
|  * Calls arm_fill_q7 with value 0 on `ref` and value 37 on `opt`, each for | ||||
|  * `length` elements. NOTE(review): the two different values presumably | ||||
|  * ensure that an `opt` buffer which is never written cannot compare equal | ||||
|  * to `ref` -- confirm against the intended test flow. | ||||
|  */ | ||||
| void initialize_results_q7(q7_t * ref, q7_t * opt, int length) | ||||
| { | ||||
|     const q7_t ref_fill = 0; | ||||
|     const q7_t opt_fill = 37; | ||||
|  | ||||
|     arm_fill_q7(ref_fill, ref, length); | ||||
|     arm_fill_q7(opt_fill, opt, length); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Prepare a pair of q15 result buffers before a comparison run. | ||||
|  * | ||||
|  * Calls arm_fill_q15 with value 0 on `ref` and value 0x5F5 on `opt`, each | ||||
|  * for `length` elements. NOTE(review): the two different values presumably | ||||
|  * ensure that an `opt` buffer which is never written cannot compare equal | ||||
|  * to `ref` -- confirm against the intended test flow. | ||||
|  */ | ||||
| void initialize_results_q15(q15_t * ref, q15_t * opt, int length) | ||||
| { | ||||
|     const q15_t ref_fill = 0; | ||||
|     const q15_t opt_fill = 0x5F5; | ||||
|  | ||||
|     arm_fill_q15(ref_fill, ref, length); | ||||
|     arm_fill_q15(opt_fill, opt, length); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Compare two q7 buffers element by element and record the outcome. | ||||
|  * | ||||
|  * Every index in [0, length) where `ref` and `opt` differ is reported with | ||||
|  * printf. Afterwards one entry is written to test_flags at the current | ||||
|  * test_index (which is then incremented): 0 when all elements matched, | ||||
|  * 1 when at least one differed. | ||||
|  */ | ||||
| void verify_results_q7(q7_t * ref, q7_t * opt, int length) | ||||
| { | ||||
|     int       mismatches = 0; | ||||
|  | ||||
|     for (int idx = 0; idx < length; ++idx) | ||||
|     { | ||||
|         if (ref[idx] == opt[idx]) | ||||
|         { | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         printf("Output mismatch at %d, expected %d, actual %d\r\n", idx, ref[idx], opt[idx]); | ||||
|         mismatches++; | ||||
|     } | ||||
|  | ||||
|     /* Failure path: flag the test and skip the success message. */ | ||||
|     if (mismatches != 0) | ||||
|     { | ||||
|         test_flags[test_index++] = 1; | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     printf("Outputs match.\r\n\r\n"); | ||||
|     test_flags[test_index++] = 0; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Compare two q15 buffers element by element and record the outcome. | ||||
|  * | ||||
|  * Every index in [0, length) where `ref` and `opt` differ is reported with | ||||
|  * printf. Afterwards one entry is written to test_flags at the current | ||||
|  * test_index (which is then incremented): 0 when all elements matched, | ||||
|  * 1 when at least one differed. | ||||
|  */ | ||||
| void verify_results_q15(q15_t * ref, q15_t * opt, int length) | ||||
| { | ||||
|     int       mismatches = 0; | ||||
|  | ||||
|     for (int idx = 0; idx < length; ++idx) | ||||
|     { | ||||
|         if (ref[idx] == opt[idx]) | ||||
|         { | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         printf("Output mismatch at %d, expected %d, actual %d\r\n", idx, ref[idx], opt[idx]); | ||||
|         mismatches++; | ||||
|     } | ||||
|  | ||||
|     /* Failure path: flag the test and skip the success message. */ | ||||
|     if (mismatches != 0) | ||||
|     { | ||||
|         test_flags[test_index++] = 1; | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     printf("Outputs match.\r\n\r\n"); | ||||
|     test_flags[test_index++] = 0; | ||||
| } | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										4
									
								
								Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/readme.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/readme.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| CMSIS NN_Lib example arm_nnexamples_nn_test for | ||||
|   Cortex-M3, Cortex-M4 and Cortex-M7. | ||||
|  | ||||
| The example is configured for uVision Simulator. | ||||
		Reference in New Issue
	
	Block a user