vreinterpret_s32_s8() casts int8x8_t to int32x2_t. However, third argument of vdotq_lane_s32() is of type int8x8_t. --- a/third_party/tflite/src/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h +++ b/third_party/tflite/src/tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h @@ -129,16 +129,14 @@ inline int32x4_t vdotq_four_lane_s32(int32x4_t acc, int8x16_t lhs, int8x16_t rhs, const int lane) { switch (lane) { case 0: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 0); + return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 0); case 1: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_low_s8(rhs)), 1); + return vdotq_lane_s32(acc, lhs, vget_low_s8(rhs), 1); case 2: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), - 0); + return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 0); case 3: default: - return vdotq_lane_s32(acc, lhs, vreinterpret_s32_s8(vget_high_s8(rhs)), - 1); + return vdotq_lane_s32(acc, lhs, vget_high_s8(rhs), 1); } }