17#if (TM_MDL_TYPE != TM_MDL_FP8_143) && (TM_MDL_TYPE != TM_MDL_FP8_152)
23 for (; i + 8 - 1 < cnt;) {
24 sum += sptr[i] * kptr[i];
26 sum += sptr[i] * kptr[i];
28 sum += sptr[i] * kptr[i];
30 sum += sptr[i] * kptr[i];
32 sum += sptr[i] * kptr[i];
34 sum += sptr[i] * kptr[i];
36 sum += sptr[i] * kptr[i];
38 sum += sptr[i] * kptr[i];
41 for (; i < size; i++) { sum += sptr[i] * kptr[i]; }
54 for (; i + 8 - 1 < cnt;) {
55 sum0 += sptr[i] * kptr0[i];
56 sum1 += sptr[i] * kptr1[i];
58 sum0 += sptr[i] * kptr0[i];
59 sum1 += sptr[i] * kptr1[i];
61 sum0 += sptr[i] * kptr0[i];
62 sum1 += sptr[i] * kptr1[i];
64 sum0 += sptr[i] * kptr0[i];
65 sum1 += sptr[i] * kptr1[i];
67 sum0 += sptr[i] * kptr0[i];
68 sum1 += sptr[i] * kptr1[i];
70 sum0 += sptr[i] * kptr0[i];
71 sum1 += sptr[i] * kptr1[i];
73 sum0 += sptr[i] * kptr0[i];
74 sum1 += sptr[i] * kptr1[i];
76 sum0 += sptr[i] * kptr0[i];
77 sum1 += sptr[i] * kptr1[i];
80 for (; i < size; i++) {
81 sum0 += sptr[i] * kptr0[i];
82 sum1 += sptr[i] * kptr1[i];
91 *result = sptr[
k_oft[0]] * kptr[0] + sptr[
k_oft[1]] * kptr[1] + sptr[
k_oft[2]] * kptr[2] + sptr[
k_oft[3]] * kptr[3] + sptr[
k_oft[4]] * kptr[4] + sptr[
k_oft[5]] * kptr[5] +
92 sptr[
k_oft[6]] * kptr[6] + sptr[
k_oft[7]] * kptr[7] + sptr[
k_oft[8]] * kptr[8];
97 *result = sptr[0] * kptr[0] + sptr[1] * kptr[1] + sptr[2] * kptr[2] + sptr[3] * kptr[3] + sptr[4] * kptr[4] + sptr[5] * kptr[5] + sptr[6] * kptr[6] + sptr[7] * kptr[7] +
109 for (
int i = 0; i < size; i++) {
124 sum = sum > 0 ? sum : 0;
127 sum = sum > 0 ? sum : 0;
128 sum = sum > 6 ? 6 : sum;
141#if (TM_MDL_TYPE == TM_MDL_FP32) || (TM_MDL_TYPE == TM_MDL_FP16)
144 for (
int i = 0; i < n; i++) {
150 sum = sum > 0 ? sum : 0;
163#elif (TM_MDL_TYPE == TM_MDL_INT8) || (TM_MDL_TYPE == TM_MDL_INT16)
171 for (
int i = 0; i < n; i++) {
175 float sumf = sum * scales[i];
181 sumf = sumf > 0 ? sumf : 0;
184 sumf = sumf > 0 ? sumf : 0;
186 sumf = sumf > 6 ? 6 : sumf;
195 outp[i] = (
mtype_t) (sumf * out_s_inv + out_zp);
TM_INLINE void tm_dot_prod(mtype_t *sptr, mtype_t *kptr, uint32_t size, sumtype_t *result)
Definition arch_cpu.h:19
TM_INLINE void tm_dot_prod_pack2(mtype_t *sptr, mtype_t *kptr, uint32_t size, sumtype_t *result)
Definition arch_cpu.h:46
TM_INLINE void tm_postprocess_sum(int n, sumtype_t *sums, btype_t *bs, int act, mtype_t *outp, sctype_t *scales, sctype_t out_s_inv, zptype_t out_zp)
Definition arch_cpu.h:166
TM_INLINE void tm_dot_prod_3x3x1(mtype_t *sptr, mtype_t *kptr, sumtype_t *result)
Definition arch_cpu.h:96
TM_INLINE void tm_dot_prod_gap_3x3x1(mtype_t *sptr, mtype_t *kptr, uint32_t *k_oft, sumtype_t *result)
Definition arch_cpu.h:90
u32_t uint32_t
Definition stdint.h:13
s32_t int32_t
Definition stdint.h:12
@ TM_ACT_RELU
Definition tinymaix.h:125
@ TM_ACT_RELU6
Definition tinymaix.h:127
uint8_t TM_WEAK tm_fp32to8(float fp32)
int32_t btype_t
Definition tinymaix.h:38
float sctype_t
Definition tinymaix.h:90
#define TM_FASTSCALE_SHIFT
Definition tinymaix.h:91
int32_t zptype_t
Definition tinymaix.h:40
int8_t mtype_t
Definition tinymaix.h:36
int32_t sumtype_t
Definition tinymaix.h:39
float TM_WEAK tm_fp8to32(uint8_t fp8)
static uint32_t k_oft[TM_MAX_KSIZE]
Definition tm_layers.c:49
#define TM_INLINE
Definition tm_port.h:43