47#ifndef NE10_FFT_GENERIC_INT32_H
48#define NE10_FFT_GENERIC_INT32_H
50#include "NE10_types.h"
51#include "NE10_macros.h"
// Fixed-point multiply helpers.
//
// NE10_CPX_MUL_S32(Z,A,B): forms the four partial products of the real and
// imaginary parts of A and B. In each product the first operand is cast to
// NE10_F2I32_SAMPPROD before the multiply, and the product is shifted right
// by 31 bits before being stored in an ne10_int32_t temporary.
// NOTE(review): A and B are pasted unparenthesized (A.r, B.i), so arguments
// should be simple postfix expressions such as in[1] or tw[0].
// NOTE(review): the temporaries are presumably combined into Z as
// (ARBR - AIBI, ARBI + AIBR) -- confirm against the full macro body.
//
// NE10_S_MUL_S32(A,S): scalar form of the same operation; evaluates to
// ((NE10_F2I32_SAMPPROD) A * S) >> 31 with both arguments parenthesized.
54#define NE10_CPX_MUL_S32(Z,A,B) \
56 ne10_int32_t ARBR = ((NE10_F2I32_SAMPPROD) A.r * B.r) >> 31; \
57 ne10_int32_t ARBI = ((NE10_F2I32_SAMPPROD) A.r * B.i) >> 31; \
58 ne10_int32_t AIBR = ((NE10_F2I32_SAMPPROD) A.i * B.r) >> 31; \
59 ne10_int32_t AIBI = ((NE10_F2I32_SAMPPROD) A.i * B.i) >> 31; \
64#define NE10_S_MUL_S32(A,S) (((NE10_F2I32_SAMPPROD) (A) * (S)) >> 31)
// Base step (presumably the RADIX == 2 case -- confirm): multiply in[1] by
// tw[0] and write the product to out[1].
84 NE10_CPX_MUL_S32 (out[1], in[1], tw[0]);
// General step: recurse with RADIX - 1 to handle the lower-indexed elements,
// then multiply the last element in[RADIX - 1] by tw[RADIX - 2]. Element k is
// therefore paired with twiddle k - 1; no statement here touches index 0.
92 FFT_MUL_TW<RADIX - 1> (out, in, tw);
93 NE10_CPX_MUL_S32 (out[RADIX - 1], in[RADIX - 1], tw[RADIX - 2]);
// Two-point butterfly: scratch_out[0] is produced by NE10_CPX_ADD and
// scratch_out[1] by NE10_CPX_SUB, both from scratch_in[0] and scratch_in[1].
// NOTE(review): NE10_CPX_ADD/SUB are defined elsewhere; presumably
// Z = A + B and Z = A - B component-wise -- confirm.
121 NE10_CPX_ADD (scratch_out[0], scratch_in[0], scratch_in[1]);
122 NE10_CPX_SUB (scratch_out[1], scratch_in[0], scratch_in[1]);
// Three-point butterfly on Fin[0..2], written to Fout[0..2].
// NOTE(review): NE10_CPX_ADD/SUB are defined elsewhere; the comments below
// assume Z = A + B and Z = A - B component-wise -- confirm.

// Copy the three inputs into local working storage.
137 scratch_in[0] = Fin[0];
138 scratch_in[1] = Fin[1];
139 scratch_in[2] = Fin[2];
141 scratch[1] = scratch_in[1];
142 scratch[2] = scratch_in[2];
// scratch[3] = in1 + in2, scratch[0] = in1 - in2.
144 NE10_CPX_ADD (scratch[3], scratch[1], scratch[2]);
145 NE10_CPX_SUB (scratch[0], scratch[1], scratch[2]);
// scratch_in[1] = in0 - (in1 + in2) / 2 (halving done with >> 1).
// This must read scratch_in[0] before it is accumulated into below.
147 scratch_in[1].r = scratch_in[0].r - (scratch[3].r >> 1);
148 scratch_in[1].i = scratch_in[0].i - (scratch[3].i >> 1);
// Scale both components of the difference by -TW_3I_S32
// (multiply, then shift right by 31).
150 scratch[0].r = NE10_S_MUL_S32 (scratch[0].r , -TW_3I_S32);
151 scratch[0].i = NE10_S_MUL_S32 (scratch[0].i , -TW_3I_S32);
// Output 0: in0 + in1 + in2.
153 scratch_in[0].r += scratch[3].r;
154 scratch_in[0].i += scratch[3].i;
// Output 2: scratch_in[1] + (-j) * scratch[0]. Computed before scratch_in[1]
// is updated in place just below.
156 scratch_in[2].r = scratch_in[1].r + scratch[0].i;
157 scratch_in[2].i = scratch_in[1].i - scratch[0].r;
// Output 1: scratch_in[1] + j * scratch[0].
159 scratch_in[1].r -= scratch[0].i;
160 scratch_in[1].i += scratch[0].r;
// Write the three results out.
162 Fout[0] = scratch_in[0];
163 Fout[1] = scratch_in[1];
164 Fout[2] = scratch_in[2];
// Four-point butterfly from scratch_in[0..3] to scratch_out[0..3].
// NOTE(review): NE10_CPX_ADD/SUB are defined elsewhere; the comments below
// assume Z = A + B and Z = A - B component-wise -- confirm.

// First stage: pair inputs (0,2) and (1,3) into sums and differences.
178 NE10_CPX_ADD (scratch[0], scratch_in[0], scratch_in[2]);
179 NE10_CPX_SUB (scratch[1], scratch_in[0], scratch_in[2]);
180 NE10_CPX_ADD (scratch[2], scratch_in[1], scratch_in[3]);
181 NE10_CPX_SUB (scratch[3], scratch_in[1], scratch_in[3]);
// Outputs 0 and 2: sum and difference of the two first-stage sums.
183 NE10_CPX_SUB (scratch_out[2], scratch[0], scratch[2]);
184 NE10_CPX_ADD (scratch_out[0], scratch[0], scratch[2]);
// Output 1: scratch[1] + (-j) * scratch[3].
186 scratch_out[1].r = scratch[1].r + scratch[3].i;
187 scratch_out[1].i = scratch[1].i - scratch[3].r;
// Output 3: scratch[1] + j * scratch[3].
188 scratch_out[3].r = scratch[1].r - scratch[3].i;
189 scratch_out[3].i = scratch[1].i + scratch[3].r;
// Five-point butterfly on Fin[0..4], written to Fout[0..4].
// NOTE(review): NE10_CPX_ADD/SUB are defined elsewhere; the comments below
// assume Z = A + B and Z = A - B component-wise -- confirm.
// Each NE10_S_MUL_S32 is a multiply followed by a right shift of 31 bits.

// Copy the inputs into scratch_in, then keep a second copy in scratch[0..4];
// scratch[0] preserves the original in0 after scratch_in[0] is accumulated.
203 scratch_in[0] = Fin[0];
204 scratch_in[1] = Fin[1];
205 scratch_in[2] = Fin[2];
206 scratch_in[3] = Fin[3];
207 scratch_in[4] = Fin[4];
209 scratch[0] = scratch_in[0];
210 scratch[1] = scratch_in[1];
211 scratch[2] = scratch_in[2];
212 scratch[3] = scratch_in[3];
213 scratch[4] = scratch_in[4];
// Pair inputs (1,4) and (2,3):
//   scratch[7] = in1 + in4, scratch[10] = in1 - in4,
//   scratch[8] = in2 + in3, scratch[9]  = in2 - in3.
215 NE10_CPX_ADD (scratch[ 7], scratch[1], scratch[4]);
216 NE10_CPX_SUB (scratch[10], scratch[1], scratch[4]);
217 NE10_CPX_ADD (scratch[ 8], scratch[2], scratch[3]);
218 NE10_CPX_SUB (scratch[ 9], scratch[2], scratch[3]);
// Output 0: sum of all five inputs.
220 scratch_in[0].r += scratch[7].r + scratch[8].r;
221 scratch_in[0].i += scratch[7].i + scratch[8].i;
// scratch[5] = in0 + TW_5A_S32.r * scratch[7] + TW_5B_S32.r * scratch[8].
223 scratch[5].r = scratch[0].r
224 + NE10_S_MUL_S32 (scratch[7].r, TW_5A_S32.r)
225 + NE10_S_MUL_S32 (scratch[8].r, TW_5B_S32.r);
226 scratch[5].i = scratch[0].i
227 + NE10_S_MUL_S32 (scratch[7].i, TW_5A_S32.r)
228 + NE10_S_MUL_S32 (scratch[8].i, TW_5B_S32.r);
// scratch[6] = (-j) * (TW_5A_S32.i * scratch[10] + TW_5B_S32.i * scratch[9]):
// the real part takes the imaginary inputs, the imaginary part the negated
// real inputs.
230 scratch[6].r = NE10_S_MUL_S32 (scratch[10].i, TW_5A_S32.i)
231 + NE10_S_MUL_S32 (scratch[9].i, TW_5B_S32.i);
232 scratch[6].i = -NE10_S_MUL_S32 (scratch[10].r, TW_5A_S32.i)
233 - NE10_S_MUL_S32 (scratch[9].r, TW_5B_S32.i);
// Outputs 1 and 4: scratch[5] -/+ scratch[6].
235 NE10_CPX_SUB (scratch_in[1], scratch[5], scratch[6]);
236 NE10_CPX_ADD (scratch_in[4], scratch[5], scratch[6]);
// scratch[11]: same form as scratch[5] with the A and B real factors swapped.
238 scratch[11].r = scratch[0].r
239 + NE10_S_MUL_S32 (scratch[7].r, TW_5B_S32.r)
240 + NE10_S_MUL_S32 (scratch[8].r, TW_5A_S32.r);
241 scratch[11].i = scratch[0].i
242 + NE10_S_MUL_S32 (scratch[7].i, TW_5B_S32.r)
243 + NE10_S_MUL_S32 (scratch[8].i, TW_5A_S32.r);
// scratch[12] = j * (TW_5B_S32.i * scratch[10] - TW_5A_S32.i * scratch[9]).
245 scratch[12].r = -NE10_S_MUL_S32 (scratch[10].i, TW_5B_S32.i)
246 + NE10_S_MUL_S32 (scratch[9].i, TW_5A_S32.i);
247 scratch[12].i = NE10_S_MUL_S32 (scratch[10].r, TW_5B_S32.i)
248 - NE10_S_MUL_S32 (scratch[9].r, TW_5A_S32.i);
// Outputs 2 and 3: scratch[11] +/- scratch[12].
250 NE10_CPX_ADD (scratch_in[2], scratch[11], scratch[12]);
251 NE10_CPX_SUB (scratch_in[3], scratch[11], scratch[12]);
// Write the five results out.
253 Fout[0] = scratch_in[0];
254 Fout[1] = scratch_in[1];
255 Fout[2] = scratch_in[2];
256 Fout[3] = scratch_in[3];
257 Fout[4] = scratch_in[4];
// NE10_CONJ_S: takes a single value by reference and modifies it in place.
264inline void NE10_CONJ_S (T &);
// Conjugate: flip the sign of the imaginary component; .r is left untouched.
269 scalar.i = -scalar.i;
// NE10_CONJ<RADIX>: applies NE10_CONJ_S to each of the RADIX elements of
// `in`, in place. Implemented by compile-time recursion on RADIX: the first
// RADIX - 1 elements are handled by NE10_CONJ<RADIX - 1>, then the last
// element in[RADIX - 1] is conjugated here.
277template<
int RADIX,
class T>
278inline void NE10_CONJ (T in[RADIX])
280 NE10_CONJ<RADIX - 1> (in);
281 NE10_CONJ_S<T> (in[RADIX - 1]);
// Single-element step (presumably the RADIX == 1 specialization that ends the
// recursion -- confirm): conjugate in[0] only.
287 NE10_CONJ_S<ne10_fft_cpx_int32_t> (in[0]);
// NE10_CPX_LOAD_S: takes a pointer to a const T and returns a T by value.
// NOTE(review): presumably returns the element at *ptr -- confirm against the
// function body.
291inline T NE10_CPX_LOAD_S (
const T *ptr)
// NE10_CPX_STORE_S: takes a destination pointer Fout and a value `in` passed
// by const value; returns nothing.
// NOTE(review): presumably writes `in` to *Fout -- confirm against the
// function body.
297inline void NE10_CPX_STORE_S (T *Fout,
const T in)
// NE10_LOAD_BY_STEP<RADIX, T>: fills out[0..RADIX-1] with elements read via
// NE10_CPX_LOAD_S from Fin, Fin + in_step, Fin + 2 * in_step, ...
// Forward declaration so the specialization below can precede the generic
// definition.
309template<
int RADIX,
class T>
310inline void NE10_LOAD_BY_STEP (T out[RADIX],
312 const ne10_int32_t in_step);
// Specialization for RADIX == 1 with ne10_fft_cpx_int32_t: loads the single
// element at Fin into out[0] and does not recurse.
315inline void NE10_LOAD_BY_STEP<1, ne10_fft_cpx_int32_t> (
320 out[0] = NE10_CPX_LOAD_S<ne10_fft_cpx_int32_t> (Fin);
// Generic definition: load the current element, then recurse with RADIX - 1
// on the next output slot and the input advanced by in_step elements.
323template<
int RADIX,
class T>
324inline void NE10_LOAD_BY_STEP (T out[RADIX],
326 const ne10_int32_t in_step)
328 out[0] = NE10_CPX_LOAD_S<T> (Fin);
329 NE10_LOAD_BY_STEP<RADIX - 1, T> (out + 1, Fin + in_step, in_step);
// NE10_STORE_BY_STEP<RADIX, T>: writes in[0..RADIX-1] via NE10_CPX_STORE_S to
// Fout, Fout + out_step, Fout + 2 * out_step, ...
// Generic definition: store the current element, then recurse with RADIX - 1
// on the destination advanced by out_step elements and the next input slot.
339template<
int RADIX,
class T>
340inline void NE10_STORE_BY_STEP (T *Fout,
342 const ne10_int32_t out_step)
344 NE10_CPX_STORE_S<T> (Fout, in[0]);
345 NE10_STORE_BY_STEP<RADIX - 1, T> (Fout + out_step, in + 1, out_step);
// Specialization for RADIX == 1 with ne10_fft_cpx_int32_t.
// NOTE(review): presumably stores one element and ends the recursion --
// confirm against the function body.
349inline void NE10_STORE_BY_STEP<1, ne10_fft_cpx_int32_t> (
// NE10_SCALED<RADIX>: applies NE10_F2I32_FIXDIV (out[k], scaling) to each of
// the RADIX elements of `out`, by compile-time recursion.
// NOTE(review): NE10_F2I32_FIXDIV is defined elsewhere; presumably it divides
// the element in place by `scaling` -- confirm.
365 const ne10_int32_t scaling)
// Scale the current element, then recurse on the remaining RADIX - 1.
367 NE10_F2I32_FIXDIV (out[0], scaling);
368 NE10_SCALED<RADIX - 1> (out + 1, scaling);
// Single-element step (presumably the RADIX == 1 specialization -- confirm):
// scale out[0] and stop.
373 const ne10_int32_t scaling)
375 NE10_F2I32_FIXDIV (out[0], scaling);
Structure for the 32-bit fixed-point FFT function.