- 论坛徽章:
- 24
|
本帖最后由 zhujiang73 于 2011-01-13 12:42 编辑
搞了大约两周,其间大部分时间用在补习有关理论上了,上学时学的高数忘了不少了。
先把这个算法:
是这样的? google 到这个:
zhujiang73 发表于 2010-12-29 10:42
改一下:- for(int i = 0; i < mLen / 2; i++)
- {
- float dAngle = -2 * FPI * i / mLen;
- W[i].real = (float)cos(dAngle);
- W[i].image = (float)sin(dAngle);
- }
- reverse(a,b,mLen,M);
复制代码- void reverse(int *a, int *b, int len, int M)
- {
- int i,j;
- for(i=0; i<M; i++)
- {
- a[i] = 0;
- }
- b[0] = 0;
- for(i=1; i<len; i++)
- {
- j = 0;
- while(a[j] != 0)
- {
- a[j] = 0;
- j++;
- }
- a[j] = 1;
- b[i] = 0;
- for(j=0; j<M; j++)
- {
- b[i] = b[i]+a[j]*(int)pow(2,M-1-j);
- }
- }
- }
复制代码- void fft(zcomplex *A, zcomplex *W, int fft_nLen, int fft_M)
- {
- int p,dist;
- zcomplex C,B;
- for(int lev=1; lev<=fft_M; lev++)
- {
- dist = 1<<(lev-1);
- for(int t=0; t<dist; t++)
- {
- p = t*(1<<(fft_M-lev));
- for(int i=t; i<fft_nLen; i+=1<<lev)
- {
- B = Mul(A[i+dist],W[p]);
- C = Add(A[i],B);
- A[i+dist] = Sub(A[i],B);
- A[i].real = C.real;
- A[i].image = C.image;
- }
- }
- }
- }
复制代码 然后再用 OpenCL 并行化,同时计算多行的 FFT,计算结果转置,再算一遍再转置回来就行了:- __kernel void fft4_nx( __global float4 *A, __global const float2 *W, const int nlen, const int fft_M)
- {
- float4 f4_b;
- int i;
- int lev,dist,p,t;
-
- float2 w_l;
- float4 f4_tmp;
- float4 f4_tmp01;
- float4 f4_tmp02;
-
- float4 a4,b4;
-
- int y = get_global_id(0);
- for(lev=1; lev<=fft_M; lev++)
- {
- dist = 1<<(lev-1);
- for(t=0; t<dist; t++)
- {
- p = t*(1<<(fft_M-lev));
- for(i=t; i<nlen; i+=1<<lev)
- {
- w_l = W[p];
- b4 = A[y*nlen+i+dist];
- f4_b.x = b4.x*w_l.x - b4.y*w_l.y;
- f4_b.y = b4.x*w_l.y + b4.y*w_l.x;
- f4_b.z = b4.z*W[p].x - b4.w*W[p].y;
- f4_b.w = b4.z*W[p].y + b4.w*W[p].x;
-
- a4 = A[y*nlen+i];
-
- A[y*nlen+i+dist] = a4 - f4_b;
-
- A[y*nlen+i] = a4 + f4_b;
- }
- }
- }
- }
复制代码 |
|