Changeset 221 for libmpc/branches/r2d/libmpcpsy/fft4g.c
- Timestamp:
- 02/17/07 21:18:35 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
libmpc/branches/r2d/libmpcpsy/fft4g.c
r195 r221 26 26 static void makewt ( const int nw, int* ip, float* w ); 27 27 static void makect ( const int nc, int* ip, float* c ); 28 static mpc_inline void bitrv2 ( const int n, int* ip, float* a ); // 29 static mpc_inline void cftfsub ( const int n, float* a, float* w ); // 30 static mpc_inline void rftfsub ( const int n, float* a, int nc, float* c ); // 31 static mpc_inline void cft1st ( const int n, float* a, float* w ); // 32 static mpc_inline void cftmdl_i386 ( const int n, const int l, float* a, float* w ); // 5648 33 // static mpc_inline void cftmdl_3DNow ( const int n, const int l, float* a, float* w ); // 4954 34 35 #if 0 36 # define cftmdl(n,l,a,w) cftmdl_3DNow ( n, l, a, w ) 37 #else 28 38 29 # define cftmdl(n,l,a,w) cftmdl_i386 ( n, l, a, w ) 39 #endif 30 31 32 /* -------- child routines -------- */ 33 static mpc_inline void 34 bitrv2 ( const int n, int* ip, float* a ) 35 { 36 int j, j1, k, k1, l, m, m2; 37 float xr, xi, yr, yi; 38 39 ip[0] = 0; 40 l = n; 41 m = 1; 42 while ( (m << 3) < l ) { 43 l >>= 1; 44 for ( j = 0; j < m; j++ ) { 45 ip[m + j] = ip[j] + l; 46 } 47 m <<= 1; 48 } 49 m2 = 2 * m; 50 if ( (m << 3) == l ) { 51 for ( k = 0; k < m; k++ ) { 52 for ( j = 0; j < k; j++ ) { 53 j1 = 2 * j + ip[k]; 54 k1 = 2 * k + ip[j]; 55 xr = a[j1]; 56 xi = a[j1 + 1]; 57 yr = a[k1]; 58 yi = a[k1 + 1]; 59 a[j1] = yr; 60 a[j1 + 1] = yi; 61 a[k1] = xr; 62 a[k1 + 1] = xi; 63 j1 += m2; 64 k1 += 2 * m2; 65 xr = a[j1]; 66 xi = a[j1 + 1]; 67 yr = a[k1]; 68 yi = a[k1 + 1]; 69 a[j1] = yr; 70 a[j1 + 1] = yi; 71 a[k1] = xr; 72 a[k1 + 1] = xi; 73 j1 += m2; 74 k1 -= m2; 75 xr = a[j1]; 76 xi = a[j1 + 1]; 77 yr = a[k1]; 78 yi = a[k1 + 1]; 79 a[j1] = yr; 80 a[j1 + 1] = yi; 81 a[k1] = xr; 82 a[k1 + 1] = xi; 83 j1 += m2; 84 k1 += 2 * m2; 85 xr = a[j1]; 86 xi = a[j1 + 1]; 87 yr = a[k1]; 88 yi = a[k1 + 1]; 89 a[j1] = yr; 90 a[j1 + 1] = yi; 91 a[k1] = xr; 92 a[k1 + 1] = xi; 93 } 94 j1 = 2 * k + m2 + ip[k]; 95 k1 = j1 + m2; 96 xr = a[j1]; 97 xi = a[j1 + 1]; 98 yr = a[k1]; 99 yi = a[k1 + 1]; 100 a[j1] = yr; 101 a[j1 + 1] = yi; 102 a[k1] = xr; 103 a[k1 + 1] = xi; 104 } 105 } else { 106 for ( k = 1; k < m; k++ ) { 107 for ( j = 0; j < k; j++ ) { 108 j1 = 2 * j + ip[k]; 109 k1 = 2 * k + ip[j]; 110 xr = a[j1]; 111 xi = a[j1 + 1]; 112 yr = a[k1]; 113 yi = a[k1 + 1]; 114 a[j1] = yr; 115 a[j1 + 1] = yi; 116 a[k1] = xr; 117 a[k1 + 1] = xi; 118 j1 += m2; 119 k1 += m2; 120 xr = a[j1]; 121 xi = a[j1 + 1]; 122 yr = a[k1]; 123 yi = a[k1 + 1]; 124 a[j1] = yr; 125 a[j1 + 1] = yi; 126 a[k1] = xr; 127 a[k1 + 1] = xi; 128 } 129 } 130 } 131 return; 132 } 133 134 135 static mpc_inline void 136 cft1st ( const int n, float* a, float* w ) 137 { 138 int j, k1; 139 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 140 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 141 142 x0r = a[ 0] + a[ 2]; 143 x0i = a[ 1] + a[ 3]; 144 x1r = a[ 0] - a[ 2]; 145 x1i = a[ 1] - a[ 3]; 146 x2r = a[ 4] + a[ 6]; 147 x2i = a[ 5] + a[ 7]; 148 x3r = a[ 4] - a[ 6]; 149 x3i = a[ 5] - a[ 7]; 150 a[ 0] = x0r + x2r; 151 a[ 1] = x0i + x2i; 152 a[ 4] = x0r - x2r; 153 a[ 5] = x0i - x2i; 154 a[ 2] = x1r - x3i; 155 a[ 3] = x1i + x3r; 156 a[ 6] = x1r + x3i; 157 a[ 7] = x1i - x3r; 158 wk1r = w[ 2]; 159 x0r = a[ 8] + a[10]; 160 x0i = a[ 9] + a[11]; 161 x1r = a[ 8] - a[10]; 162 x1i = a[ 9] - a[11]; 163 x2r = a[12] + a[14]; 164 x2i = a[13] + a[15]; 165 x3r = a[12] - a[14]; 166 x3i = a[13] - a[15]; 167 a[ 8] = x0r + x2r; 168 a[ 9] = x0i + x2i; 169 a[12] = x2i - x0i; 170 a[13] = x0r - x2r; 171 x0r = x1r - x3i; 172 x0i = x1i + x3r; 173 a[10] = wk1r * (x0r - x0i); 174 a[11] = wk1r * (x0r + x0i); 175 x0r = x3i + x1r; 176 x0i = x3r - x1i; 177 a[14] = wk1r * (x0i - x0r); 178 a[15] = wk1r * (x0i + x0r); 179 180 k1 = 0; 181 j = 16; 182 do { 183 k1 += 2; 184 wk2r = w[k1]; 185 wk2i = w[k1 + 1]; 186 wk1r = w[2*k1]; 187 wk1i = w[2*k1 + 1]; 188 wk3r = wk1r - 2 * wk2i * wk1i; 189 wk3i = 2 * wk2i * wk1r - wk1i; 190 x0r = a[j] + a[j + 2]; 191 x0i = a[j + 1] + a[j + 3]; 192 x1r = a[j] - a[j + 2]; 193 x1i = a[j + 1] - a[j + 3]; 194 x2r = a[j + 4] + a[j + 6]; 195 x2i = a[j + 5] + a[j + 7]; 196 x3r = a[j + 4] - a[j + 6]; 197 x3i = a[j + 5] - a[j + 7]; 198 a[j] = x0r + x2r; 199 a[j + 1] = x0i + x2i; 200 x0r -= x2r; 201 x0i -= x2i; 202 a[j + 4] = wk2r * x0r - wk2i * x0i; 203 a[j + 5] = wk2r * x0i + wk2i * x0r; 204 x0r = x1r - x3i; 205 x0i = x1i + x3r; 206 a[j + 2] = wk1r * x0r - wk1i * x0i; 207 a[j + 3] = wk1r * x0i + wk1i * x0r; 208 x0r = x1r + x3i; 209 x0i = x1i - x3r; 210 a[j + 6] = wk3r * x0r - wk3i * x0i; 211 a[j + 7] = wk3r * x0i + wk3i * x0r; 212 wk1r = w[2*k1 + 2]; 213 wk1i = w[2*k1 + 3]; 214 wk3r = wk1r - 2 * wk2r * wk1i; 215 wk3i = 2 * wk2r * wk1r - wk1i; 216 x0r = a[j + 8] + a[j + 10]; 217 x0i = a[j + 9] + a[j + 11]; 218 x1r = a[j + 8] - a[j + 10]; 219 x1i = a[j + 9] - a[j + 11]; 220 x2r = a[j + 12] + a[j + 14]; 221 x2i = a[j + 13] + a[j + 15]; 222 x3r = a[j + 12] - a[j + 14]; 223 x3i = a[j + 13] - a[j + 15]; 224 a[j + 8] = x0r + x2r; 225 a[j + 9] = x0i + x2i; 226 x0r -= x2r; 227 x0i -= x2i; 228 a[j + 12] = -wk2i * x0r - wk2r * x0i; 229 a[j + 13] = -wk2i * x0i + wk2r * x0r; 230 x0r = x1r - x3i; 231 x0i = x1i + x3r; 232 a[j + 10] = wk1r * x0r - wk1i * x0i; 233 a[j + 11] = wk1r * x0i + wk1i * x0r; 234 x0r = x1r + x3i; 235 x0i = x1i - x3r; 236 a[j + 14] = wk3r * x0r - wk3i * x0i; 237 a[j + 15] = wk3r * x0i + wk3i * x0r; 238 } while ( j += 16, j < n ); 239 return; 240 } 241 242 static mpc_inline void 243 cftmdl_i386 ( const int n, const int l, float* a, float* w ) 244 { 245 int j, j1, j2, j3, k, k1, m, m2; 246 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 247 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 248 249 m = l << 2; 250 251 for ( j = 0; j < l; j += 2 ) { 252 j1 = j + l; 253 j2 = j1 + l; 254 j3 = j2 + l; 255 x0r = a[j] + a[j1]; 256 x0i = a[j + 1] + a[j1 + 1]; 257 x1r = a[j] - a[j1]; 258 x1i = a[j + 1] - a[j1 + 1]; 259 x2r = a[j2] + a[j3]; 260 x2i = a[j2 + 1] + a[j3 + 1]; 261 x3r = a[j2] - a[j3]; 262 x3i = a[j2 + 1] - a[j3 + 1]; 263 a[j] = x0r + x2r; 264 a[j + 1] = x0i + x2i; 265 a[j2] = x0r - x2r; 266 a[j2 + 1] = x0i - x2i; 267 a[j1] = x1r - x3i; 268 a[j1 + 1] = x1i + x3r; 269 a[j3] = x1r + x3i; 270 a[j3 + 1] = x1i - x3r; 271 } 272 273 wk1r = w[2]; 274 for ( j = m; j < l + m; j += 2 ) { 275 j1 = j + l; 276 j2 = j1 + l; 277 j3 = j2 + l; 278 x0r = a[j] + a[j1]; 279 x0i = a[j + 1] + a[j1 + 1]; 280 x1r = a[j] - a[j1]; 281 x1i = a[j + 1] - a[j1 + 1]; 282 x2r = a[j2] + a[j3]; 283 x2i = a[j2 + 1] + a[j3 + 1]; 284 x3r = a[j2] - a[j3]; 285 x3i = a[j2 + 1] - a[j3 + 1]; 286 a[j] = x0r + x2r; 287 a[j + 1] = x0i + x2i; 288 a[j2] = x2i - x0i; 289 a[j2 + 1] = x0r - x2r; 290 x0r = x1r - x3i; 291 x0i = x1i + x3r; 292 a[j1] = wk1r * (x0r - x0i); 293 a[j1 + 1] = wk1r * (x0r + x0i); 294 x0r = x3i + x1r; 295 x0i = x3r - x1i; 296 a[j3] = wk1r * (x0i - x0r); 297 a[j3 + 1] = wk1r * (x0i + x0r); 298 } 299 300 k1 = 0; 301 m2 = 2 * m; 302 for ( k = m2; k < n; k += m2 ) { 303 k1 += 2; 304 wk2r = w[k1]; 305 wk2i = w[k1 + 1]; 306 wk1r = w[2*k1]; 307 wk1i = w[2*k1 + 1]; 308 wk3r = wk1r - 2 * wk2i * wk1i; 309 wk3i = 2 * wk2i * wk1r - wk1i; 310 j = k; 311 do { 312 j1 = j + l; 313 j2 = j1 + l; 314 j3 = j2 + l; 315 x0r = a[j] + a[j1]; 316 x0i = a[j + 1] + a[j1 + 1]; 317 x1r = a[j] - a[j1]; 318 x1i = a[j + 1] - a[j1 + 1]; 319 x2r = a[j2] + a[j3]; 320 x2i = a[j2 + 1] + a[j3 + 1]; 321 x3r = a[j2] - a[j3]; 322 x3i = a[j2 + 1] - a[j3 + 1]; 323 a[j] = x0r + x2r; 324 a[j + 1] = x0i + x2i; 325 x0r -= x2r; 326 x0i -= x2i; 327 a[j2] = wk2r * x0r - wk2i * x0i; 328 a[j2 + 1] = wk2r * x0i + wk2i * x0r; 329 x0r = x1r - x3i; 330 x0i = x1i + x3r; 331 a[j1] = wk1r * x0r - wk1i * x0i; 332 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 333 x0r = x1r + x3i; 334 x0i = x1i - x3r; 335 a[j3] = wk3r * x0r - wk3i * x0i; 336 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 337 } while ( j += 2, j < l + k ); 338 339 wk1r = w[2*k1 + 2]; 340 wk1i = w[2*k1 + 3]; 341 wk3r = wk1r - 2 * wk2r * wk1i; 342 wk3i = 2 * wk2r * wk1r - wk1i; 343 j = k + m; 344 do { 345 j1 = j + l; 346 j2 = j1 + l; 347 j3 = j2 + l; 348 x0r = a[j] + a[j1]; 349 x0i = a[j + 1] + a[j1 + 1]; 350 x1r = a[j] - a[j1]; 351 x1i = a[j + 1] - a[j1 + 1]; 352 x2r = a[j2] + a[j3]; 353 x2i = a[j2 + 1] + a[j3 + 1]; 354 x3r = a[j2] - a[j3]; 355 x3i = a[j2 + 1] - a[j3 + 1]; 356 a[j] = x0r + x2r; 357 a[j + 1] = x0i + x2i; 358 x0r -= x2r; 359 x0i -= x2i; 360 a[j2] = -wk2i * x0r - wk2r * x0i; 361 a[j2 + 1] = -wk2i * x0i + wk2r * x0r; 362 x0r = x1r - x3i; 363 x0i = x1i + x3r; 364 a[j1] = wk1r * x0r - wk1i * x0i; 365 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 366 x0r = x1r + x3i; 367 x0i = x1i - x3r; 368 a[j3] = wk3r * x0r - wk3i * x0i; 369 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 370 } while ( j += 2, j < l+k+m ); 371 } 372 return; 373 } 374 375 static mpc_inline void 376 cftfsub ( const int n, float* a, float* w ) 377 { 378 int j, j1, j2, j3, l; 379 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 380 381 l = 2; 382 if ( n > 8 ) { 383 cft1st ( n, a, w ); 384 l = 8; 385 while ( (l << 2) < n ) { 386 cftmdl ( n, l, a, w ); 387 l <<= 2; 388 } 389 } 390 if ( (l << 2) == n ) { 391 j = 0; 392 do { 393 j1 = j + l; 394 j2 = j1 + l; 395 j3 = j2 + l; 396 x0r = a[j] + a[j1]; 397 x0i = a[j + 1] + a[j1 + 1]; 398 x1r = a[j] - a[j1]; 399 x1i = a[j + 1] - a[j1 + 1]; 400 x2r = a[j2] + a[j3]; 401 x2i = a[j2 + 1] + a[j3 + 1]; 402 x3r = a[j2] - a[j3]; 403 x3i = a[j2 + 1] - a[j3 + 1]; 404 a[j] = x0r + x2r; 405 a[j + 1] = x0i + x2i; 406 a[j2] = x0r - x2r; 407 a[j2 + 1] = x0i - x2i; 408 a[j1] = x1r - x3i; 409 a[j1 + 1] = x1i + x3r; 410 a[j3] = x1r + x3i; 411 a[j3 + 1] = x1i - x3r; 412 } while ( j += 2, j < l ); 413 } else { 414 j = 0; 415 do { 416 j1 = j + l; 417 x0r = a[j] - a[j1]; 418 x0i = a[j + 1] - a[j1 + 1]; 419 a[j] += a[j1]; 420 a[j + 1] += a[j1 + 1]; 421 a[j1] = x0r; 422 a[j1 + 1] = x0i; 423 } while ( j += 2, j < l ); 424 } 425 return; 426 } 427 428 429 static mpc_inline void 430 rftfsub ( const int n, float* a, int nc, float* c ) 431 { 432 int j, k, kk, ks, m; 433 float wkr, wki, xr, xi, yr, yi; 434 435 m = n >> 1; 436 ks = 2 * nc / m; 437 kk = ks; 438 j = 2; 439 k = n; 440 do { 441 k -= 2; 442 nc -= ks; 443 wkr = 0.5f - c[nc]; 444 wki = c[kk]; 445 xr = a[j] - a[k]; 446 xi = a[j + 1] + a[k + 1]; 447 yr = wkr * xr - wki * xi; 448 yi = wkr * xi + wki * xr; 449 a[j] -= yr; 450 a[j + 1] -= yi; 451 a[k] += yr; 452 a[k + 1] -= yi; 453 kk += ks; 454 } while ( j += 2, j < m ); 455 return; 456 } 40 457 41 458 // generates lookup-tables … … 131 548 } 132 549 133 134 /* -------- child routines -------- */135 static void136 bitrv2 ( const int n, int* ip, float* a )137 {138 int j, j1, k, k1, l, m, m2;139 float xr, xi, yr, yi;140 141 ip[0] = 0;142 l = n;143 m = 1;144 while ( (m << 3) < l ) {145 l >>= 1;146 for ( j = 0; j < m; j++ ) {147 ip[m + j] = ip[j] + l;148 }149 m <<= 1;150 }151 m2 = 2 * m;152 if ( (m << 3) == l ) {153 for ( k = 0; k < m; k++ ) {154 for ( j = 0; j < k; j++ ) {155 j1 = 2 * j + ip[k];156 k1 = 2 * k + ip[j];157 xr = a[j1];158 xi = a[j1 + 1];159 yr = a[k1];160 yi = a[k1 + 1];161 a[j1] = yr;162 a[j1 + 1] = yi;163 a[k1] = xr;164 a[k1 + 1] = xi;165 j1 += m2;166 k1 += 2 * m2;167 xr = a[j1];168 xi = a[j1 + 1];169 yr = a[k1];170 yi = a[k1 + 1];171 a[j1] = yr;172 a[j1 + 1] = yi;173 a[k1] = xr;174 a[k1 + 1] = xi;175 j1 += m2;176 k1 -= m2;177 xr = a[j1];178 xi = a[j1 + 1];179 yr = a[k1];180 yi = a[k1 + 1];181 a[j1] = yr;182 a[j1 + 1] = yi;183 a[k1] = xr;184 a[k1 + 1] = xi;185 j1 += m2;186 k1 += 2 * m2;187 xr = a[j1];188 xi = a[j1 + 1];189 yr = a[k1];190 yi = a[k1 + 1];191 a[j1] = yr;192 a[j1 + 1] = yi;193 a[k1] = xr;194 a[k1 + 1] = xi;195 }196 j1 = 2 * k + m2 + ip[k];197 k1 = j1 + m2;198 xr = a[j1];199 xi = a[j1 + 1];200 yr = a[k1];201 yi = a[k1 + 1];202 a[j1] = yr;203 a[j1 + 1] = yi;204 a[k1] = xr;205 a[k1 + 1] = xi;206 }207 } else {208 for ( k = 1; k < m; k++ ) {209 for ( j = 0; j < k; j++ ) {210 j1 = 2 * j + ip[k];211 k1 = 2 * k + ip[j];212 xr = a[j1];213 xi = a[j1 + 1];214 yr = a[k1];215 yi = a[k1 + 1];216 a[j1] = yr;217 a[j1 + 1] = yi;218 a[k1] = xr;219 a[k1 + 1] = xi;220 j1 += m2;221 k1 += m2;222 xr = a[j1];223 xi = a[j1 + 1];224 yr = a[k1];225 yi = a[k1 + 1];226 a[j1] = yr;227 a[j1 + 1] = yi;228 a[k1] = xr;229 a[k1 + 1] = xi;230 }231 }232 }233 return;234 }235 236 237 static void238 cftfsub ( const int n, float* a, float* w )239 {240 int j, j1, j2, j3, l;241 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;242 243 l = 2;244 if ( n > 8 ) {245 cft1st ( n, a, w );246 l = 8;247 while ( (l << 2) < n ) {248 cftmdl ( n, l, a, w );249 l <<= 2;250 }251 }252 if ( (l << 2) == n ) {253 j = 0;254 do {255 j1 = j + l;256 j2 = j1 + l;257 j3 = j2 + l;258 x0r = a[j] + a[j1];259 x0i = a[j + 1] + a[j1 + 1];260 x1r = a[j] - a[j1];261 x1i = a[j + 1] - a[j1 + 1];262 x2r = a[j2] + a[j3];263 x2i = a[j2 + 1] + a[j3 + 1];264 x3r = a[j2] - a[j3];265 x3i = a[j2 + 1] - a[j3 + 1];266 a[j] = x0r + x2r;267 a[j + 1] = x0i + x2i;268 a[j2] = x0r - x2r;269 a[j2 + 1] = x0i - x2i;270 a[j1] = x1r - x3i;271 a[j1 + 1] = x1i + x3r;272 a[j3] = x1r + x3i;273 a[j3 + 1] = x1i - x3r;274 } while ( j += 2, j < l );275 } else {276 j = 0;277 do {278 j1 = j + l;279 x0r = a[j] - a[j1];280 x0i = a[j + 1] - a[j1 + 1];281 a[j] += a[j1];282 a[j + 1] += a[j1 + 1];283 a[j1] = x0r;284 a[j1 + 1] = x0i;285 } while ( j += 2, j < l );286 }287 return;288 }289 290 291 static void292 cft1st ( const int n, float* a, float* w )293 {294 int j, k1;295 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;296 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;297 298 x0r = a[ 0] + a[ 2];299 x0i = a[ 1] + a[ 3];300 x1r = a[ 0] - a[ 2];301 x1i = a[ 1] - a[ 3];302 x2r = a[ 4] + a[ 6];303 x2i = a[ 5] + a[ 7];304 x3r = a[ 4] - a[ 6];305 x3i = a[ 5] - a[ 7];306 a[ 0] = x0r + x2r;307 a[ 1] = x0i + x2i;308 a[ 4] = x0r - x2r;309 a[ 5] = x0i - x2i;310 a[ 2] = x1r - x3i;311 a[ 3] = x1i + x3r;312 a[ 6] = x1r + x3i;313 a[ 7] = x1i - x3r;314 wk1r = w[ 2];315 x0r = a[ 8] + a[10];316 x0i = a[ 9] + a[11];317 x1r = a[ 8] - a[10];318 x1i = a[ 9] - a[11];319 x2r = a[12] + a[14];320 x2i = a[13] + a[15];321 x3r = a[12] - a[14];322 x3i = a[13] - a[15];323 a[ 8] = x0r + x2r;324 a[ 9] = x0i + x2i;325 a[12] = x2i - x0i;326 a[13] = x0r - x2r;327 x0r = x1r - x3i;328 x0i = x1i + x3r;329 a[10] = wk1r * (x0r - x0i);330 a[11] = wk1r * (x0r + x0i);331 x0r = x3i + x1r;332 x0i = x3r - x1i;333 a[14] = wk1r * (x0i - x0r);334 a[15] = wk1r * (x0i + x0r);335 336 k1 = 0;337 j = 16;338 do {339 k1 += 2;340 wk2r = w[k1];341 wk2i = w[k1 + 1];342 wk1r = w[2*k1];343 wk1i = w[2*k1 + 1];344 wk3r = wk1r - 2 * wk2i * wk1i;345 wk3i = 2 * wk2i * wk1r - wk1i;346 x0r = a[j] + a[j + 2];347 x0i = a[j + 1] + a[j + 3];348 x1r = a[j] - a[j + 2];349 x1i = a[j + 1] - a[j + 3];350 x2r = a[j + 4] + a[j + 6];351 x2i = a[j + 5] + a[j + 7];352 x3r = a[j + 4] - a[j + 6];353 x3i = a[j + 5] - a[j + 7];354 a[j] = x0r + x2r;355 a[j + 1] = x0i + x2i;356 x0r -= x2r;357 x0i -= x2i;358 a[j + 4] = wk2r * x0r - wk2i * x0i;359 a[j + 5] = wk2r * x0i + wk2i * x0r;360 x0r = x1r - x3i;361 x0i = x1i + x3r;362 a[j + 2] = wk1r * x0r - wk1i * x0i;363 a[j + 3] = wk1r * x0i + wk1i * x0r;364 x0r = x1r + x3i;365 x0i = x1i - x3r;366 a[j + 6] = wk3r * x0r - wk3i * x0i;367 a[j + 7] = wk3r * x0i + wk3i * x0r;368 wk1r = w[2*k1 + 2];369 wk1i = w[2*k1 + 3];370 wk3r = wk1r - 2 * wk2r * wk1i;371 wk3i = 2 * wk2r * wk1r - wk1i;372 x0r = a[j + 8] + a[j + 10];373 x0i = a[j + 9] + a[j + 11];374 x1r = a[j + 8] - a[j + 10];375 x1i = a[j + 9] - a[j + 11];376 x2r = a[j + 12] + a[j + 14];377 x2i = a[j + 13] + a[j + 15];378 x3r = a[j + 12] - a[j + 14];379 x3i = a[j + 13] - a[j + 15];380 a[j + 8] = x0r + x2r;381 a[j + 9] = x0i + x2i;382 x0r -= x2r;383 x0i -= x2i;384 a[j + 12] = -wk2i * x0r - wk2r * x0i;385 a[j + 13] = -wk2i * x0i + wk2r * x0r;386 x0r = x1r - x3i;387 x0i = x1i + x3r;388 a[j + 10] = wk1r * x0r - wk1i * x0i;389 a[j + 11] = wk1r * x0i + wk1i * x0r;390 x0r = x1r + x3i;391 x0i = x1i - x3r;392 a[j + 14] = wk3r * x0r - wk3i * x0i;393 a[j + 15] = wk3r * x0i + wk3i * x0r;394 } while ( j += 16, j < n );395 return;396 }397 398 // extern void mpc_cdecl cftmdl_3DNow_1 ( const int n, const int l, float* a, float* w );399 // extern void mpc_cdecl cftmdl_3DNow_2 ( const int n, const int l, float* a, float* w );400 401 402 static void403 cftmdl_i386 ( const int n, const int l, float* a, float* w )404 {405 int j, j1, j2, j3, k, k1, m, m2;406 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;407 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;408 409 m = l << 2;410 411 for ( j = 0; j < l; j += 2 ) {412 j1 = j + l;413 j2 = j1 + l;414 j3 = j2 + l;415 x0r = a[j] + a[j1];416 x0i = a[j + 1] + a[j1 + 1];417 x1r = a[j] - a[j1];418 x1i = a[j + 1] - a[j1 + 1];419 x2r = a[j2] + a[j3];420 x2i = a[j2 + 1] + a[j3 + 1];421 x3r = a[j2] - a[j3];422 x3i = a[j2 + 1] - a[j3 + 1];423 a[j] = x0r + x2r;424 a[j + 1] = x0i + x2i;425 a[j2] = x0r - x2r;426 a[j2 + 1] = x0i - x2i;427 a[j1] = x1r - x3i;428 a[j1 + 1] = x1i + x3r;429 a[j3] = x1r + x3i;430 a[j3 + 1] = x1i - x3r;431 }432 433 wk1r = w[2];434 for ( j = m; j < l + m; j += 2 ) {435 j1 = j + l;436 j2 = j1 + l;437 j3 = j2 + l;438 x0r = a[j] + a[j1];439 x0i = a[j + 1] + a[j1 + 1];440 x1r = a[j] - a[j1];441 x1i = a[j + 1] - a[j1 + 1];442 x2r = a[j2] + a[j3];443 x2i = a[j2 + 1] + a[j3 + 1];444 x3r = a[j2] - a[j3];445 x3i = a[j2 + 1] - a[j3 + 1];446 a[j] = x0r + x2r;447 a[j + 1] = x0i + x2i;448 a[j2] = x2i - x0i;449 a[j2 + 1] = x0r - x2r;450 x0r = x1r - x3i;451 x0i = x1i + x3r;452 a[j1] = wk1r * (x0r - x0i);453 a[j1 + 1] = wk1r * (x0r + x0i);454 x0r = x3i + x1r;455 x0i = x3r - x1i;456 a[j3] = wk1r * (x0i - x0r);457 a[j3 + 1] = wk1r * (x0i + x0r);458 }459 460 k1 = 0;461 m2 = 2 * m;462 for ( k = m2; k < n; k += m2 ) {463 k1 += 2;464 wk2r = w[k1];465 wk2i = w[k1 + 1];466 wk1r = w[2*k1];467 wk1i = w[2*k1 + 1];468 wk3r = wk1r - 2 * wk2i * wk1i;469 wk3i = 2 * wk2i * wk1r - wk1i;470 j = k;471 do {472 j1 = j + l;473 j2 = j1 + l;474 j3 = j2 + l;475 x0r = a[j] + a[j1];476 x0i = a[j + 1] + a[j1 + 1];477 x1r = a[j] - a[j1];478 x1i = a[j + 1] - a[j1 + 1];479 x2r = a[j2] + a[j3];480 x2i = a[j2 + 1] + a[j3 + 1];481 x3r = a[j2] - a[j3];482 x3i = a[j2 + 1] - a[j3 + 1];483 a[j] = x0r + x2r;484 a[j + 1] = x0i + x2i;485 x0r -= x2r;486 x0i -= x2i;487 a[j2] = wk2r * x0r - wk2i * x0i;488 a[j2 + 1] = wk2r * x0i + wk2i * x0r;489 x0r = x1r - x3i;490 x0i = x1i + x3r;491 a[j1] = wk1r * x0r - wk1i * x0i;492 a[j1 + 1] = wk1r * x0i + wk1i * x0r;493 x0r = x1r + x3i;494 x0i = x1i - x3r;495 a[j3] = wk3r * x0r - wk3i * x0i;496 a[j3 + 1] = wk3r * x0i + wk3i * x0r;497 } while ( j += 2, j < l + k );498 499 wk1r = w[2*k1 + 2];500 wk1i = w[2*k1 + 3];501 wk3r = wk1r - 2 * wk2r * wk1i;502 wk3i = 2 * wk2r * wk1r - wk1i;503 j = k + m;504 do {505 j1 = j + l;506 j2 = j1 + l;507 j3 = j2 + l;508 x0r = a[j] + a[j1];509 x0i = a[j + 1] + a[j1 + 1];510 x1r = a[j] - a[j1];511 x1i = a[j + 1] - a[j1 + 1];512 x2r = a[j2] + a[j3];513 x2i = a[j2 + 1] + a[j3 + 1];514 x3r = a[j2] - a[j3];515 x3i = a[j2 + 1] - a[j3 + 1];516 a[j] = x0r + x2r;517 a[j + 1] = x0i + x2i;518 x0r -= x2r;519 x0i -= x2i;520 a[j2] = -wk2i * x0r - wk2r * x0i;521 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;522 x0r = x1r - x3i;523 x0i = x1i + x3r;524 a[j1] = wk1r * x0r - wk1i * x0i;525 a[j1 + 1] = wk1r * x0i + wk1i * x0r;526 x0r = x1r + x3i;527 x0i = x1i - x3r;528 a[j3] = wk3r * x0r - wk3i * x0i;529 a[j3 + 1] = wk3r * x0i + wk3i * x0r;530 } while ( j += 2, j < l+k+m );531 }532 return;533 }534 535 536 // static void537 // cftmdl_3DNow ( const int n, const int l, float* a, float* w )538 // {539 // int j, j1, j2, j3, k, k1, m, m2;540 // float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;541 // float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;542 //543 // cftmdl_3DNow_1 (n,l,a,w);544 //545 // m = l << 2;546 // k1 = 0;547 // m2 = 2 * m;548 // for ( k = m2; k < n; k += m2 ) {549 // k1 += 2;550 // wk2r = w[k1];551 // wk2i = w[k1 + 1];552 // wk1r = w[2*k1];553 // wk1i = w[2*k1 + 1];554 // wk3r = wk1r - 2 * wk2i * wk1i;555 // wk3i = 2 * wk2i * wk1r - wk1i;556 // j = k;557 // do {558 // j1 = j + l;559 // j2 = j1 + l;560 // j3 = j2 + l;561 // x0r = a[j] + a[j1];562 // x0i = a[j + 1] + a[j1 + 1];563 // x1r = a[j] - a[j1];564 // x1i = a[j + 1] - a[j1 + 1];565 // x2r = a[j2] + a[j3];566 // x2i = a[j2 + 1] + a[j3 + 1];567 // x3r = a[j2] - a[j3];568 // x3i = a[j2 + 1] - a[j3 + 1];569 // a[j] = x0r + x2r;570 // a[j + 1] = x0i + x2i;571 // x0r -= x2r;572 // x0i -= x2i;573 // a[j2] = wk2r * x0r - wk2i * x0i;574 // a[j2 + 1] = wk2r * x0i + wk2i * x0r;575 // x0r = x1r - x3i;576 // x0i = x1i + x3r;577 // a[j1] = wk1r * x0r - wk1i * x0i;578 // a[j1 + 1] = wk1r * x0i + wk1i * x0r;579 // x0r = x1r + x3i;580 // x0i = x1i - x3r;581 // a[j3] = wk3r * x0r - wk3i * x0i;582 // a[j3 + 1] = wk3r * x0i + wk3i * x0r;583 // } while ( j += 2, j < l + k );584 //585 // wk1r = w[2*k1 + 2];586 // wk1i = w[2*k1 + 3];587 // wk3r = wk1r - 2 * wk2r * wk1i;588 // wk3i = 2 * wk2r * wk1r - wk1i;589 // j = k + m;590 // do {591 // j1 = j + l;592 // j2 = j1 + l;593 // j3 = j2 + l;594 // x0r = a[j] + a[j1];595 // x0i = a[j + 1] + a[j1 + 1];596 // x1r = a[j] - a[j1];597 // x1i = a[j + 1] - a[j1 + 1];598 // x2r = a[j2] + a[j3];599 // x2i = a[j2 + 1] + a[j3 + 1];600 // x3r = a[j2] - a[j3];601 // x3i = a[j2 + 1] - a[j3 + 1];602 // a[j] = x0r + x2r;603 // a[j + 1] = x0i + x2i;604 // x0r -= x2r;605 // x0i -= x2i;606 // a[j2] = -wk2i * x0r - wk2r * x0i;607 // a[j2 + 1] = -wk2i * x0i + wk2r * x0r;608 // x0r = x1r - x3i;609 // x0i = x1i + x3r;610 // a[j1] = wk1r * x0r - wk1i * x0i;611 // a[j1 + 1] = wk1r * x0i + wk1i * x0r;612 // x0r = x1r + x3i;613 // x0i = x1i - x3r;614 // a[j3] = wk3r * x0r - wk3i * x0i;615 // a[j3 + 1] = wk3r * x0i + wk3i * x0r;616 // } while ( j += 2, j < l+k+m );617 // }618 // return;619 // }620 621 622 static void623 rftfsub ( const int n, float* a, int nc, float* c )624 {625 int j, k, kk, ks, m;626 float wkr, wki, xr, xi, yr, yi;627 628 m = n >> 1;629 ks = 2 * nc / m;630 kk = ks;631 j = 2;632 k = n;633 do {634 k -= 2;635 nc -= ks;636 wkr = 0.5f - c[nc];637 wki = c[kk];638 xr = a[j] - a[k];639 xi = a[j + 1] + a[k + 1];640 yr = wkr * xr - wki * xi;641 yi = wkr * xi + wki * xr;642 a[j] -= yr;643 a[j + 1] -= yi;644 a[k] += yr;645 a[k + 1] -= yi;646 kk += ks;647 } while ( j += 2, j < m );648 return;649 }650 651 550 /* end of fft4g.c */
Note: See TracChangeset
for help on using the changeset viewer.