oct-fftw.cc

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2001-2012 John W. Eaton
00004 
00005 This file is part of Octave.
00006 
00007 Octave is free software; you can redistribute it and/or modify it
00008 under the terms of the GNU General Public License as published by the
00009 Free Software Foundation; either version 3 of the License, or (at your
00010 option) any later version.
00011 
00012 Octave is distributed in the hope that it will be useful, but WITHOUT
00013 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00014 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00015 for more details.
00016 
00017 You should have received a copy of the GNU General Public License
00018 along with Octave; see the file COPYING.  If not, see
00019 <http://www.gnu.org/licenses/>.
00020 
00021 */
00022 
00023 #ifdef HAVE_CONFIG_H
00024 #include <config.h>
00025 #endif
00026 
00027 #if defined (HAVE_FFTW)
00028 
00029 #include <iostream>
00030 #include <vector>
00031 
00032 #include "lo-error.h"
00033 #include "oct-fftw.h"
00034 #include "quit.h"
00035 #include "oct-locbuf.h"
00036 #include "singleton-cleanup.h"
00037 
00038 octave_fftw_planner *octave_fftw_planner::instance = 0;
00039 
00040 // Helper class to create and cache FFTW plans for both 1D and
00041 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
00042 // the plans, which in theory is suboptimal, but provides quite
00043 // reasonable performance in practice.
00044 
00045 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
00046 // will destroy the input and output arrays. We must, therefore, create a
00047 // temporary input array with the same size and 16-byte alignment as
00048 // the original array when using a different planner strategy.
00049 // Note that we also use any wisdom that is available, either in a
00050 // FFTW3 system wide file or as supplied by the user.
00051 
00052 // FIXME -- if we can ensure 16 byte alignment in Array<T>
00053 // (<T> *data) the FFTW3 can use SIMD instructions for further
00054 // acceleration.
00055 
00056 // Note that it is profitable to store the FFTW3 plans, for small FFTs.
00057 
00058 octave_fftw_planner::octave_fftw_planner (void)
00059   : meth (ESTIMATE), rplan (0), rd (0), rs (0), rr (0), rh (0), rn (),
00060     rsimd_align (false)
00061 {
00062   plan[0] = plan[1] = 0;
00063   d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
00064   simd_align[0] = simd_align[1] = false;
00065   inplace[0] = inplace[1] = false;
00066   n[0] = n[1] = dim_vector ();
00067 
00068   // If we have a system wide wisdom file, import it.
00069   fftw_import_system_wisdom ();
00070 }
00071 
00072 octave_fftw_planner::~octave_fftw_planner (void)
00073 {
00074   fftw_plan *plan_p;
00075 
00076   plan_p = &rplan;
00077   if (*plan_p)
00078     fftw_destroy_plan (*plan_p);
00079 
00080   plan_p = &plan[0];
00081   if (*plan_p)
00082     fftw_destroy_plan (*plan_p);
00083 
00084   plan_p = &plan[1];
00085   if (*plan_p)
00086     fftw_destroy_plan (*plan_p);
00087 }
00088 
00089 bool
00090 octave_fftw_planner::instance_ok (void)
00091 {
00092   bool retval = true;
00093 
00094   if (! instance)
00095     {
00096       instance = new octave_fftw_planner ();
00097 
00098       if (instance)
00099         singleton_cleanup_list::add (cleanup_instance);
00100     }
00101 
00102   if (! instance)
00103     {
00104       (*current_liboctave_error_handler)
00105         ("unable to create octave_fftw_planner object!");
00106 
00107       retval = false;
00108     }
00109 
00110   return retval;
00111 }
00112 
00113 #define CHECK_SIMD_ALIGNMENT(x) \
00114   (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
00115 
00116 fftw_plan
00117 octave_fftw_planner::do_create_plan (int dir, const int rank,
00118                                      const dim_vector dims,
00119                                      octave_idx_type howmany,
00120                                      octave_idx_type stride,
00121                                      octave_idx_type dist,
00122                                      const Complex *in, Complex *out)
00123 {
00124   int which = (dir == FFTW_FORWARD) ? 0 : 1;
00125   fftw_plan *cur_plan_p = &plan[which];
00126   bool create_new_plan = false;
00127   bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00128   bool ioinplace = (in == out);
00129 
00130   // Don't create a new plan if we have a non SIMD plan already but
00131   // can do SIMD.  This prevents endlessly recreating plans if we
00132   // change the alignment.
00133 
00134   if (plan[which] == 0 || d[which] != dist || s[which] != stride
00135       || r[which] != rank || h[which] != howmany
00136       || ioinplace != inplace[which]
00137       || ((ioalign != simd_align[which]) ? !ioalign : false))
00138     create_new_plan = true;
00139   else
00140     {
00141       // We still might not have the same shape of array.
00142 
00143       for (int i = 0; i < rank; i++)
00144         if (dims(i) != n[which](i))
00145           {
00146             create_new_plan = true;
00147             break;
00148           }
00149     }
00150 
00151   if (create_new_plan)
00152     {
00153       d[which] = dist;
00154       s[which] = stride;
00155       r[which] = rank;
00156       h[which] = howmany;
00157       simd_align[which] = ioalign;
00158       inplace[which] = ioinplace;
00159       n[which] = dims;
00160 
00161       // Note reversal of dimensions for column major storage in FFTW.
00162       octave_idx_type nn = 1;
00163       OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00164 
00165       for (int i = 0, j = rank-1; i < rank; i++, j--)
00166         {
00167           tmp[i] = dims(j);
00168           nn *= dims(j);
00169         }
00170 
00171       int plan_flags = 0;
00172       bool plan_destroys_in = true;
00173 
00174       switch (meth)
00175         {
00176         case UNKNOWN:
00177         case ESTIMATE:
00178           plan_flags |= FFTW_ESTIMATE;
00179           plan_destroys_in = false;
00180           break;
00181         case MEASURE:
00182           plan_flags |= FFTW_MEASURE;
00183           break;
00184         case PATIENT:
00185           plan_flags |= FFTW_PATIENT;
00186           break;
00187         case EXHAUSTIVE:
00188           plan_flags |= FFTW_EXHAUSTIVE;
00189           break;
00190         case HYBRID:
00191           if (nn < 8193)
00192             plan_flags |= FFTW_MEASURE;
00193           else
00194             {
00195               plan_flags |= FFTW_ESTIMATE;
00196               plan_destroys_in = false;
00197             }
00198           break;
00199         }
00200 
00201       if (ioalign)
00202         plan_flags &= ~FFTW_UNALIGNED;
00203       else
00204         plan_flags |= FFTW_UNALIGNED;
00205 
00206       if (*cur_plan_p)
00207         fftw_destroy_plan (*cur_plan_p);
00208 
00209       if (plan_destroys_in)
00210         {
00211           // Create matrix with the same size and 16-byte alignment as input
00212           OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
00213           itmp = reinterpret_cast<Complex *>
00214             (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
00215              ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
00216 
00217           *cur_plan_p =
00218             fftw_plan_many_dft (rank, tmp, howmany,
00219               reinterpret_cast<fftw_complex *> (itmp),
00220               0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00221               0, stride, dist, dir, plan_flags);
00222         }
00223       else
00224         {
00225           *cur_plan_p =
00226             fftw_plan_many_dft (rank, tmp, howmany,
00227               reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
00228               0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00229               0, stride, dist, dir, plan_flags);
00230         }
00231 
00232       if (*cur_plan_p == 0)
00233         (*current_liboctave_error_handler) ("Error creating fftw plan");
00234     }
00235 
00236   return *cur_plan_p;
00237 }
00238 
00239 fftw_plan
00240 octave_fftw_planner::do_create_plan (const int rank, const dim_vector dims,
00241                                      octave_idx_type howmany,
00242                                      octave_idx_type stride,
00243                                      octave_idx_type dist,
00244                                      const double *in, Complex *out)
00245 {
00246   fftw_plan *cur_plan_p = &rplan;
00247   bool create_new_plan = false;
00248   bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00249 
00250   // Don't create a new plan if we have a non SIMD plan already but
00251   // can do SIMD.  This prevents endlessly recreating plans if we
00252   // change the alignment.
00253 
00254   if (rplan == 0 || rd != dist || rs != stride || rr != rank
00255       || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
00256     create_new_plan = true;
00257   else
00258     {
00259       // We still might not have the same shape of array.
00260 
00261       for (int i = 0; i < rank; i++)
00262         if (dims(i) != rn(i))
00263           {
00264             create_new_plan = true;
00265             break;
00266           }
00267     }
00268 
00269   if (create_new_plan)
00270     {
00271       rd = dist;
00272       rs = stride;
00273       rr = rank;
00274       rh = howmany;
00275       rsimd_align = ioalign;
00276       rn = dims;
00277 
00278       // Note reversal of dimensions for column major storage in FFTW.
00279       octave_idx_type nn = 1;
00280       OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00281 
00282       for (int i = 0, j = rank-1; i < rank; i++, j--)
00283         {
00284           tmp[i] = dims(j);
00285           nn *= dims(j);
00286         }
00287 
00288       int plan_flags = 0;
00289       bool plan_destroys_in = true;
00290 
00291       switch (meth)
00292         {
00293         case UNKNOWN:
00294         case ESTIMATE:
00295           plan_flags |= FFTW_ESTIMATE;
00296           plan_destroys_in = false;
00297           break;
00298         case MEASURE:
00299           plan_flags |= FFTW_MEASURE;
00300           break;
00301         case PATIENT:
00302           plan_flags |= FFTW_PATIENT;
00303           break;
00304         case EXHAUSTIVE:
00305           plan_flags |= FFTW_EXHAUSTIVE;
00306           break;
00307         case HYBRID:
00308           if (nn < 8193)
00309             plan_flags |= FFTW_MEASURE;
00310           else
00311             {
00312               plan_flags |= FFTW_ESTIMATE;
00313               plan_destroys_in = false;
00314             }
00315           break;
00316         }
00317 
00318       if (ioalign)
00319         plan_flags &= ~FFTW_UNALIGNED;
00320       else
00321         plan_flags |= FFTW_UNALIGNED;
00322 
00323       if (*cur_plan_p)
00324         fftw_destroy_plan (*cur_plan_p);
00325 
00326       if (plan_destroys_in)
00327         {
00328           // Create matrix with the same size and 16-byte alignment as input
00329           OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
00330           itmp = reinterpret_cast<double *>
00331             (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
00332              ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
00333 
00334           *cur_plan_p =
00335             fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
00336               0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00337               0, stride, dist, plan_flags);
00338         }
00339       else
00340         {
00341           *cur_plan_p =
00342             fftw_plan_many_dft_r2c (rank, tmp, howmany,
00343               (const_cast<double *> (in)),
00344               0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00345               0, stride, dist, plan_flags);
00346         }
00347 
00348       if (*cur_plan_p == 0)
00349         (*current_liboctave_error_handler) ("Error creating fftw plan");
00350     }
00351 
00352   return *cur_plan_p;
00353 }
00354 
00355 octave_fftw_planner::FftwMethod
00356 octave_fftw_planner::do_method (void)
00357 {
00358   return meth;
00359 }
00360 
00361 octave_fftw_planner::FftwMethod
00362 octave_fftw_planner::do_method (FftwMethod _meth)
00363 {
00364   FftwMethod ret = meth;
00365   if (_meth == ESTIMATE || _meth == MEASURE
00366       || _meth == PATIENT || _meth == EXHAUSTIVE
00367       || _meth == HYBRID)
00368     {
00369       if (meth != _meth)
00370         {
00371           meth = _meth;
00372           if (rplan)
00373             fftw_destroy_plan (rplan);
00374           if (plan[0])
00375             fftw_destroy_plan (plan[0]);
00376           if (plan[1])
00377             fftw_destroy_plan (plan[1]);
00378           rplan = plan[0] = plan[1] = 0;
00379         }
00380     }
00381   else
00382     ret = UNKNOWN;
00383   return ret;
00384 }
00385 
00386 octave_float_fftw_planner *octave_float_fftw_planner::instance = 0;
00387 
00388 octave_float_fftw_planner::octave_float_fftw_planner (void)
00389   : meth (ESTIMATE), rplan (0), rd (0), rs (0), rr (0), rh (0), rn (),
00390     rsimd_align (false)
00391 {
00392   plan[0] = plan[1] = 0;
00393   d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
00394   simd_align[0] = simd_align[1] = false;
00395   inplace[0] = inplace[1] = false;
00396   n[0] = n[1] = dim_vector ();
00397 
00398   // If we have a system wide wisdom file, import it.
00399   fftwf_import_system_wisdom ();
00400 }
00401 
00402 octave_float_fftw_planner::~octave_float_fftw_planner (void)
00403 {
00404   fftwf_plan *plan_p;
00405 
00406   plan_p = &rplan;
00407   if (*plan_p)
00408     fftwf_destroy_plan (*plan_p);
00409 
00410   plan_p = &plan[0];
00411   if (*plan_p)
00412     fftwf_destroy_plan (*plan_p);
00413 
00414   plan_p = &plan[1];
00415   if (*plan_p)
00416     fftwf_destroy_plan (*plan_p);
00417 }
00418 
00419 bool
00420 octave_float_fftw_planner::instance_ok (void)
00421 {
00422   bool retval = true;
00423 
00424   if (! instance)
00425     {
00426       instance = new octave_float_fftw_planner ();
00427 
00428       if (instance)
00429         singleton_cleanup_list::add (cleanup_instance);
00430     }
00431 
00432   if (! instance)
00433     {
00434       (*current_liboctave_error_handler)
00435         ("unable to create octave_fftw_planner object!");
00436 
00437       retval = false;
00438     }
00439 
00440   return retval;
00441 }
00442 
00443 fftwf_plan
00444 octave_float_fftw_planner::do_create_plan (int dir, const int rank,
00445                                            const dim_vector dims,
00446                                            octave_idx_type howmany,
00447                                            octave_idx_type stride,
00448                                            octave_idx_type dist,
00449                                            const FloatComplex *in,
00450                                            FloatComplex *out)
00451 {
00452   int which = (dir == FFTW_FORWARD) ? 0 : 1;
00453   fftwf_plan *cur_plan_p = &plan[which];
00454   bool create_new_plan = false;
00455   bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00456   bool ioinplace = (in == out);
00457 
00458   // Don't create a new plan if we have a non SIMD plan already but
00459   // can do SIMD.  This prevents endlessly recreating plans if we
00460   // change the alignment.
00461 
00462   if (plan[which] == 0 || d[which] != dist || s[which] != stride
00463       || r[which] != rank || h[which] != howmany
00464       || ioinplace != inplace[which]
00465       || ((ioalign != simd_align[which]) ? !ioalign : false))
00466     create_new_plan = true;
00467   else
00468     {
00469       // We still might not have the same shape of array.
00470 
00471       for (int i = 0; i < rank; i++)
00472         if (dims(i) != n[which](i))
00473           {
00474             create_new_plan = true;
00475             break;
00476           }
00477     }
00478 
00479   if (create_new_plan)
00480     {
00481       d[which] = dist;
00482       s[which] = stride;
00483       r[which] = rank;
00484       h[which] = howmany;
00485       simd_align[which] = ioalign;
00486       inplace[which] = ioinplace;
00487       n[which] = dims;
00488 
00489       // Note reversal of dimensions for column major storage in FFTW.
00490       octave_idx_type nn = 1;
00491       OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00492 
00493       for (int i = 0, j = rank-1; i < rank; i++, j--)
00494         {
00495           tmp[i] = dims(j);
00496           nn *= dims(j);
00497         }
00498 
00499       int plan_flags = 0;
00500       bool plan_destroys_in = true;
00501 
00502       switch (meth)
00503         {
00504         case UNKNOWN:
00505         case ESTIMATE:
00506           plan_flags |= FFTW_ESTIMATE;
00507           plan_destroys_in = false;
00508           break;
00509         case MEASURE:
00510           plan_flags |= FFTW_MEASURE;
00511           break;
00512         case PATIENT:
00513           plan_flags |= FFTW_PATIENT;
00514           break;
00515         case EXHAUSTIVE:
00516           plan_flags |= FFTW_EXHAUSTIVE;
00517           break;
00518         case HYBRID:
00519           if (nn < 8193)
00520             plan_flags |= FFTW_MEASURE;
00521           else
00522             {
00523               plan_flags |= FFTW_ESTIMATE;
00524               plan_destroys_in = false;
00525             }
00526           break;
00527         }
00528 
00529       if (ioalign)
00530         plan_flags &= ~FFTW_UNALIGNED;
00531       else
00532         plan_flags |= FFTW_UNALIGNED;
00533 
00534       if (*cur_plan_p)
00535         fftwf_destroy_plan (*cur_plan_p);
00536 
00537       if (plan_destroys_in)
00538         {
00539           // Create matrix with the same size and 16-byte alignment as input
00540           OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
00541           itmp = reinterpret_cast<FloatComplex *>
00542             (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
00543              ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
00544 
00545           *cur_plan_p =
00546             fftwf_plan_many_dft (rank, tmp, howmany,
00547               reinterpret_cast<fftwf_complex *> (itmp),
00548               0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
00549               0, stride, dist, dir, plan_flags);
00550         }
00551       else
00552         {
00553           *cur_plan_p =
00554             fftwf_plan_many_dft (rank, tmp, howmany,
00555               reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
00556               0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
00557               0, stride, dist, dir, plan_flags);
00558         }
00559 
00560       if (*cur_plan_p == 0)
00561         (*current_liboctave_error_handler) ("Error creating fftw plan");
00562     }
00563 
00564   return *cur_plan_p;
00565 }
00566 
00567 fftwf_plan
00568 octave_float_fftw_planner::do_create_plan (const int rank,
00569                                            const dim_vector dims,
00570                                            octave_idx_type howmany,
00571                                            octave_idx_type stride,
00572                                            octave_idx_type dist,
00573                                            const float *in, FloatComplex *out)
00574 {
00575   fftwf_plan *cur_plan_p = &rplan;
00576   bool create_new_plan = false;
00577   bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00578 
00579   // Don't create a new plan if we have a non SIMD plan already but
00580   // can do SIMD.  This prevents endlessly recreating plans if we
00581   // change the alignment.
00582 
00583   if (rplan == 0 || rd != dist || rs != stride || rr != rank
00584       || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
00585     create_new_plan = true;
00586   else
00587     {
00588       // We still might not have the same shape of array.
00589 
00590       for (int i = 0; i < rank; i++)
00591         if (dims(i) != rn(i))
00592           {
00593             create_new_plan = true;
00594             break;
00595           }
00596     }
00597 
00598   if (create_new_plan)
00599     {
00600       rd = dist;
00601       rs = stride;
00602       rr = rank;
00603       rh = howmany;
00604       rsimd_align = ioalign;
00605       rn = dims;
00606 
00607       // Note reversal of dimensions for column major storage in FFTW.
00608       octave_idx_type nn = 1;
00609       OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00610 
00611       for (int i = 0, j = rank-1; i < rank; i++, j--)
00612         {
00613           tmp[i] = dims(j);
00614           nn *= dims(j);
00615         }
00616 
00617       int plan_flags = 0;
00618       bool plan_destroys_in = true;
00619 
00620       switch (meth)
00621         {
00622         case UNKNOWN:
00623         case ESTIMATE:
00624           plan_flags |= FFTW_ESTIMATE;
00625           plan_destroys_in = false;
00626           break;
00627         case MEASURE:
00628           plan_flags |= FFTW_MEASURE;
00629           break;
00630         case PATIENT:
00631           plan_flags |= FFTW_PATIENT;
00632           break;
00633         case EXHAUSTIVE:
00634           plan_flags |= FFTW_EXHAUSTIVE;
00635           break;
00636         case HYBRID:
00637           if (nn < 8193)
00638             plan_flags |= FFTW_MEASURE;
00639           else
00640             {
00641               plan_flags |= FFTW_ESTIMATE;
00642               plan_destroys_in = false;
00643             }
00644           break;
00645         }
00646 
00647       if (ioalign)
00648         plan_flags &= ~FFTW_UNALIGNED;
00649       else
00650         plan_flags |= FFTW_UNALIGNED;
00651 
00652       if (*cur_plan_p)
00653         fftwf_destroy_plan (*cur_plan_p);
00654 
00655       if (plan_destroys_in)
00656         {
00657           // Create matrix with the same size and 16-byte alignment as input
00658           OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
00659           itmp = reinterpret_cast<float *>
00660             (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
00661              ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
00662 
00663           *cur_plan_p =
00664             fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
00665               0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
00666               0, stride, dist, plan_flags);
00667         }
00668       else
00669         {
00670           *cur_plan_p =
00671             fftwf_plan_many_dft_r2c (rank, tmp, howmany,
00672               (const_cast<float *> (in)),
00673               0, stride, dist, reinterpret_cast<fftwf_complex *> (out),
00674               0, stride, dist, plan_flags);
00675         }
00676 
00677       if (*cur_plan_p == 0)
00678         (*current_liboctave_error_handler) ("Error creating fftw plan");
00679     }
00680 
00681   return *cur_plan_p;
00682 }
00683 
00684 octave_float_fftw_planner::FftwMethod
00685 octave_float_fftw_planner::do_method (void)
00686 {
00687   return meth;
00688 }
00689 
00690 octave_float_fftw_planner::FftwMethod
00691 octave_float_fftw_planner::do_method (FftwMethod _meth)
00692 {
00693   FftwMethod ret = meth;
00694   if (_meth == ESTIMATE || _meth == MEASURE
00695       || _meth == PATIENT || _meth == EXHAUSTIVE
00696       || _meth == HYBRID)
00697     {
00698       if (meth != _meth)
00699         {
00700           meth = _meth;
00701           if (rplan)
00702             fftwf_destroy_plan (rplan);
00703           if (plan[0])
00704             fftwf_destroy_plan (plan[0]);
00705           if (plan[1])
00706             fftwf_destroy_plan (plan[1]);
00707           rplan = plan[0] = plan[1] = 0;
00708         }
00709     }
00710   else
00711     ret = UNKNOWN;
00712   return ret;
00713 }
00714 
00715 template <class T>
00716 static inline void
00717 convert_packcomplex_1d (T *out, size_t nr, size_t nc,
00718                         octave_idx_type stride, octave_idx_type dist)
00719 {
00720   octave_quit ();
00721 
00722   // Fill in the missing data.
00723 
00724   for (size_t i = 0; i < nr; i++)
00725     for (size_t j = nc/2+1; j < nc; j++)
00726       out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]);
00727 
00728   octave_quit ();
00729 }
00730 
00731 template <class T>
00732 static inline void
00733 convert_packcomplex_Nd (T *out, const dim_vector &dv)
00734 {
00735   size_t nc = dv(0);
00736   size_t nr = dv(1);
00737   size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1);
00738   size_t nrp = nr * np;
00739   T *ptr1, *ptr2;
00740 
00741   octave_quit ();
00742 
00743   // Create space for the missing elements.
00744 
00745   for (size_t i = 0; i < nrp; i++)
00746     {
00747       ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
00748       ptr2 = out + i * nc;
00749       for (size_t j = 0; j < nc/2+1; j++)
00750         *ptr2++ = *ptr1++;
00751     }
00752 
00753   octave_quit ();
00754 
00755   // Fill in the missing data for the rank = 2 case directly for speed.
00756 
00757   for (size_t i = 0; i < np; i++)
00758     {
00759       for (size_t j = 1; j < nr; j++)
00760         for (size_t k = nc/2+1; k < nc; k++)
00761           out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]);
00762 
00763       for (size_t j = nc/2+1; j < nc; j++)
00764         out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]);
00765     }
00766 
00767   octave_quit ();
00768 
00769   // Now do the permutations needed for rank > 2 cases.
00770 
00771   size_t jstart = dv(0) * dv(1);
00772   size_t kstep = dv(0);
00773   size_t nel = dv.numel ();
00774 
00775   for (int inner = 2; inner < dv.length (); inner++)
00776     {
00777       size_t jmax = jstart * dv(inner);
00778       for (size_t i = 0; i < nel; i+=jmax)
00779         for (size_t j = jstart, jj = jmax-jstart; j < jj;
00780              j+=jstart, jj-=jstart)
00781           for (size_t k = 0; k < jstart; k+= kstep)
00782             for (size_t l = nc/2+1; l < nc; l++)
00783               {
00784                 T tmp = out[i+ j + k + l];
00785                 out[i + j + k + l] =  out[i + jj + k + l];
00786                 out[i + jj + k + l] = tmp;
00787               }
00788       jstart = jmax;
00789     }
00790 
00791   octave_quit ();
00792 }
00793 
00794 int
00795 octave_fftw::fft (const double *in, Complex *out, size_t npts,
00796                   size_t nsamples, octave_idx_type stride, octave_idx_type dist)
00797 {
00798   dist = (dist < 0 ? npts : dist);
00799 
00800   dim_vector dv (npts, 1);
00801   fftw_plan plan = octave_fftw_planner::create_plan (1, dv, nsamples,
00802                                                      stride, dist, in, out);
00803 
00804   fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
00805                          reinterpret_cast<fftw_complex *> (out));
00806 
00807   // Need to create other half of the transform.
00808 
00809   convert_packcomplex_1d (out, nsamples, npts, stride, dist);
00810 
00811   return 0;
00812 }
00813 
00814 int
00815 octave_fftw::fft (const Complex *in, Complex *out, size_t npts,
00816                   size_t nsamples, octave_idx_type stride, octave_idx_type dist)
00817 {
00818   dist = (dist < 0 ? npts : dist);
00819 
00820   dim_vector dv (npts, 1);
00821   fftw_plan plan = octave_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
00822                                                      nsamples, stride,
00823                                                      dist, in, out);
00824 
00825   fftw_execute_dft (plan,
00826         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00827         reinterpret_cast<fftw_complex *> (out));
00828 
00829   return 0;
00830 }
00831 
00832 int
00833 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts,
00834                    size_t nsamples, octave_idx_type stride, octave_idx_type dist)
00835 {
00836   dist = (dist < 0 ? npts : dist);
00837 
00838   dim_vector dv (npts, 1);
00839   fftw_plan plan = octave_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
00840                                                      nsamples, stride,
00841                                                      dist, in, out);
00842 
00843   fftw_execute_dft (plan,
00844         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00845         reinterpret_cast<fftw_complex *> (out));
00846 
00847   const Complex scale = npts;
00848   for (size_t j = 0; j < nsamples; j++)
00849     for (size_t i = 0; i < npts; i++)
00850       out[i*stride + j*dist] /= scale;
00851 
00852   return 0;
00853 }
00854 
00855 int
00856 octave_fftw::fftNd (const double *in, Complex *out, const int rank,
00857                     const dim_vector &dv)
00858 {
00859   octave_idx_type dist = 1;
00860   for (int i = 0; i < rank; i++)
00861     dist *= dv(i);
00862 
00863   // Fool with the position of the start of the output matrix, so that
00864   // creating other half of the matrix won't cause cache problems.
00865 
00866   octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
00867 
00868   fftw_plan plan = octave_fftw_planner::create_plan (rank, dv, 1, 1, dist,
00869                                                      in, out + offset);
00870 
00871   fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
00872                         reinterpret_cast<fftw_complex *> (out+ offset));
00873 
00874   // Need to create other half of the transform.
00875 
00876   convert_packcomplex_Nd (out, dv);
00877 
00878   return 0;
00879 }
00880 
00881 int
00882 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank,
00883                     const dim_vector &dv)
00884 {
00885   octave_idx_type dist = 1;
00886   for (int i = 0; i < rank; i++)
00887     dist *= dv(i);
00888 
00889   fftw_plan plan = octave_fftw_planner::create_plan (FFTW_FORWARD, rank,
00890                                                      dv, 1, 1, dist, in, out);
00891 
00892   fftw_execute_dft (plan,
00893         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00894         reinterpret_cast<fftw_complex *> (out));
00895 
00896   return 0;
00897 }
00898 
00899 int
00900 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank,
00901                      const dim_vector &dv)
00902 {
00903   octave_idx_type dist = 1;
00904   for (int i = 0; i < rank; i++)
00905     dist *= dv(i);
00906 
00907   fftw_plan plan = octave_fftw_planner::create_plan (FFTW_BACKWARD, rank,
00908                                                      dv, 1, 1, dist, in, out);
00909 
00910   fftw_execute_dft (plan,
00911         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00912         reinterpret_cast<fftw_complex *> (out));
00913 
00914   const size_t npts = dv.numel ();
00915   const Complex scale = npts;
00916   for (size_t i = 0; i < npts; i++)
00917     out[i] /= scale;
00918 
00919   return 0;
00920 }
00921 
00922 int
00923 octave_fftw::fft (const float *in, FloatComplex *out, size_t npts,
00924                   size_t nsamples, octave_idx_type stride, octave_idx_type dist)
00925 {
00926   dist = (dist < 0 ? npts : dist);
00927 
00928   dim_vector dv (npts, 1);
00929   fftwf_plan plan = octave_float_fftw_planner::create_plan (1, dv, nsamples,
00930                                                             stride, dist,
00931                                                             in, out);
00932 
00933   fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
00934                         reinterpret_cast<fftwf_complex *> (out));
00935 
00936   // Need to create other half of the transform.
00937 
00938   convert_packcomplex_1d (out, nsamples, npts, stride, dist);
00939 
00940   return 0;
00941 }
00942 
00943 int
00944 octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts,
00945                   size_t nsamples, octave_idx_type stride, octave_idx_type dist)
00946 {
00947   dist = (dist < 0 ? npts : dist);
00948 
00949   dim_vector dv (npts, 1);
00950   fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_FORWARD, 1,
00951                                                             dv, nsamples,
00952                                                             stride, dist,
00953                                                             in, out);
00954 
00955   fftwf_execute_dft (plan,
00956         reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
00957         reinterpret_cast<fftwf_complex *> (out));
00958 
00959   return 0;
00960 }
00961 
00962 int
00963 octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts,
00964                    size_t nsamples, octave_idx_type stride, octave_idx_type dist)
00965 {
00966   dist = (dist < 0 ? npts : dist);
00967 
00968   dim_vector dv (npts, 1);
00969   fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_BACKWARD, 1,
00970                                                             dv, nsamples,
00971                                                             stride, dist,
00972                                                             in, out);
00973 
00974   fftwf_execute_dft (plan,
00975         reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
00976         reinterpret_cast<fftwf_complex *> (out));
00977 
00978   const FloatComplex scale = npts;
00979   for (size_t j = 0; j < nsamples; j++)
00980     for (size_t i = 0; i < npts; i++)
00981       out[i*stride + j*dist] /= scale;
00982 
00983   return 0;
00984 }
00985 
00986 int
00987 octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank,
00988                     const dim_vector &dv)
00989 {
00990   octave_idx_type dist = 1;
00991   for (int i = 0; i < rank; i++)
00992     dist *= dv(i);
00993 
00994   // Fool with the position of the start of the output matrix, so that
00995   // creating other half of the matrix won't cause cache problems.
00996 
00997   octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
00998 
00999   fftwf_plan plan = octave_float_fftw_planner::create_plan (rank, dv, 1, 1,
01000                                                             dist, in,
01001                                                             out + offset);
01002 
01003   fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
01004                         reinterpret_cast<fftwf_complex *> (out+ offset));
01005 
01006   // Need to create other half of the transform.
01007 
01008   convert_packcomplex_Nd (out, dv);
01009 
01010   return 0;
01011 }
01012 
01013 int
01014 octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
01015                     const dim_vector &dv)
01016 {
01017   octave_idx_type dist = 1;
01018   for (int i = 0; i < rank; i++)
01019     dist *= dv(i);
01020 
01021   fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_FORWARD,
01022                                                             rank, dv, 1, 1,
01023                                                             dist, in, out);
01024 
01025   fftwf_execute_dft (plan,
01026         reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
01027         reinterpret_cast<fftwf_complex *> (out));
01028 
01029   return 0;
01030 }
01031 
01032 int
01033 octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
01034                      const dim_vector &dv)
01035 {
01036   octave_idx_type dist = 1;
01037   for (int i = 0; i < rank; i++)
01038     dist *= dv(i);
01039 
01040   fftwf_plan plan = octave_float_fftw_planner::create_plan (FFTW_BACKWARD,
01041                                                             rank, dv, 1, 1,
01042                                                             dist, in, out);
01043 
01044   fftwf_execute_dft (plan,
01045         reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
01046         reinterpret_cast<fftwf_complex *> (out));
01047 
01048   const size_t npts = dv.numel ();
01049   const FloatComplex scale = npts;
01050   for (size_t i = 0; i < npts; i++)
01051     out[i] /= scale;
01052 
01053   return 0;
01054 }
01055 
01056 #endif
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines