GNU Octave  4.4.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
oct-fftw.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2001-2018 John W. Eaton
4 
5 This file is part of Octave.
6 
7 Octave is free software: you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <https://www.gnu.org/licenses/>.
20 
21 */
22 
23 #if defined (HAVE_CONFIG_H)
24 # include "config.h"
25 #endif
26 
27 #if defined (HAVE_FFTW3_H)
28 # include <fftw3.h>
29 #endif
30 
31 #include "lo-error.h"
32 #include "oct-fftw.h"
33 #include "oct-locbuf.h"
34 #include "quit.h"
35 #include "singleton-cleanup.h"
36 
37 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
38 # include "nproc-wrapper.h"
39 #endif
40 
41 namespace octave
42 {
43 #if defined (HAVE_FFTW)
44 
45  fftw_planner *fftw_planner::instance = nullptr;
46 
47  // Helper class to create and cache FFTW plans for both 1D and
48  // 2D. This implementation defaults to using FFTW_ESTIMATE to create
49  // the plans, which in theory is suboptimal, but provides quite
50  // reasonable performance in practice.
51 
52  // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
53  // will destroy the input and output arrays. We must, therefore, create a
54  // temporary input array with the same size and 16-byte alignment as
55  // the original array when using a different planner strategy.
56  // Note that we also use any wisdom that is available, either in a
57  // FFTW3 system wide file or as supplied by the user.
58 
59  // FIXME: if we can ensure 16 byte alignment in Array<T>
60  // (<T> *data) the FFTW3 can use SIMD instructions for further
61  // acceleration.
62 
63  // Note that it is profitable to store the FFTW3 plans, for small FFTs.
64 
66  : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
67  rsimd_align (false), nthreads (1)
68  {
69  plan[0] = plan[1] = nullptr;
70  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
71  simd_align[0] = simd_align[1] = false;
72  inplace[0] = inplace[1] = false;
73  n[0] = n[1] = dim_vector ();
74 
75 #if defined (HAVE_FFTW3_THREADS)
76  int init_ret = fftw_init_threads ();
77  if (! init_ret)
78  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
79 
80  // Use number of processors available to the current process
81  // This can be later changed with fftw ("threads", nthreads).
83  fftw_plan_with_nthreads (nthreads);
84 #endif
85 
86  // If we have a system wide wisdom file, import it.
87  fftw_import_system_wisdom ();
88  }
89 
91  {
92  fftw_plan *plan_p;
93 
94  plan_p = reinterpret_cast<fftw_plan *> (&rplan);
95  if (*plan_p)
96  fftw_destroy_plan (*plan_p);
97 
98  plan_p = reinterpret_cast<fftw_plan *> (&plan[0]);
99  if (*plan_p)
100  fftw_destroy_plan (*plan_p);
101 
102  plan_p = reinterpret_cast<fftw_plan *> (&plan[1]);
103  if (*plan_p)
104  fftw_destroy_plan (*plan_p);
105  }
106 
107  bool
109  {
110  bool retval = true;
111 
112  if (! instance)
113  {
114  instance = new fftw_planner ();
115 
116  if (instance)
118  }
119 
120  if (! instance)
121  (*current_liboctave_error_handler)
122  ("unable to create fftw_planner object!");
123 
124  return retval;
125  }
126 
127  void
129  {
130 #if defined (HAVE_FFTW3_THREADS)
131  if (instance_ok () && nt != threads ())
132  {
133  instance->nthreads = nt;
134  fftw_plan_with_nthreads (nt);
135  // Clear the current plans.
136  instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
137  }
138 #else
139  (*current_liboctave_warning_handler)
140  ("unable to change number of threads without FFTW thread support");
141 #endif
142  }
143 
144 #define CHECK_SIMD_ALIGNMENT(x) \
145  (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
146 
147  void *
148  fftw_planner::do_create_plan (int dir, const int rank,
149  const dim_vector& dims,
150  octave_idx_type howmany,
151  octave_idx_type stride,
152  octave_idx_type dist,
153  const Complex *in, Complex *out)
154  {
155  int which = (dir == FFTW_FORWARD) ? 0 : 1;
156  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&plan[which]);
157  bool create_new_plan = false;
158  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
159  bool ioinplace = (in == out);
160 
161  // Don't create a new plan if we have a non SIMD plan already but
162  // can do SIMD. This prevents endlessly recreating plans if we
163  // change the alignment.
164 
165  if (plan[which] == nullptr || d[which] != dist || s[which] != stride
166  || r[which] != rank || h[which] != howmany
167  || ioinplace != inplace[which]
168  || ((ioalign != simd_align[which]) ? ! ioalign : false))
169  create_new_plan = true;
170  else
171  {
172  // We still might not have the same shape of array.
173 
174  for (int i = 0; i < rank; i++)
175  if (dims(i) != n[which](i))
176  {
177  create_new_plan = true;
178  break;
179  }
180  }
181 
182  if (create_new_plan)
183  {
184  d[which] = dist;
185  s[which] = stride;
186  r[which] = rank;
187  h[which] = howmany;
188  simd_align[which] = ioalign;
189  inplace[which] = ioinplace;
190  n[which] = dims;
191 
192  // Note reversal of dimensions for column major storage in FFTW.
193  octave_idx_type nn = 1;
194  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
195 
196  for (int i = 0, j = rank-1; i < rank; i++, j--)
197  {
198  tmp[i] = dims(j);
199  nn *= dims(j);
200  }
201 
202  int plan_flags = 0;
203  bool plan_destroys_in = true;
204 
205  switch (meth)
206  {
207  case UNKNOWN:
208  case ESTIMATE:
209  plan_flags |= FFTW_ESTIMATE;
210  plan_destroys_in = false;
211  break;
212  case MEASURE:
213  plan_flags |= FFTW_MEASURE;
214  break;
215  case PATIENT:
216  plan_flags |= FFTW_PATIENT;
217  break;
218  case EXHAUSTIVE:
219  plan_flags |= FFTW_EXHAUSTIVE;
220  break;
221  case HYBRID:
222  if (nn < 8193)
223  plan_flags |= FFTW_MEASURE;
224  else
225  {
226  plan_flags |= FFTW_ESTIMATE;
227  plan_destroys_in = false;
228  }
229  break;
230  }
231 
232  if (ioalign)
233  plan_flags &= ~FFTW_UNALIGNED;
234  else
235  plan_flags |= FFTW_UNALIGNED;
236 
237  if (*cur_plan_p)
238  fftw_destroy_plan (*cur_plan_p);
239 
240  if (plan_destroys_in)
241  {
242  // Create matrix with the same size and 16-byte alignment as input
243  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
244  itmp = reinterpret_cast<Complex *>
245  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
246  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
247 
248  *cur_plan_p =
249  fftw_plan_many_dft (rank, tmp, howmany,
250  reinterpret_cast<fftw_complex *> (itmp),
251  nullptr, stride, dist,
252  reinterpret_cast<fftw_complex *> (out),
253  nullptr, stride, dist, dir, plan_flags);
254  }
255  else
256  {
257  *cur_plan_p =
258  fftw_plan_many_dft (rank, tmp, howmany,
259  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
260  nullptr, stride, dist,
261  reinterpret_cast<fftw_complex *> (out),
262  nullptr, stride, dist, dir, plan_flags);
263  }
264 
265  if (*cur_plan_p == nullptr)
266  (*current_liboctave_error_handler) ("Error creating fftw plan");
267  }
268 
269  return *cur_plan_p;
270  }
271 
272  void *
274  octave_idx_type howmany,
275  octave_idx_type stride,
276  octave_idx_type dist,
277  const double *in, Complex *out)
278  {
279  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&rplan);
280  bool create_new_plan = false;
281  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
282 
283  // Don't create a new plan if we have a non SIMD plan already but
284  // can do SIMD. This prevents endlessly recreating plans if we
285  // change the alignment.
286 
287  if (rplan == nullptr || rd != dist || rs != stride || rr != rank
288  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
289  create_new_plan = true;
290  else
291  {
292  // We still might not have the same shape of array.
293 
294  for (int i = 0; i < rank; i++)
295  if (dims(i) != rn(i))
296  {
297  create_new_plan = true;
298  break;
299  }
300  }
301 
302  if (create_new_plan)
303  {
304  rd = dist;
305  rs = stride;
306  rr = rank;
307  rh = howmany;
308  rsimd_align = ioalign;
309  rn = dims;
310 
311  // Note reversal of dimensions for column major storage in FFTW.
312  octave_idx_type nn = 1;
313  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
314 
315  for (int i = 0, j = rank-1; i < rank; i++, j--)
316  {
317  tmp[i] = dims(j);
318  nn *= dims(j);
319  }
320 
321  int plan_flags = 0;
322  bool plan_destroys_in = true;
323 
324  switch (meth)
325  {
326  case UNKNOWN:
327  case ESTIMATE:
328  plan_flags |= FFTW_ESTIMATE;
329  plan_destroys_in = false;
330  break;
331  case MEASURE:
332  plan_flags |= FFTW_MEASURE;
333  break;
334  case PATIENT:
335  plan_flags |= FFTW_PATIENT;
336  break;
337  case EXHAUSTIVE:
338  plan_flags |= FFTW_EXHAUSTIVE;
339  break;
340  case HYBRID:
341  if (nn < 8193)
342  plan_flags |= FFTW_MEASURE;
343  else
344  {
345  plan_flags |= FFTW_ESTIMATE;
346  plan_destroys_in = false;
347  }
348  break;
349  }
350 
351  if (ioalign)
352  plan_flags &= ~FFTW_UNALIGNED;
353  else
354  plan_flags |= FFTW_UNALIGNED;
355 
356  if (*cur_plan_p)
357  fftw_destroy_plan (*cur_plan_p);
358 
359  if (plan_destroys_in)
360  {
361  // Create matrix with the same size and 16-byte alignment as input
362  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
363  itmp = reinterpret_cast<double *>
364  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
365  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
366 
367  *cur_plan_p =
368  fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
369  nullptr, stride, dist,
370  reinterpret_cast<fftw_complex *> (out),
371  nullptr, stride, dist, plan_flags);
372  }
373  else
374  {
375  *cur_plan_p =
376  fftw_plan_many_dft_r2c (rank, tmp, howmany,
377  (const_cast<double *> (in)),
378  nullptr, stride, dist,
379  reinterpret_cast<fftw_complex *> (out),
380  nullptr, stride, dist, plan_flags);
381  }
382 
383  if (*cur_plan_p == nullptr)
384  (*current_liboctave_error_handler) ("Error creating fftw plan");
385  }
386 
387  return *cur_plan_p;
388  }
389 
392  {
393  return meth;
394  }
395 
398  {
399  FftwMethod ret = meth;
400  if (_meth == ESTIMATE || _meth == MEASURE
401  || _meth == PATIENT || _meth == EXHAUSTIVE
402  || _meth == HYBRID)
403  {
404  if (meth != _meth)
405  {
406  meth = _meth;
407  if (rplan)
408  fftw_destroy_plan (reinterpret_cast<fftw_plan> (rplan));
409  if (plan[0])
410  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[0]));
411  if (plan[1])
412  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[1]));
413  rplan = plan[0] = plan[1] = nullptr;
414  }
415  }
416  else
417  ret = UNKNOWN;
418  return ret;
419  }
420 
422 
424  : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
425  rsimd_align (false), nthreads (1)
426  {
427  plan[0] = plan[1] = nullptr;
428  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
429  simd_align[0] = simd_align[1] = false;
430  inplace[0] = inplace[1] = false;
431  n[0] = n[1] = dim_vector ();
432 
433 #if defined (HAVE_FFTW3F_THREADS)
434  int init_ret = fftwf_init_threads ();
435  if (! init_ret)
436  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
437 
438  // Use number of processors available to the current process
439  // This can be later changed with fftw ("threads", nthreads).
441  fftwf_plan_with_nthreads (nthreads);
442 #endif
443 
444  // If we have a system wide wisdom file, import it.
445  fftwf_import_system_wisdom ();
446  }
447 
449  {
450  fftwf_plan *plan_p;
451 
452  plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
453  if (*plan_p)
454  fftwf_destroy_plan (*plan_p);
455 
456  plan_p = reinterpret_cast<fftwf_plan *> (&plan[0]);
457  if (*plan_p)
458  fftwf_destroy_plan (*plan_p);
459 
460  plan_p = reinterpret_cast<fftwf_plan *> (&plan[1]);
461  if (*plan_p)
462  fftwf_destroy_plan (*plan_p);
463  }
464 
465  bool
467  {
468  bool retval = true;
469 
470  if (! instance)
471  {
472  instance = new float_fftw_planner ();
473 
474  if (instance)
476  }
477 
478  if (! instance)
479  (*current_liboctave_error_handler)
480  ("unable to create fftw_planner object!");
481 
482  return retval;
483  }
484 
485  void
487  {
488 #if defined (HAVE_FFTW3F_THREADS)
489  if (instance_ok () && nt != threads ())
490  {
491  instance->nthreads = nt;
492  fftwf_plan_with_nthreads (nt);
493  // Clear the current plans.
494  instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
495  }
496 #else
497  (*current_liboctave_warning_handler)
498  ("unable to change number of threads without FFTW thread support");
499 #endif
500  }
501 
502  void *
503  float_fftw_planner::do_create_plan (int dir, const int rank,
504  const dim_vector& dims,
505  octave_idx_type howmany,
506  octave_idx_type stride,
507  octave_idx_type dist,
508  const FloatComplex *in,
509  FloatComplex *out)
510  {
511  int which = (dir == FFTW_FORWARD) ? 0 : 1;
512  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&plan[which]);
513  bool create_new_plan = false;
514  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
515  bool ioinplace = (in == out);
516 
517  // Don't create a new plan if we have a non SIMD plan already but
518  // can do SIMD. This prevents endlessly recreating plans if we
519  // change the alignment.
520 
521  if (plan[which] == nullptr || d[which] != dist || s[which] != stride
522  || r[which] != rank || h[which] != howmany
523  || ioinplace != inplace[which]
524  || ((ioalign != simd_align[which]) ? ! ioalign : false))
525  create_new_plan = true;
526  else
527  {
528  // We still might not have the same shape of array.
529 
530  for (int i = 0; i < rank; i++)
531  if (dims(i) != n[which](i))
532  {
533  create_new_plan = true;
534  break;
535  }
536  }
537 
538  if (create_new_plan)
539  {
540  d[which] = dist;
541  s[which] = stride;
542  r[which] = rank;
543  h[which] = howmany;
544  simd_align[which] = ioalign;
545  inplace[which] = ioinplace;
546  n[which] = dims;
547 
548  // Note reversal of dimensions for column major storage in FFTW.
549  octave_idx_type nn = 1;
550  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
551 
552  for (int i = 0, j = rank-1; i < rank; i++, j--)
553  {
554  tmp[i] = dims(j);
555  nn *= dims(j);
556  }
557 
558  int plan_flags = 0;
559  bool plan_destroys_in = true;
560 
561  switch (meth)
562  {
563  case UNKNOWN:
564  case ESTIMATE:
565  plan_flags |= FFTW_ESTIMATE;
566  plan_destroys_in = false;
567  break;
568  case MEASURE:
569  plan_flags |= FFTW_MEASURE;
570  break;
571  case PATIENT:
572  plan_flags |= FFTW_PATIENT;
573  break;
574  case EXHAUSTIVE:
575  plan_flags |= FFTW_EXHAUSTIVE;
576  break;
577  case HYBRID:
578  if (nn < 8193)
579  plan_flags |= FFTW_MEASURE;
580  else
581  {
582  plan_flags |= FFTW_ESTIMATE;
583  plan_destroys_in = false;
584  }
585  break;
586  }
587 
588  if (ioalign)
589  plan_flags &= ~FFTW_UNALIGNED;
590  else
591  plan_flags |= FFTW_UNALIGNED;
592 
593  if (*cur_plan_p)
594  fftwf_destroy_plan (*cur_plan_p);
595 
596  if (plan_destroys_in)
597  {
598  // Create matrix with the same size and 16-byte alignment as input
599  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
600  itmp = reinterpret_cast<FloatComplex *>
601  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
602  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
603 
604  *cur_plan_p =
605  fftwf_plan_many_dft (rank, tmp, howmany,
606  reinterpret_cast<fftwf_complex *> (itmp),
607  nullptr, stride, dist,
608  reinterpret_cast<fftwf_complex *> (out),
609  nullptr, stride, dist, dir, plan_flags);
610  }
611  else
612  {
613  *cur_plan_p =
614  fftwf_plan_many_dft (rank, tmp, howmany,
615  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
616  nullptr, stride, dist,
617  reinterpret_cast<fftwf_complex *> (out),
618  nullptr, stride, dist, dir, plan_flags);
619  }
620 
621  if (*cur_plan_p == nullptr)
622  (*current_liboctave_error_handler) ("Error creating fftw plan");
623  }
624 
625  return *cur_plan_p;
626  }
627 
628  void *
630  octave_idx_type howmany,
631  octave_idx_type stride,
632  octave_idx_type dist,
633  const float *in, FloatComplex *out)
634  {
635  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
636  bool create_new_plan = false;
637  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
638 
639  // Don't create a new plan if we have a non SIMD plan already but
640  // can do SIMD. This prevents endlessly recreating plans if we
641  // change the alignment.
642 
643  if (rplan == nullptr || rd != dist || rs != stride || rr != rank
644  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
645  create_new_plan = true;
646  else
647  {
648  // We still might not have the same shape of array.
649 
650  for (int i = 0; i < rank; i++)
651  if (dims(i) != rn(i))
652  {
653  create_new_plan = true;
654  break;
655  }
656  }
657 
658  if (create_new_plan)
659  {
660  rd = dist;
661  rs = stride;
662  rr = rank;
663  rh = howmany;
664  rsimd_align = ioalign;
665  rn = dims;
666 
667  // Note reversal of dimensions for column major storage in FFTW.
668  octave_idx_type nn = 1;
669  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
670 
671  for (int i = 0, j = rank-1; i < rank; i++, j--)
672  {
673  tmp[i] = dims(j);
674  nn *= dims(j);
675  }
676 
677  int plan_flags = 0;
678  bool plan_destroys_in = true;
679 
680  switch (meth)
681  {
682  case UNKNOWN:
683  case ESTIMATE:
684  plan_flags |= FFTW_ESTIMATE;
685  plan_destroys_in = false;
686  break;
687  case MEASURE:
688  plan_flags |= FFTW_MEASURE;
689  break;
690  case PATIENT:
691  plan_flags |= FFTW_PATIENT;
692  break;
693  case EXHAUSTIVE:
694  plan_flags |= FFTW_EXHAUSTIVE;
695  break;
696  case HYBRID:
697  if (nn < 8193)
698  plan_flags |= FFTW_MEASURE;
699  else
700  {
701  plan_flags |= FFTW_ESTIMATE;
702  plan_destroys_in = false;
703  }
704  break;
705  }
706 
707  if (ioalign)
708  plan_flags &= ~FFTW_UNALIGNED;
709  else
710  plan_flags |= FFTW_UNALIGNED;
711 
712  if (*cur_plan_p)
713  fftwf_destroy_plan (*cur_plan_p);
714 
715  if (plan_destroys_in)
716  {
717  // Create matrix with the same size and 16-byte alignment as input
718  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
719  itmp = reinterpret_cast<float *>
720  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
721  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
722 
723  *cur_plan_p =
724  fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
725  nullptr, stride, dist,
726  reinterpret_cast<fftwf_complex *> (out),
727  nullptr, stride, dist, plan_flags);
728  }
729  else
730  {
731  *cur_plan_p =
732  fftwf_plan_many_dft_r2c (rank, tmp, howmany,
733  (const_cast<float *> (in)),
734  nullptr, stride, dist,
735  reinterpret_cast<fftwf_complex *> (out),
736  nullptr, stride, dist, plan_flags);
737  }
738 
739  if (*cur_plan_p == nullptr)
740  (*current_liboctave_error_handler) ("Error creating fftw plan");
741  }
742 
743  return *cur_plan_p;
744  }
745 
748  {
749  return meth;
750  }
751 
754  {
755  FftwMethod ret = meth;
756  if (_meth == ESTIMATE || _meth == MEASURE
757  || _meth == PATIENT || _meth == EXHAUSTIVE
758  || _meth == HYBRID)
759  {
760  if (meth != _meth)
761  {
762  meth = _meth;
763  if (rplan)
764  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (rplan));
765  if (plan[0])
766  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[0]));
767  if (plan[1])
768  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[1]));
769  rplan = plan[0] = plan[1] = nullptr;
770  }
771  }
772  else
773  ret = UNKNOWN;
774  return ret;
775  }
776 
777  template <typename T>
778  static inline void
779  convert_packcomplex_1d (T *out, size_t nr, size_t nc,
780  octave_idx_type stride, octave_idx_type dist)
781  {
782  octave_quit ();
783 
784  // Fill in the missing data.
785 
786  for (size_t i = 0; i < nr; i++)
787  for (size_t j = nc/2+1; j < nc; j++)
788  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
789 
790  octave_quit ();
791  }
792 
793  template <typename T>
794  static inline void
796  {
797  size_t nc = dv(0);
798  size_t nr = dv(1);
799  size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
800  size_t nrp = nr * np;
801  T *ptr1, *ptr2;
802 
803  octave_quit ();
804 
805  // Create space for the missing elements.
806 
807  for (size_t i = 0; i < nrp; i++)
808  {
809  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
810  ptr2 = out + i * nc;
811  for (size_t j = 0; j < nc/2+1; j++)
812  *ptr2++ = *ptr1++;
813  }
814 
815  octave_quit ();
816 
817  // Fill in the missing data for the rank = 2 case directly for speed.
818 
819  for (size_t i = 0; i < np; i++)
820  {
821  for (size_t j = 1; j < nr; j++)
822  for (size_t k = nc/2+1; k < nc; k++)
823  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
824 
825  for (size_t j = nc/2+1; j < nc; j++)
826  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
827  }
828 
829  octave_quit ();
830 
831  // Now do the permutations needed for rank > 2 cases.
832 
833  size_t jstart = dv(0) * dv(1);
834  size_t kstep = dv(0);
835  size_t nel = dv.numel ();
836 
837  for (int inner = 2; inner < dv.ndims (); inner++)
838  {
839  size_t jmax = jstart * dv(inner);
840  for (size_t i = 0; i < nel; i+=jmax)
841  for (size_t j = jstart, jj = jmax-jstart; j < jj;
842  j+=jstart, jj-=jstart)
843  for (size_t k = 0; k < jstart; k+= kstep)
844  for (size_t l = nc/2+1; l < nc; l++)
845  {
846  T tmp = out[i+ j + k + l];
847  out[i + j + k + l] = out[i + jj + k + l];
848  out[i + jj + k + l] = tmp;
849  }
850  jstart = jmax;
851  }
852 
853  octave_quit ();
854  }
855 
856  int
857  fftw::fft (const double *in, Complex *out, size_t npts,
858  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
859  {
860  dist = (dist < 0 ? npts : dist);
861 
862  dim_vector dv (npts, 1);
863  void *vplan = fftw_planner::create_plan (1, dv, nsamples,
864  stride, dist, in, out);
865  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
866 
867  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
868  reinterpret_cast<fftw_complex *> (out));
869 
870  // Need to create other half of the transform.
871 
872  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
873 
874  return 0;
875  }
876 
877  int
878  fftw::fft (const Complex *in, Complex *out, size_t npts,
879  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
880  {
881  dist = (dist < 0 ? npts : dist);
882 
883  dim_vector dv (npts, 1);
884  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
885  nsamples, stride,
886  dist, in, out);
887  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
888 
889  fftw_execute_dft (plan,
890  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
891  reinterpret_cast<fftw_complex *> (out));
892 
893  return 0;
894  }
895 
896  int
897  fftw::ifft (const Complex *in, Complex *out, size_t npts,
898  size_t nsamples, octave_idx_type stride,
899  octave_idx_type dist)
900  {
901  dist = (dist < 0 ? npts : dist);
902 
903  dim_vector dv (npts, 1);
904  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
905  stride, dist, in, out);
906  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
907 
908  fftw_execute_dft (plan,
909  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
910  reinterpret_cast<fftw_complex *> (out));
911 
912  const Complex scale = npts;
913  for (size_t j = 0; j < nsamples; j++)
914  for (size_t i = 0; i < npts; i++)
915  out[i*stride + j*dist] /= scale;
916 
917  return 0;
918  }
919 
920  int
921  fftw::fftNd (const double *in, Complex *out, const int rank,
922  const dim_vector& dv)
923  {
924  octave_idx_type dist = 1;
925  for (int i = 0; i < rank; i++)
926  dist *= dv(i);
927 
928  // Fool with the position of the start of the output matrix, so that
929  // creating other half of the matrix won't cause cache problems.
930 
931  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
932 
933  void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
934  in, out + offset);
935  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
936 
937  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
938  reinterpret_cast<fftw_complex *> (out+ offset));
939 
940  // Need to create other half of the transform.
941 
942  convert_packcomplex_Nd (out, dv);
943 
944  return 0;
945  }
946 
947  int
948  fftw::fftNd (const Complex *in, Complex *out, const int rank,
949  const dim_vector& dv)
950  {
951  octave_idx_type dist = 1;
952  for (int i = 0; i < rank; i++)
953  dist *= dv(i);
954 
955  void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
956  1, 1, dist, in, out);
957  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
958 
959  fftw_execute_dft (plan,
960  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
961  reinterpret_cast<fftw_complex *> (out));
962 
963  return 0;
964  }
965 
966  int
967  fftw::ifftNd (const Complex *in, Complex *out, const int rank,
968  const dim_vector& dv)
969  {
970  octave_idx_type dist = 1;
971  for (int i = 0; i < rank; i++)
972  dist *= dv(i);
973 
974  void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
975  1, 1, dist, in, out);
976  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
977 
978  fftw_execute_dft (plan,
979  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
980  reinterpret_cast<fftw_complex *> (out));
981 
982  const size_t npts = dv.numel ();
983  const Complex scale = npts;
984  for (size_t i = 0; i < npts; i++)
985  out[i] /= scale;
986 
987  return 0;
988  }
989 
990  int
991  fftw::fft (const float *in, FloatComplex *out, size_t npts,
992  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
993  {
994  dist = (dist < 0 ? npts : dist);
995 
996  dim_vector dv (npts, 1);
997  void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
998  dist, in, out);
999  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1000 
1001  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1002  reinterpret_cast<fftwf_complex *> (out));
1003 
1004  // Need to create other half of the transform.
1005 
1006  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1007 
1008  return 0;
1009  }
1010 
1011  int
1012  fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts,
1013  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
1014  {
1015  dist = (dist < 0 ? npts : dist);
1016 
1017  dim_vector dv (npts, 1);
1018  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1019  nsamples, stride, dist,
1020  in, out);
1021  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1022 
1023  fftwf_execute_dft (plan,
1024  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1025  reinterpret_cast<fftwf_complex *> (out));
1026 
1027  return 0;
1028  }
1029 
1030  int
1031  fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts,
1032  size_t nsamples, octave_idx_type stride,
1033  octave_idx_type dist)
1034  {
1035  dist = (dist < 0 ? npts : dist);
1036 
1037  dim_vector dv (npts, 1);
1038  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1039  nsamples, stride, dist,
1040  in, out);
1041  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1042 
1043  fftwf_execute_dft (plan,
1044  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1045  reinterpret_cast<fftwf_complex *> (out));
1046 
1047  const FloatComplex scale = npts;
1048  for (size_t j = 0; j < nsamples; j++)
1049  for (size_t i = 0; i < npts; i++)
1050  out[i*stride + j*dist] /= scale;
1051 
1052  return 0;
1053  }
1054 
1055  int
1056  fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1057  const dim_vector& dv)
1058  {
1059  octave_idx_type dist = 1;
1060  for (int i = 0; i < rank; i++)
1061  dist *= dv(i);
1062 
1063  // Fool with the position of the start of the output matrix, so that
1064  // creating other half of the matrix won't cause cache problems.
1065 
1066  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1067 
1068  void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1069  in, out + offset);
1070  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1071 
1072  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1073  reinterpret_cast<fftwf_complex *> (out+ offset));
1074 
1075  // Need to create other half of the transform.
1076 
1077  convert_packcomplex_Nd (out, dv);
1078 
1079  return 0;
1080  }
1081 
1082  int
1083  fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1084  const dim_vector& dv)
1085  {
1086  octave_idx_type dist = 1;
1087  for (int i = 0; i < rank; i++)
1088  dist *= dv(i);
1089 
1090  void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1091  1, 1, dist, in, out);
1092  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1093 
1094  fftwf_execute_dft (plan,
1095  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1096  reinterpret_cast<fftwf_complex *> (out));
1097 
1098  return 0;
1099  }
1100 
1101  int
1102  fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1103  const dim_vector& dv)
1104  {
1105  octave_idx_type dist = 1;
1106  for (int i = 0; i < rank; i++)
1107  dist *= dv(i);
1108 
1109  void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1110  1, 1, dist, in, out);
1111  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1112 
1113  fftwf_execute_dft (plan,
1114  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1115  reinterpret_cast<fftwf_complex *> (out));
1116 
1117  const size_t npts = dv.numel ();
1118  const FloatComplex scale = npts;
1119  for (size_t i = 0; i < npts; i++)
1120  out[i] /= scale;
1121 
1122  return 0;
1123  }
1124 
1125 #endif
1126 
1127  std::string
1129  {
1130 #if defined (HAVE_FFTW)
1132 #else
1133  return "none";
1134 #endif
1135  }
1136 
1137  std::string
1139  {
1140 #if defined (HAVE_FFTW)
1142 #else
1143  return "none";
1144 #endif
1145  }
1146 }
FftwMethod do_method(void)
Definition: oct-fftw.cc:747
dim_vector n[2]
Definition: oct-fftw.h:152
static void convert_packcomplex_Nd(T *out, const dim_vector &dv)
Definition: oct-fftw.cc:795
static int threads(void)
Definition: oct-fftw.h:106
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.h:68
static void convert_packcomplex_1d(T *out, size_t nr, size_t nc, octave_idx_type stride, octave_idx_type dist)
Definition: oct-fftw.cc:779
static bool instance_ok(void)
Definition: oct-fftw.cc:108
for large enough k
Definition: lu.cc:617
static F77_INT nn
Definition: DASPK.cc:62
static int ifft(const Complex *in, Complex *out, size_t npts, size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:897
static int fftNd(const double *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:921
octave_idx_type h[2]
Definition: oct-fftw.h:295
void * do_create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.cc:148
ComplexColumnVector conj(const ComplexColumnVector &a)
Definition: CColVector.cc:215
static int fft(const double *in, Complex *out, size_t npts, size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:857
static void cleanup_instance(void)
Definition: oct-fftw.h:115
octave_idx_type rh
Definition: oct-fftw.h:316
octave_idx_type s[2]
Definition: oct-fftw.h:143
octave_idx_type d[2]
Definition: oct-fftw.h:286
static void add(fptr f)
octave_idx_type s[2]
Definition: oct-fftw.h:289
octave_idx_type rh
Definition: oct-fftw.h:170
std::string fftwf_version(void)
Definition: oct-fftw.cc:1138
unsigned long int octave_num_processors_wrapper(enum octave_nproc_query octave_query)
Definition: nproc-wrapper.c:37
double tmp
Definition: data.cc:6252
is false
Definition: cellfun.cc:400
octave_value retval
Definition: data.cc:6246
std::string fftw_version(void)
Definition: oct-fftw.cc:1128
static fftw_planner * instance
Definition: oct-fftw.h:113
static float_fftw_planner * instance
Definition: oct-fftw.h:259
void * do_create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.cc:503
the exceeded dimensions are set to if fewer subscripts than dimensions are the exceeding dimensions are merged into the final requested dimension For consider the following dims
Definition: sub2ind.cc:255
static bool instance_ok(void)
Definition: oct-fftw.cc:466
octave_idx_type d[2]
Definition: oct-fftw.h:140
octave_idx_type rd
Definition: oct-fftw.h:161
octave_idx_type rd
Definition: oct-fftw.h:307
octave_idx_type h[2]
Definition: oct-fftw.h:149
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:362
static void * create_plan(int dir, const int rank, const dim_vector &dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.h:214
FftwMethod do_method(void)
Definition: oct-fftw.cc:391
static int threads(void)
Definition: oct-fftw.h:252
static int ifftNd(const Complex *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:967
void scale(Matrix &m, double x, double y, double z)
Definition: graphics.cc:5442
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:41
#define CHECK_SIMD_ALIGNMENT(x)
Definition: oct-fftw.cc:144
for i
Definition: data.cc:5264
std::complex< float > FloatComplex
Definition: oct-cmplx.h:32
static void cleanup_instance(void)
Definition: oct-fftw.h:261
octave_idx_type ndims(void) const
Number of dimensions.
Definition: dim-vector.h:295
FftwMethod meth
Definition: oct-fftw.h:132
std::complex< double > Complex
Definition: oct-cmplx.h:31
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
octave_idx_type rs
Definition: oct-fftw.h:310
octave_idx_type rs
Definition: oct-fftw.h:164
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:888
dim_vector dv
Definition: sub2ind.cc:263