GNU Octave  4.2.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
oct-fftw.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2001-2017 John W. Eaton
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 #if defined (HAVE_CONFIG_H)
24 # include "config.h"
25 #endif
26 
27 #include <iostream>
28 #include <string>
29 #include <vector>
30 
31 #if defined (HAVE_FFTW3_H)
32 # include <fftw3.h>
33 #endif
34 
35 #include "lo-error.h"
36 #include "oct-fftw.h"
37 #include "quit.h"
38 #include "oct-locbuf.h"
39 #include "singleton-cleanup.h"
40 
41 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
42 # include "nproc-wrapper.h"
43 #endif
44 
45 #if defined (HAVE_FFTW)
46 
48 
49 // Helper class to create and cache FFTW plans for both 1D and
50 // 2D. This implementation defaults to using FFTW_ESTIMATE to create
51 // the plans, which in theory is suboptimal, but provides quite
52 // reasonable performance in practice.
53 
54 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
55 // will destroy the input and output arrays. We must, therefore, create a
56 // temporary input array with the same size and 16-byte alignment as
57 // the original array when using a different planner strategy.
58 // Note that we also use any wisdom that is available, either in a
59 // FFTW3 system wide file or as supplied by the user.
60 
61 // FIXME: if we can ensure 16 byte alignment in Array<T>
62 // (<T> *data) the FFTW3 can use SIMD instructions for further
63 // acceleration.
64 
65 // Note that it is profitable to store the FFTW3 plans, for small FFTs.
66 
68  : meth (ESTIMATE), rplan (0), rd (0), rs (0), rr (0), rh (0), rn (),
69  rsimd_align (false), nthreads (1)
70 {
71  plan[0] = plan[1] = 0;
72  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
73  simd_align[0] = simd_align[1] = false;
74  inplace[0] = inplace[1] = false;
75  n[0] = n[1] = dim_vector ();
76 
77 #if defined (HAVE_FFTW3_THREADS)
78  int init_ret = fftw_init_threads ();
79  if (! init_ret)
80  (*current_liboctave_error_handler) ("Error initializing FFTW threads");
81 
82  // Use number of processors available to the current process
83  // This can be later changed with fftw ("threads", nthreads).
85  fftw_plan_with_nthreads (nthreads);
86 #endif
87 
88  // If we have a system wide wisdom file, import it.
89  fftw_import_system_wisdom ();
90 }
91 
93 {
94  fftw_plan *plan_p;
95 
96  plan_p = reinterpret_cast<fftw_plan *> (&rplan);
97  if (*plan_p)
98  fftw_destroy_plan (*plan_p);
99 
100  plan_p = reinterpret_cast<fftw_plan *> (&plan[0]);
101  if (*plan_p)
102  fftw_destroy_plan (*plan_p);
103 
104  plan_p = reinterpret_cast<fftw_plan *> (&plan[1]);
105  if (*plan_p)
106  fftw_destroy_plan (*plan_p);
107 }
108 
109 bool
111 {
112  bool retval = true;
113 
114  if (! instance)
115  {
116  instance = new octave_fftw_planner ();
117 
118  if (instance)
120  }
121 
122  if (! instance)
123  (*current_liboctave_error_handler)
124  ("unable to create octave_fftw_planner object!");
125 
126  return retval;
127 }
128 
129 void
131 {
132 #if defined (HAVE_FFTW3_THREADS)
133  if (instance_ok () && nt != threads ())
134  {
135  instance->nthreads = nt;
136  fftw_plan_with_nthreads (nt);
137  // Clear the current plans.
138  instance->rplan = instance->plan[0] = instance->plan[1] = 0;
139  }
140 #else
141  (*current_liboctave_warning_handler)
142  ("unable to change number of threads without FFTW thread support");
143 #endif
144 }
145 
146 #define CHECK_SIMD_ALIGNMENT(x) \
147  (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0)
148 
149 void *
150 octave_fftw_planner::do_create_plan (int dir, const int rank,
151  const dim_vector dims,
152  octave_idx_type howmany,
153  octave_idx_type stride,
154  octave_idx_type dist,
155  const Complex *in, Complex *out)
156 {
157  int which = (dir == FFTW_FORWARD) ? 0 : 1;
158  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&plan[which]);
159  bool create_new_plan = false;
160  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
161  bool ioinplace = (in == out);
162 
163  // Don't create a new plan if we have a non SIMD plan already but
164  // can do SIMD. This prevents endlessly recreating plans if we
165  // change the alignment.
166 
167  if (plan[which] == 0 || d[which] != dist || s[which] != stride
168  || r[which] != rank || h[which] != howmany
169  || ioinplace != inplace[which]
170  || ((ioalign != simd_align[which]) ? ! ioalign : false))
171  create_new_plan = true;
172  else
173  {
174  // We still might not have the same shape of array.
175 
176  for (int i = 0; i < rank; i++)
177  if (dims(i) != n[which](i))
178  {
179  create_new_plan = true;
180  break;
181  }
182  }
183 
184  if (create_new_plan)
185  {
186  d[which] = dist;
187  s[which] = stride;
188  r[which] = rank;
189  h[which] = howmany;
190  simd_align[which] = ioalign;
191  inplace[which] = ioinplace;
192  n[which] = dims;
193 
194  // Note reversal of dimensions for column major storage in FFTW.
195  octave_idx_type nn = 1;
196  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
197 
198  for (int i = 0, j = rank-1; i < rank; i++, j--)
199  {
200  tmp[i] = dims(j);
201  nn *= dims(j);
202  }
203 
204  int plan_flags = 0;
205  bool plan_destroys_in = true;
206 
207  switch (meth)
208  {
209  case UNKNOWN:
210  case ESTIMATE:
211  plan_flags |= FFTW_ESTIMATE;
212  plan_destroys_in = false;
213  break;
214  case MEASURE:
215  plan_flags |= FFTW_MEASURE;
216  break;
217  case PATIENT:
218  plan_flags |= FFTW_PATIENT;
219  break;
220  case EXHAUSTIVE:
221  plan_flags |= FFTW_EXHAUSTIVE;
222  break;
223  case HYBRID:
224  if (nn < 8193)
225  plan_flags |= FFTW_MEASURE;
226  else
227  {
228  plan_flags |= FFTW_ESTIMATE;
229  plan_destroys_in = false;
230  }
231  break;
232  }
233 
234  if (ioalign)
235  plan_flags &= ~FFTW_UNALIGNED;
236  else
237  plan_flags |= FFTW_UNALIGNED;
238 
239  if (*cur_plan_p)
240  fftw_destroy_plan (*cur_plan_p);
241 
242  if (plan_destroys_in)
243  {
244  // Create matrix with the same size and 16-byte alignment as input
245  OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
246  itmp = reinterpret_cast<Complex *>
247  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
248  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
249 
250  *cur_plan_p =
251  fftw_plan_many_dft (rank, tmp, howmany,
252  reinterpret_cast<fftw_complex *> (itmp),
253  0, stride, dist,
254  reinterpret_cast<fftw_complex *> (out),
255  0, stride, dist, dir, plan_flags);
256  }
257  else
258  {
259  *cur_plan_p =
260  fftw_plan_many_dft (rank, tmp, howmany,
261  reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
262  0, stride, dist,
263  reinterpret_cast<fftw_complex *> (out),
264  0, stride, dist, dir, plan_flags);
265  }
266 
267  if (*cur_plan_p == 0)
268  (*current_liboctave_error_handler) ("Error creating fftw plan");
269  }
270 
271  return *cur_plan_p;
272 }
273 
274 void *
276  octave_idx_type howmany,
277  octave_idx_type stride,
278  octave_idx_type dist,
279  const double *in, Complex *out)
280 {
281  fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&rplan);
282  bool create_new_plan = false;
283  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
284 
285  // Don't create a new plan if we have a non SIMD plan already but
286  // can do SIMD. This prevents endlessly recreating plans if we
287  // change the alignment.
288 
289  if (rplan == 0 || rd != dist || rs != stride || rr != rank
290  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
291  create_new_plan = true;
292  else
293  {
294  // We still might not have the same shape of array.
295 
296  for (int i = 0; i < rank; i++)
297  if (dims(i) != rn(i))
298  {
299  create_new_plan = true;
300  break;
301  }
302  }
303 
304  if (create_new_plan)
305  {
306  rd = dist;
307  rs = stride;
308  rr = rank;
309  rh = howmany;
310  rsimd_align = ioalign;
311  rn = dims;
312 
313  // Note reversal of dimensions for column major storage in FFTW.
314  octave_idx_type nn = 1;
315  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
316 
317  for (int i = 0, j = rank-1; i < rank; i++, j--)
318  {
319  tmp[i] = dims(j);
320  nn *= dims(j);
321  }
322 
323  int plan_flags = 0;
324  bool plan_destroys_in = true;
325 
326  switch (meth)
327  {
328  case UNKNOWN:
329  case ESTIMATE:
330  plan_flags |= FFTW_ESTIMATE;
331  plan_destroys_in = false;
332  break;
333  case MEASURE:
334  plan_flags |= FFTW_MEASURE;
335  break;
336  case PATIENT:
337  plan_flags |= FFTW_PATIENT;
338  break;
339  case EXHAUSTIVE:
340  plan_flags |= FFTW_EXHAUSTIVE;
341  break;
342  case HYBRID:
343  if (nn < 8193)
344  plan_flags |= FFTW_MEASURE;
345  else
346  {
347  plan_flags |= FFTW_ESTIMATE;
348  plan_destroys_in = false;
349  }
350  break;
351  }
352 
353  if (ioalign)
354  plan_flags &= ~FFTW_UNALIGNED;
355  else
356  plan_flags |= FFTW_UNALIGNED;
357 
358  if (*cur_plan_p)
359  fftw_destroy_plan (*cur_plan_p);
360 
361  if (plan_destroys_in)
362  {
363  // Create matrix with the same size and 16-byte alignment as input
364  OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
365  itmp = reinterpret_cast<double *>
366  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
367  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
368 
369  *cur_plan_p =
370  fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
371  0, stride, dist,
372  reinterpret_cast<fftw_complex *> (out),
373  0, stride, dist, plan_flags);
374  }
375  else
376  {
377  *cur_plan_p =
378  fftw_plan_many_dft_r2c (rank, tmp, howmany,
379  (const_cast<double *> (in)),
380  0, stride, dist,
381  reinterpret_cast<fftw_complex *> (out),
382  0, stride, dist, plan_flags);
383  }
384 
385  if (*cur_plan_p == 0)
386  (*current_liboctave_error_handler) ("Error creating fftw plan");
387  }
388 
389  return *cur_plan_p;
390 }
391 
394 {
395  return meth;
396 }
397 
400 {
401  FftwMethod ret = meth;
402  if (_meth == ESTIMATE || _meth == MEASURE
403  || _meth == PATIENT || _meth == EXHAUSTIVE
404  || _meth == HYBRID)
405  {
406  if (meth != _meth)
407  {
408  meth = _meth;
409  if (rplan)
410  fftw_destroy_plan (reinterpret_cast<fftw_plan> (rplan));
411  if (plan[0])
412  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[0]));
413  if (plan[1])
414  fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[1]));
415  rplan = plan[0] = plan[1] = 0;
416  }
417  }
418  else
419  ret = UNKNOWN;
420  return ret;
421 }
422 
424 
426  : meth (ESTIMATE), rplan (0), rd (0), rs (0), rr (0), rh (0), rn (),
427  rsimd_align (false), nthreads (1)
428 {
429  plan[0] = plan[1] = 0;
430  d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
431  simd_align[0] = simd_align[1] = false;
432  inplace[0] = inplace[1] = false;
433  n[0] = n[1] = dim_vector ();
434 
435 #if defined (HAVE_FFTW3F_THREADS)
436  int init_ret = fftwf_init_threads ();
437  if (! init_ret)
438  (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
439 
440  // Use number of processors available to the current process
441  // This can be later changed with fftw ("threads", nthreads).
443  fftwf_plan_with_nthreads (nthreads);
444 #endif
445 
446  // If we have a system wide wisdom file, import it.
447  fftwf_import_system_wisdom ();
448 }
449 
451 {
452  fftwf_plan *plan_p;
453 
454  plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
455  if (*plan_p)
456  fftwf_destroy_plan (*plan_p);
457 
458  plan_p = reinterpret_cast<fftwf_plan *> (&plan[0]);
459  if (*plan_p)
460  fftwf_destroy_plan (*plan_p);
461 
462  plan_p = reinterpret_cast<fftwf_plan *> (&plan[1]);
463  if (*plan_p)
464  fftwf_destroy_plan (*plan_p);
465 }
466 
467 bool
469 {
470  bool retval = true;
471 
472  if (! instance)
473  {
475 
476  if (instance)
478  }
479 
480  if (! instance)
481  (*current_liboctave_error_handler)
482  ("unable to create octave_fftw_planner object!");
483 
484  return retval;
485 }
486 
487 void
489 {
490 #if defined (HAVE_FFTW3F_THREADS)
491  if (instance_ok () && nt != threads ())
492  {
493  instance->nthreads = nt;
494  fftwf_plan_with_nthreads (nt);
495  // Clear the current plans.
496  instance->rplan = instance->plan[0] = instance->plan[1] = 0;
497  }
498 #else
499  (*current_liboctave_warning_handler)
500  ("unable to change number of threads without FFTW thread support");
501 #endif
502 }
503 
504 void *
506  const dim_vector dims,
507  octave_idx_type howmany,
508  octave_idx_type stride,
509  octave_idx_type dist,
510  const FloatComplex *in,
511  FloatComplex *out)
512 {
513  int which = (dir == FFTW_FORWARD) ? 0 : 1;
514  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&plan[which]);
515  bool create_new_plan = false;
516  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
517  bool ioinplace = (in == out);
518 
519  // Don't create a new plan if we have a non SIMD plan already but
520  // can do SIMD. This prevents endlessly recreating plans if we
521  // change the alignment.
522 
523  if (plan[which] == 0 || d[which] != dist || s[which] != stride
524  || r[which] != rank || h[which] != howmany
525  || ioinplace != inplace[which]
526  || ((ioalign != simd_align[which]) ? ! ioalign : false))
527  create_new_plan = true;
528  else
529  {
530  // We still might not have the same shape of array.
531 
532  for (int i = 0; i < rank; i++)
533  if (dims(i) != n[which](i))
534  {
535  create_new_plan = true;
536  break;
537  }
538  }
539 
540  if (create_new_plan)
541  {
542  d[which] = dist;
543  s[which] = stride;
544  r[which] = rank;
545  h[which] = howmany;
546  simd_align[which] = ioalign;
547  inplace[which] = ioinplace;
548  n[which] = dims;
549 
550  // Note reversal of dimensions for column major storage in FFTW.
551  octave_idx_type nn = 1;
552  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
553 
554  for (int i = 0, j = rank-1; i < rank; i++, j--)
555  {
556  tmp[i] = dims(j);
557  nn *= dims(j);
558  }
559 
560  int plan_flags = 0;
561  bool plan_destroys_in = true;
562 
563  switch (meth)
564  {
565  case UNKNOWN:
566  case ESTIMATE:
567  plan_flags |= FFTW_ESTIMATE;
568  plan_destroys_in = false;
569  break;
570  case MEASURE:
571  plan_flags |= FFTW_MEASURE;
572  break;
573  case PATIENT:
574  plan_flags |= FFTW_PATIENT;
575  break;
576  case EXHAUSTIVE:
577  plan_flags |= FFTW_EXHAUSTIVE;
578  break;
579  case HYBRID:
580  if (nn < 8193)
581  plan_flags |= FFTW_MEASURE;
582  else
583  {
584  plan_flags |= FFTW_ESTIMATE;
585  plan_destroys_in = false;
586  }
587  break;
588  }
589 
590  if (ioalign)
591  plan_flags &= ~FFTW_UNALIGNED;
592  else
593  plan_flags |= FFTW_UNALIGNED;
594 
595  if (*cur_plan_p)
596  fftwf_destroy_plan (*cur_plan_p);
597 
598  if (plan_destroys_in)
599  {
600  // Create matrix with the same size and 16-byte alignment as input
601  OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
602  itmp = reinterpret_cast<FloatComplex *>
603  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
604  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
605 
606  *cur_plan_p =
607  fftwf_plan_many_dft (rank, tmp, howmany,
608  reinterpret_cast<fftwf_complex *> (itmp),
609  0, stride, dist,
610  reinterpret_cast<fftwf_complex *> (out),
611  0, stride, dist, dir, plan_flags);
612  }
613  else
614  {
615  *cur_plan_p =
616  fftwf_plan_many_dft (rank, tmp, howmany,
617  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
618  0, stride, dist,
619  reinterpret_cast<fftwf_complex *> (out),
620  0, stride, dist, dir, plan_flags);
621  }
622 
623  if (*cur_plan_p == 0)
624  (*current_liboctave_error_handler) ("Error creating fftw plan");
625  }
626 
627  return *cur_plan_p;
628 }
629 
630 void *
632  const dim_vector dims,
633  octave_idx_type howmany,
634  octave_idx_type stride,
635  octave_idx_type dist,
636  const float *in, FloatComplex *out)
637 {
638  fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
639  bool create_new_plan = false;
640  bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
641 
642  // Don't create a new plan if we have a non SIMD plan already but
643  // can do SIMD. This prevents endlessly recreating plans if we
644  // change the alignment.
645 
646  if (rplan == 0 || rd != dist || rs != stride || rr != rank
647  || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
648  create_new_plan = true;
649  else
650  {
651  // We still might not have the same shape of array.
652 
653  for (int i = 0; i < rank; i++)
654  if (dims(i) != rn(i))
655  {
656  create_new_plan = true;
657  break;
658  }
659  }
660 
661  if (create_new_plan)
662  {
663  rd = dist;
664  rs = stride;
665  rr = rank;
666  rh = howmany;
667  rsimd_align = ioalign;
668  rn = dims;
669 
670  // Note reversal of dimensions for column major storage in FFTW.
671  octave_idx_type nn = 1;
672  OCTAVE_LOCAL_BUFFER (int, tmp, rank);
673 
674  for (int i = 0, j = rank-1; i < rank; i++, j--)
675  {
676  tmp[i] = dims(j);
677  nn *= dims(j);
678  }
679 
680  int plan_flags = 0;
681  bool plan_destroys_in = true;
682 
683  switch (meth)
684  {
685  case UNKNOWN:
686  case ESTIMATE:
687  plan_flags |= FFTW_ESTIMATE;
688  plan_destroys_in = false;
689  break;
690  case MEASURE:
691  plan_flags |= FFTW_MEASURE;
692  break;
693  case PATIENT:
694  plan_flags |= FFTW_PATIENT;
695  break;
696  case EXHAUSTIVE:
697  plan_flags |= FFTW_EXHAUSTIVE;
698  break;
699  case HYBRID:
700  if (nn < 8193)
701  plan_flags |= FFTW_MEASURE;
702  else
703  {
704  plan_flags |= FFTW_ESTIMATE;
705  plan_destroys_in = false;
706  }
707  break;
708  }
709 
710  if (ioalign)
711  plan_flags &= ~FFTW_UNALIGNED;
712  else
713  plan_flags |= FFTW_UNALIGNED;
714 
715  if (*cur_plan_p)
716  fftwf_destroy_plan (*cur_plan_p);
717 
718  if (plan_destroys_in)
719  {
720  // Create matrix with the same size and 16-byte alignment as input
721  OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
722  itmp = reinterpret_cast<float *>
723  (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) +
724  ((reinterpret_cast<ptrdiff_t> (in)) & 0xF));
725 
726  *cur_plan_p =
727  fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
728  0, stride, dist,
729  reinterpret_cast<fftwf_complex *> (out),
730  0, stride, dist, plan_flags);
731  }
732  else
733  {
734  *cur_plan_p =
735  fftwf_plan_many_dft_r2c (rank, tmp, howmany,
736  (const_cast<float *> (in)),
737  0, stride, dist,
738  reinterpret_cast<fftwf_complex *> (out),
739  0, stride, dist, plan_flags);
740  }
741 
742  if (*cur_plan_p == 0)
743  (*current_liboctave_error_handler) ("Error creating fftw plan");
744  }
745 
746  return *cur_plan_p;
747 }
748 
751 {
752  return meth;
753 }
754 
757 {
758  FftwMethod ret = meth;
759  if (_meth == ESTIMATE || _meth == MEASURE
760  || _meth == PATIENT || _meth == EXHAUSTIVE
761  || _meth == HYBRID)
762  {
763  if (meth != _meth)
764  {
765  meth = _meth;
766  if (rplan)
767  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (rplan));
768  if (plan[0])
769  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[0]));
770  if (plan[1])
771  fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[1]));
772  rplan = plan[0] = plan[1] = 0;
773  }
774  }
775  else
776  ret = UNKNOWN;
777  return ret;
778 }
779 
780 template <typename T>
781 static inline void
782 convert_packcomplex_1d (T *out, size_t nr, size_t nc,
783  octave_idx_type stride, octave_idx_type dist)
784 {
785  octave_quit ();
786 
787  // Fill in the missing data.
788 
789  for (size_t i = 0; i < nr; i++)
790  for (size_t j = nc/2+1; j < nc; j++)
791  out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
792 
793  octave_quit ();
794 }
795 
796 template <typename T>
797 static inline void
799 {
800  size_t nc = dv(0);
801  size_t nr = dv(1);
802  size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
803  size_t nrp = nr * np;
804  T *ptr1, *ptr2;
805 
806  octave_quit ();
807 
808  // Create space for the missing elements.
809 
810  for (size_t i = 0; i < nrp; i++)
811  {
812  ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
813  ptr2 = out + i * nc;
814  for (size_t j = 0; j < nc/2+1; j++)
815  *ptr2++ = *ptr1++;
816  }
817 
818  octave_quit ();
819 
820  // Fill in the missing data for the rank = 2 case directly for speed.
821 
822  for (size_t i = 0; i < np; i++)
823  {
824  for (size_t j = 1; j < nr; j++)
825  for (size_t k = nc/2+1; k < nc; k++)
826  out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
827 
828  for (size_t j = nc/2+1; j < nc; j++)
829  out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
830  }
831 
832  octave_quit ();
833 
834  // Now do the permutations needed for rank > 2 cases.
835 
836  size_t jstart = dv(0) * dv(1);
837  size_t kstep = dv(0);
838  size_t nel = dv.numel ();
839 
840  for (int inner = 2; inner < dv.ndims (); inner++)
841  {
842  size_t jmax = jstart * dv(inner);
843  for (size_t i = 0; i < nel; i+=jmax)
844  for (size_t j = jstart, jj = jmax-jstart; j < jj;
845  j+=jstart, jj-=jstart)
846  for (size_t k = 0; k < jstart; k+= kstep)
847  for (size_t l = nc/2+1; l < nc; l++)
848  {
849  T tmp = out[i+ j + k + l];
850  out[i + j + k + l] = out[i + jj + k + l];
851  out[i + jj + k + l] = tmp;
852  }
853  jstart = jmax;
854  }
855 
856  octave_quit ();
857 }
858 
859 int
860 octave_fftw::fft (const double *in, Complex *out, size_t npts,
861  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
862 {
863  dist = (dist < 0 ? npts : dist);
864 
865  dim_vector dv (npts, 1);
866  void *vplan = octave_fftw_planner::create_plan (1, dv, nsamples,
867  stride, dist, in, out);
868  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
869 
870  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
871  reinterpret_cast<fftw_complex *> (out));
872 
873  // Need to create other half of the transform.
874 
875  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
876 
877  return 0;
878 }
879 
880 int
881 octave_fftw::fft (const Complex *in, Complex *out, size_t npts,
882  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
883 {
884  dist = (dist < 0 ? npts : dist);
885 
886  dim_vector dv (npts, 1);
887  void *vplan = octave_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
888  nsamples, stride,
889  dist, in, out);
890  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
891 
892  fftw_execute_dft (plan,
893  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
894  reinterpret_cast<fftw_complex *> (out));
895 
896  return 0;
897 }
898 
899 int
900 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts,
901  size_t nsamples, octave_idx_type stride,
902  octave_idx_type dist)
903 {
904  dist = (dist < 0 ? npts : dist);
905 
906  dim_vector dv (npts, 1);
907  void *vplan = octave_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
908  nsamples, stride,
909  dist, in, out);
910  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
911 
912  fftw_execute_dft (plan,
913  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
914  reinterpret_cast<fftw_complex *> (out));
915 
916  const Complex scale = npts;
917  for (size_t j = 0; j < nsamples; j++)
918  for (size_t i = 0; i < npts; i++)
919  out[i*stride + j*dist] /= scale;
920 
921  return 0;
922 }
923 
924 int
925 octave_fftw::fftNd (const double *in, Complex *out, const int rank,
926  const dim_vector &dv)
927 {
928  octave_idx_type dist = 1;
929  for (int i = 0; i < rank; i++)
930  dist *= dv(i);
931 
932  // Fool with the position of the start of the output matrix, so that
933  // creating other half of the matrix won't cause cache problems.
934 
935  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
936 
937  void *vplan = octave_fftw_planner::create_plan (rank, dv, 1, 1, dist,
938  in, out + offset);
939  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
940 
941  fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
942  reinterpret_cast<fftw_complex *> (out+ offset));
943 
944  // Need to create other half of the transform.
945 
946  convert_packcomplex_Nd (out, dv);
947 
948  return 0;
949 }
950 
951 int
952 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank,
953  const dim_vector &dv)
954 {
955  octave_idx_type dist = 1;
956  for (int i = 0; i < rank; i++)
957  dist *= dv(i);
958 
959  void *vplan = octave_fftw_planner::create_plan (FFTW_FORWARD, rank,
960  dv, 1, 1, dist, in, out);
961  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
962 
963  fftw_execute_dft (plan,
964  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
965  reinterpret_cast<fftw_complex *> (out));
966 
967  return 0;
968 }
969 
970 int
971 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank,
972  const dim_vector &dv)
973 {
974  octave_idx_type dist = 1;
975  for (int i = 0; i < rank; i++)
976  dist *= dv(i);
977 
978  void *vplan = octave_fftw_planner::create_plan (FFTW_BACKWARD, rank,
979  dv, 1, 1, dist, in, out);
980  fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
981 
982  fftw_execute_dft (plan,
983  reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
984  reinterpret_cast<fftw_complex *> (out));
985 
986  const size_t npts = dv.numel ();
987  const Complex scale = npts;
988  for (size_t i = 0; i < npts; i++)
989  out[i] /= scale;
990 
991  return 0;
992 }
993 
994 int
995 octave_fftw::fft (const float *in, FloatComplex *out, size_t npts,
996  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
997 {
998  dist = (dist < 0 ? npts : dist);
999 
1000  dim_vector dv (npts, 1);
1001  void *vplan = octave_float_fftw_planner::create_plan (1, dv, nsamples,
1002  stride, dist,
1003  in, out);
1004  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1005 
1006  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1007  reinterpret_cast<fftwf_complex *> (out));
1008 
1009  // Need to create other half of the transform.
1010 
1011  convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1012 
1013  return 0;
1014 }
1015 
1016 int
1017 octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts,
1018  size_t nsamples, octave_idx_type stride, octave_idx_type dist)
1019 {
1020  dist = (dist < 0 ? npts : dist);
1021 
1022  dim_vector dv (npts, 1);
1023  void *vplan = octave_float_fftw_planner::create_plan (FFTW_FORWARD, 1,
1024  dv, nsamples,
1025  stride, dist,
1026  in, out);
1027  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1028 
1029  fftwf_execute_dft (plan,
1030  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1031  reinterpret_cast<fftwf_complex *> (out));
1032 
1033  return 0;
1034 }
1035 
1036 int
1037 octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts,
1038  size_t nsamples, octave_idx_type stride,
1039  octave_idx_type dist)
1040 {
1041  dist = (dist < 0 ? npts : dist);
1042 
1043  dim_vector dv (npts, 1);
1044  void *vplan = octave_float_fftw_planner::create_plan (FFTW_BACKWARD, 1,
1045  dv, nsamples,
1046  stride, dist,
1047  in, out);
1048  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1049 
1050  fftwf_execute_dft (plan,
1051  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1052  reinterpret_cast<fftwf_complex *> (out));
1053 
1054  const FloatComplex scale = npts;
1055  for (size_t j = 0; j < nsamples; j++)
1056  for (size_t i = 0; i < npts; i++)
1057  out[i*stride + j*dist] /= scale;
1058 
1059  return 0;
1060 }
1061 
1062 int
1063 octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1064  const dim_vector &dv)
1065 {
1066  octave_idx_type dist = 1;
1067  for (int i = 0; i < rank; i++)
1068  dist *= dv(i);
1069 
1070  // Fool with the position of the start of the output matrix, so that
1071  // creating other half of the matrix won't cause cache problems.
1072 
1073  octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1074 
1075  void *vplan = octave_float_fftw_planner::create_plan (rank, dv, 1, 1,
1076  dist, in,
1077  out + offset);
1078  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1079 
1080  fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1081  reinterpret_cast<fftwf_complex *> (out+ offset));
1082 
1083  // Need to create other half of the transform.
1084 
1085  convert_packcomplex_Nd (out, dv);
1086 
1087  return 0;
1088 }
1089 
1090 int
1091 octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1092  const dim_vector &dv)
1093 {
1094  octave_idx_type dist = 1;
1095  for (int i = 0; i < rank; i++)
1096  dist *= dv(i);
1097 
1098  void *vplan = octave_float_fftw_planner::create_plan (FFTW_FORWARD,
1099  rank, dv, 1, 1,
1100  dist, in, out);
1101  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1102 
1103  fftwf_execute_dft (plan,
1104  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1105  reinterpret_cast<fftwf_complex *> (out));
1106 
1107  return 0;
1108 }
1109 
1110 int
1111 octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1112  const dim_vector &dv)
1113 {
1114  octave_idx_type dist = 1;
1115  for (int i = 0; i < rank; i++)
1116  dist *= dv(i);
1117 
1118  void *vplan = octave_float_fftw_planner::create_plan (FFTW_BACKWARD,
1119  rank, dv, 1, 1,
1120  dist, in, out);
1121  fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1122 
1123  fftwf_execute_dft (plan,
1124  reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1125  reinterpret_cast<fftwf_complex *> (out));
1126 
1127  const size_t npts = dv.numel ();
1128  const FloatComplex scale = npts;
1129  for (size_t i = 0; i < npts; i++)
1130  out[i] /= scale;
1131 
1132  return 0;
1133 }
1134 
1135 #endif
1136 
1139 {
1140 #if defined (HAVE_FFTW)
1141  return fftw_version;
1142 #else
1143  return "none";
1144 #endif
1145 }
1146 
1149 {
1150 #if defined (HAVE_FFTW)
1151  return fftwf_version;
1152 #else
1153  return "none";
1154 #endif
1155 }
static octave_fftw_planner * instance
Definition: oct-fftw.h:109
static bool instance_ok(void)
Definition: oct-fftw.cc:110
octave_idx_type d[2]
Definition: oct-fftw.h:281
static void convert_packcomplex_1d(T *out, size_t nr, size_t nc, octave_idx_type stride, octave_idx_type dist)
Definition: oct-fftw.cc:782
static int fft(const double *in, Complex *out, size_t npts, size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:860
static octave_idx_type nn
Definition: DASPK.cc:71
bool simd_align[2]
Definition: oct-fftw.h:150
octave_idx_type s[2]
Definition: oct-fftw.h:284
octave_fftw_planner(void)
Definition: oct-fftw.cc:67
for large enough k
Definition: lu.cc:606
static octave_float_fftw_planner * instance
Definition: oct-fftw.h:254
void * do_create_plan(int dir, const int rank, const dim_vector dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.cc:150
static int threads(void)
Definition: oct-fftw.h:241
octave_idx_type rs
Definition: oct-fftw.h:160
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:389
static int fftNd(const double *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:925
octave_idx_type s[2]
Definition: oct-fftw.h:139
octave_idx_type h[2]
Definition: oct-fftw.h:290
static void * create_plan(int dir, const int rank, const dim_vector dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const Complex *in, Complex *out)
Definition: oct-fftw.h:58
octave_idx_type rh
Definition: oct-fftw.h:166
ComplexColumnVector conj(const ComplexColumnVector &a)
Definition: CColVector.cc:216
FftwMethod do_method(void)
Definition: oct-fftw.cc:393
std::string octave_fftwf_version(void)
Definition: oct-fftw.cc:1148
static void add(fptr f)
void * plan[2]
Definition: oct-fftw.h:133
static void convert_packcomplex_Nd(T *out, const dim_vector &dv)
Definition: oct-fftw.cc:798
static int ifft(const Complex *in, Complex *out, size_t npts, size_t nsamples=1, octave_idx_type stride=1, octave_idx_type dist=-1)
Definition: oct-fftw.cc:900
static void * create_plan(int dir, const int rank, const dim_vector dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.h:203
unsigned long int octave_num_processors_wrapper(enum octave_nproc_query octave_query)
Definition: nproc-wrapper.c:37
double tmp
Definition: data.cc:6300
is false
Definition: cellfun.cc:398
octave_value retval
Definition: data.cc:6294
static int threads(void)
Definition: oct-fftw.h:96
the exceeded dimensions are set to if fewer subscripts than dimensions are the exceeding dimensions are merged into the final requested dimension For consider the following dims
Definition: sub2ind.cc:255
octave_idx_type rs
Definition: oct-fftw.h:305
static int ifftNd(const Complex *, Complex *, const int, const dim_vector &)
Definition: oct-fftw.cc:971
octave_idx_type rd
Definition: oct-fftw.h:157
FftwMethod meth
Definition: oct-fftw.h:128
octave_idx_type d[2]
Definition: oct-fftw.h:136
std::string octave_fftw_version(void)
Definition: oct-fftw.cc:1138
dim_vector rn
Definition: oct-fftw.h:169
=val(i)}if ode{val(i)}occurs in table i
Definition: lookup.cc:239
octave_idx_type h[2]
Definition: oct-fftw.h:145
void * do_create_plan(int dir, const int rank, const dim_vector dims, octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, const FloatComplex *in, FloatComplex *out)
Definition: oct-fftw.cc:505
static bool instance_ok(void)
Definition: oct-fftw.cc:468
octave_idx_type ndims(void) const
Number of dimensions.
Definition: dim-vector.h:301
void scale(Matrix &m, double x, double y, double z)
Definition: graphics.cc:5150
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:200
#define CHECK_SIMD_ALIGNMENT(x)
Definition: oct-fftw.cc:146
std::complex< float > FloatComplex
Definition: oct-cmplx.h:32
std::complex< double > Complex
Definition: oct-cmplx.h:31
~octave_fftw_planner(void)
Definition: oct-fftw.cc:92
FftwMethod do_method(void)
Definition: oct-fftw.cc:750
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:854
octave_idx_type rh
Definition: oct-fftw.h:311
dim_vector dv
Definition: sub2ind.cc:263
static void cleanup_instance(void)
Definition: oct-fftw.h:256
octave_idx_type rd
Definition: oct-fftw.h:302
dim_vector n[2]
Definition: oct-fftw.h:148
static void cleanup_instance(void)
Definition: oct-fftw.h:111