GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
mx-inlines.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 1993-2013 John W. Eaton
4 Copyright (C) 2009 Jaroslav Hajek
5 Copyright (C) 2009 VZLU Prague
6 
7 This file is part of Octave.
8 
9 Octave is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 Octave is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Octave; see the file COPYING. If not, see
21 <http://www.gnu.org/licenses/>.
22 
23 */
24 
25 #if !defined (octave_mx_inlines_h)
26 #define octave_mx_inlines_h 1
27 
28 #include <cstddef>
29 #include <cmath>
30 #include <memory>
31 
32 #include "quit.h"
33 
34 #include "oct-cmplx.h"
35 #include "oct-locbuf.h"
36 #include "oct-inttypes.h"
37 #include "Array.h"
38 #include "Array-util.h"
39 
40 #include "bsxfun.h"
41 
42 // Provides some commonly repeated, basic loop templates.
43 
44 template <class R, class S>
45 inline void mx_inline_fill (size_t n, R *r, S s) throw ()
46 { for (size_t i = 0; i < n; i++) r[i] = s; }
47 
48 #define DEFMXUNOP(F, OP) \
49 template <class R, class X> \
50 inline void F (size_t n, R *r, const X *x) throw () \
51 { for (size_t i = 0; i < n; i++) r[i] = OP x[i]; }
52 
54 
55 #define DEFMXUNOPEQ(F, OP) \
56 template <class R> \
57 inline void F (size_t n, R *r) throw () \
58 { for (size_t i = 0; i < n; i++) r[i] = OP r[i]; }
59 
61 
62 #define DEFMXUNBOOLOP(F, OP) \
63 template <class X> \
64 inline void F (size_t n, bool *r, const X *x) throw () \
65 { const X zero = X (); for (size_t i = 0; i < n; i++) r[i] = x[i] OP zero; }
66 
69 
70 #define DEFMXBINOP(F, OP) \
71 template <class R, class X, class Y> \
72 inline void F (size_t n, R *r, const X *x, const Y *y) throw () \
73 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y[i]; } \
74 template <class R, class X, class Y> \
75 inline void F (size_t n, R *r, const X *x, Y y) throw () \
76 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y; } \
77 template <class R, class X, class Y> \
78 inline void F (size_t n, R *r, X x, const Y *y) throw () \
79 { for (size_t i = 0; i < n; i++) r[i] = x OP y[i]; }
80 
85 
86 #define DEFMXBINOPEQ(F, OP) \
87 template <class R, class X> \
88 inline void F (size_t n, R *r, const X *x) throw () \
89 { for (size_t i = 0; i < n; i++) r[i] OP x[i]; } \
90 template <class R, class X> \
91 inline void F (size_t n, R *r, X x) throw () \
92 { for (size_t i = 0; i < n; i++) r[i] OP x; }
93 
98 
99 #define DEFMXCMPOP(F, OP) \
100 template <class X, class Y> \
101 inline void F (size_t n, bool *r, const X *x, const Y *y) throw () \
102 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y[i]; } \
103 template <class X, class Y> \
104 inline void F (size_t n, bool *r, const X *x, Y y) throw () \
105 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y; } \
106 template <class X, class Y> \
107 inline void F (size_t n, bool *r, X x, const Y *y) throw () \
108 { for (size_t i = 0; i < n; i++) r[i] = x OP y[i]; }
109 
116 
117 // Convert to logical value, for logical op purposes.
118 template <class T> inline bool logical_value (T x) { return x; }
119 template <class T> inline bool logical_value (const std::complex<T>& x)
120 { return x.real () != 0 || x.imag () != 0; }
121 template <class T> inline bool logical_value (const octave_int<T>& x)
122 { return x.value (); }
123 
124 template <class X>
125 void mx_inline_not (size_t n, bool *r, const X* x) throw ()
126 {
127  for (size_t i = 0; i < n; i++)
128  r[i] = ! logical_value (x[i]);
129 }
130 
131 inline void mx_inline_not2 (size_t n, bool *r) throw ()
132 {
133  for (size_t i = 0; i < n; i++) r[i] = ! r[i];
134 }
135 
136 #define DEFMXBOOLOP(F, NOT1, OP, NOT2) \
137 template <class X, class Y> \
138 inline void F (size_t n, bool *r, const X *x, const Y *y) throw () \
139 { \
140  for (size_t i = 0; i < n; i++) \
141  r[i] = (NOT1 logical_value (x[i])) OP (NOT2 logical_value (y[i])); \
142 } \
143 template <class X, class Y> \
144 inline void F (size_t n, bool *r, const X *x, Y y) throw () \
145 { \
146  const bool yy = (NOT2 logical_value (y)); \
147  for (size_t i = 0; i < n; i++) \
148  r[i] = (NOT1 logical_value (x[i])) OP yy; \
149 } \
150 template <class X, class Y> \
151 inline void F (size_t n, bool *r, X x, const Y *y) throw () \
152 { \
153  const bool xx = (NOT1 logical_value (x)); \
154  for (size_t i = 0; i < n; i++) \
155  r[i] = xx OP (NOT2 logical_value (y[i])); \
156 }
157 
164 
165 #define DEFMXBOOLOPEQ(F, OP) \
166 template <class X> \
167 inline void F (size_t n, bool *r, const X *x) throw () \
168 { \
169  for (size_t i = 0; i < n; i++) \
170  r[i] OP logical_value (x[i]); \
171 } \
172 template <class X> \
173 inline void F (size_t n, bool *r, X x) throw () \
174 { for (size_t i = 0; i < n; i++) r[i] OP x; }
175 
178 
179 template <class T>
180 inline bool
181 mx_inline_any_nan (size_t n, const T* x) throw ()
182 {
183  for (size_t i = 0; i < n; i++)
184  {
185  if (xisnan (x[i]))
186  return true;
187  }
188 
189  return false;
190 }
191 
192 template <class T>
193 inline bool
194 mx_inline_all_finite (size_t n, const T* x) throw ()
195 {
196  for (size_t i = 0; i < n; i++)
197  {
198  if (! xfinite (x[i]))
199  return false;
200  }
201 
202  return true;
203 }
204 
205 template <class T>
206 inline bool
207 mx_inline_any_negative (size_t n, const T* x) throw ()
208 {
209  for (size_t i = 0; i < n; i++)
210  {
211  if (x[i] < 0)
212  return true;
213  }
214 
215  return false;
216 }
217 
218 template <class T>
219 inline bool
220 mx_inline_any_positive (size_t n, const T* x) throw ()
221 {
222  for (size_t i = 0; i < n; i++)
223  {
224  if (x[i] > 0)
225  return true;
226  }
227 
228  return false;
229 }
230 
231 template<class T>
232 inline bool
233 mx_inline_all_real (size_t n, const std::complex<T>* x) throw ()
234 {
235  for (size_t i = 0; i < n; i++)
236  {
237  if (x[i].imag () != 0)
238  return false;
239  }
240 
241  return true;
242 }
243 
244 #define DEFMXMAPPER(F, FUN) \
245 template <class T> \
246 inline void F (size_t n, T *r, const T *x) throw () \
247 { for (size_t i = 0; i < n; i++) r[i] = FUN (x[i]); }
248 
249 template<class T>
250 inline void mx_inline_real (size_t n, T *r, const std::complex<T>* x) throw ()
251 { for (size_t i = 0; i < n; i++) r[i] = x[i].real (); }
252 template<class T>
253 inline void mx_inline_imag (size_t n, T *r, const std::complex<T>* x) throw ()
254 { for (size_t i = 0; i < n; i++) r[i] = x[i].imag (); }
255 
256 // Pairwise minimums/maximums
257 #define DEFMXMAPPER2(F, FUN) \
258 template <class T> \
259 inline void F (size_t n, T *r, const T *x, const T *y) throw () \
260 { for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y[i]); } \
261 template <class T> \
262 inline void F (size_t n, T *r, const T *x, T y) throw () \
263 { for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y); } \
264 template <class T> \
265 inline void F (size_t n, T *r, T x, const T *y) throw () \
266 { for (size_t i = 0; i < n; i++) r[i] = FUN (x, y[i]); }
267 
270 
271 // Specialize array-scalar max/min
272 #define DEFMINMAXSPEC(T, F, OP) \
273 template <> \
274 inline void F<T> (size_t n, T *r, const T *x, T y) throw () \
275 { \
276  if (xisnan (y)) \
277  std::memcpy (r, x, n * sizeof (T)); \
278  else \
279  for (size_t i = 0; i < n; i++) r[i] = (x[i] OP y) ? x[i] : y; \
280 } \
281 template <> \
282 inline void F<T> (size_t n, T *r, T x, const T *y) throw () \
283 { \
284  if (xisnan (x)) \
285  std::memcpy (r, y, n * sizeof (T)); \
286  else \
287  for (size_t i = 0; i < n; i++) r[i] = (y[i] OP x) ? y[i] : x; \
288 }
289 
291 DEFMINMAXSPEC (double, mx_inline_xmax, >=)
293 DEFMINMAXSPEC (float, mx_inline_xmax, >=)
294 
295 // Pairwise power
296 #define DEFMXMAPPER2X(F, FUN) \
297 template <class R, class X, class Y> \
298 inline void F (size_t n, R *r, const X *x, const Y *y) throw () \
299 { for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y[i]); } \
300 template <class R, class X, class Y> \
301 inline void F (size_t n, R *r, const X *x, Y y) throw () \
302 { for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y); } \
303 template <class R, class X, class Y> \
304 inline void F (size_t n, R *r, X x, const Y *y) throw () \
305 { for (size_t i = 0; i < n; i++) r[i] = FUN (x, y[i]); }
306 
307 // Let the compiler decide which pow to use, whichever best matches the
308 // arguments provided.
309 using std::pow;
311 
312 // Arbitrary function appliers. The function is a template parameter to enable
313 // inlining.
314 template <class R, class X, R fun (X x)>
315 inline void mx_inline_map (size_t n, R *r, const X *x) throw ()
316 { for (size_t i = 0; i < n; i++) r[i] = fun (x[i]); }
317 
318 template <class R, class X, R fun (const X& x)>
319 inline void mx_inline_map (size_t n, R *r, const X *x) throw ()
320 { for (size_t i = 0; i < n; i++) r[i] = fun (x[i]); }
321 
322 // Appliers. Since these call the operation just once, we pass it as
323 // a pointer, to allow the compiler reduce number of instances.
324 
325 template <class R, class X>
326 inline Array<R>
328  void (*op) (size_t, R *, const X *) throw ())
329 {
330  Array<R> r (x.dims ());
331  op (r.numel (), r.fortran_vec (), x.data ());
332  return r;
333 }
334 
335 // Shortcuts for applying mx_inline_map.
336 
337 template <class R, class X, R fun (X)>
338 inline Array<R>
340 {
341  return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>);
342 }
343 
344 template <class R, class X, R fun (const X&)>
345 inline Array<R>
346 do_mx_unary_map (const Array<X>& x)
347 {
348  return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>);
349 }
350 
351 template <class R>
352 inline Array<R>&
354  void (*op) (size_t, R *) throw ())
355 {
356  op (r.numel (), r.fortran_vec ());
357  return r;
358 }
359 
360 template <class R, class X, class Y>
361 inline Array<R>
362 do_mm_binary_op (const Array<X>& x, const Array<Y>& y,
363  void (*op) (size_t, R *, const X *, const Y *) throw (),
364  void (*op1) (size_t, R *, X, const Y *) throw (),
365  void (*op2) (size_t, R *, const X *, Y) throw (),
366  const char *opname)
367 {
368  dim_vector dx = x.dims (), dy = y.dims ();
369  if (dx == dy)
370  {
371  Array<R> r (dx);
372  op (r.length (), r.fortran_vec (), x.data (), y.data ());
373  return r;
374  }
375  else if (is_valid_bsxfun (opname, dx, dy))
376  {
377  return do_bsxfun_op (x, y, op, op1, op2);
378  }
379  else
380  {
381  gripe_nonconformant (opname, dx, dy);
382  return Array<R> ();
383  }
384 }
385 
386 template <class R, class X, class Y>
387 inline Array<R>
388 do_ms_binary_op (const Array<X>& x, const Y& y,
389  void (*op) (size_t, R *, const X *, Y) throw ())
390 {
391  Array<R> r (x.dims ());
392  op (r.length (), r.fortran_vec (), x.data (), y);
393  return r;
394 }
395 
396 template <class R, class X, class Y>
397 inline Array<R>
398 do_sm_binary_op (const X& x, const Array<Y>& y,
399  void (*op) (size_t, R *, X, const Y *) throw ())
400 {
401  Array<R> r (y.dims ());
402  op (r.length (), r.fortran_vec (), x, y.data ());
403  return r;
404 }
405 
406 template <class R, class X>
407 inline Array<R>&
409  void (*op) (size_t, R *, const X *) throw (),
410  void (*op1) (size_t, R *, X) throw (),
411  const char *opname)
412 {
413  dim_vector dr = r.dims (), dx = x.dims ();
414  if (dr == dx)
415  {
416  op (r.length (), r.fortran_vec (), x.data ());
417  }
418  else if (is_valid_inplace_bsxfun (opname, dr, dx))
419  {
420  do_inplace_bsxfun_op (r, x, op, op1);
421  }
422  else
423  gripe_nonconformant (opname, dr, dx);
424  return r;
425 }
426 
427 template <class R, class X>
428 inline Array<R>&
429 do_ms_inplace_op (Array<R>& r, const X& x,
430  void (*op) (size_t, R *, X) throw ())
431 {
432  op (r.length (), r.fortran_vec (), x);
433  return r;
434 }
435 
436 template <class T1, class T2>
437 inline bool
438 mx_inline_equal (size_t n, const T1 *x, const T2 *y) throw ()
439 {
440  for (size_t i = 0; i < n; i++)
441  if (x[i] != y[i])
442  return false;
443  return true;
444 }
445 
446 template <class T>
447 inline bool
449  bool (*op) (size_t, const T *) throw ())
450 {
451  return op (a.numel (), a.data ());
452 }
453 
454 // NOTE: we don't use std::norm because it typically does some heavyweight
455 // magic to avoid underflows, which we don't need here.
456 template <class T>
457 inline T cabsq (const std::complex<T>& c)
458 { return c.real () * c.real () + c.imag () * c.imag (); }
459 
460 // default. works for integers and bool.
461 template <class T>
462 inline bool xis_true (T x) { return x; }
463 template <class T>
464 inline bool xis_false (T x) { return ! x; }
465 // for octave_ints
466 template <class T>
467 inline bool xis_true (const octave_int<T>& x) { return x.value (); }
468 template <class T>
469 inline bool xis_false (const octave_int<T>& x) { return ! x.value (); }
470 // for reals, we want to ignore NaNs.
471 inline bool xis_true (double x) { return ! xisnan (x) && x != 0.0; }
472 inline bool xis_false (double x) { return x == 0.0; }
473 inline bool xis_true (float x) { return ! xisnan (x) && x != 0.0f; }
474 inline bool xis_false (float x) { return x == 0.0f; }
475 // Ditto for complex.
476 inline bool xis_true (const Complex& x) { return ! xisnan (x) && x != 0.0; }
477 inline bool xis_false (const Complex& x) { return x == 0.0; }
478 inline bool xis_true (const FloatComplex& x) { return ! xisnan (x) && x != 0.0f; }
479 inline bool xis_false (const FloatComplex& x) { return x == 0.0f; }
480 
481 #define OP_RED_SUM(ac, el) ac += el
482 #define OP_RED_PROD(ac, el) ac *= el
483 #define OP_RED_SUMSQ(ac, el) ac += el*el
484 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el)
485 
486 inline void op_dble_sum (double& ac, float el)
487 { ac += el; }
488 inline void op_dble_sum (Complex& ac, const FloatComplex& el)
489 { ac += el; } // FIXME: guaranteed?
490 template <class T>
491 inline void op_dble_sum (double& ac, const octave_int<T>& el)
492 { ac += el.double_value (); }
493 
494 // The following two implement a simple short-circuiting.
495 #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue
496 #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue
497 
498 #define OP_RED_FCN(F, TSRC, TRES, OP, ZERO) \
499 template <class T> \
500 inline TRES \
501 F (const TSRC* v, octave_idx_type n) \
502 { \
503  TRES ac = ZERO; \
504  for (octave_idx_type i = 0; i < n; i++) \
505  OP(ac, v[i]); \
506  return ac; \
507 }
508 
509 #define PROMOTE_DOUBLE(T) typename subst_template_param<std::complex, T, double>::type
510 
516 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0)
517 OP_RED_FCN (mx_inline_any, T, bool, OP_RED_ANYC, false)
518 OP_RED_FCN (mx_inline_all, T, bool, OP_RED_ALLC, true)
519 
520 
521 #define OP_RED_FCN2(F, TSRC, TRES, OP, ZERO) \
522 template <class T> \
523 inline void \
524 F (const TSRC* v, TRES *r, octave_idx_type m, octave_idx_type n) \
525 { \
526  for (octave_idx_type i = 0; i < m; i++) \
527  r[i] = ZERO; \
528  for (octave_idx_type j = 0; j < n; j++) \
529  { \
530  for (octave_idx_type i = 0; i < m; i++) \
531  OP(r[i], v[i]); \
532  v += m; \
533  } \
534 }
536 OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0)
537 OP_RED_FCN2 (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0)
538 OP_RED_FCN2 (mx_inline_count, bool, T, OP_RED_SUM, 0)
539 OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1)
540 OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0)
541 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0)
542 
543 #define OP_RED_ANYR(ac, el) ac |= xis_true (el)
544 #define OP_RED_ALLR(ac, el) ac &= xis_true (el)
545 
546 OP_RED_FCN2 (mx_inline_any_r, T, bool, OP_RED_ANYR, false)
547 OP_RED_FCN2 (mx_inline_all_r, T, bool, OP_RED_ALLR, true)
548 
549 // Using the general code for any/all would sacrifice short-circuiting.
550 // OTOH, going by rows would sacrifice cache-coherence. The following algorithm
551 // will achieve both, at the cost of a temporary octave_idx_type array.
552 
553 #define OP_ROW_SHORT_CIRCUIT(F, PRED, ZERO) \
554 template <class T> \
555 inline void \
556 F (const T* v, bool *r, octave_idx_type m, octave_idx_type n) \
557 { \
558  if (n <= 8) \
559  return F ## _r (v, r, m, n); \
560  \
561  /* FIXME: it may be sub-optimal to allocate the buffer here. */ \
562  OCTAVE_LOCAL_BUFFER (octave_idx_type, iact, m); \
563  for (octave_idx_type i = 0; i < m; i++) iact[i] = i; \
564  octave_idx_type nact = m; \
565  for (octave_idx_type j = 0; j < n; j++) \
566  { \
567  octave_idx_type k = 0; \
568  for (octave_idx_type i = 0; i < nact; i++) \
569  { \
570  octave_idx_type ia = iact[i]; \
571  if (! PRED (v[ia])) \
572  iact[k++] = ia; \
573  } \
574  nact = k; \
575  v += m; \
576  } \
577  for (octave_idx_type i = 0; i < m; i++) r[i] = ! ZERO; \
578  for (octave_idx_type i = 0; i < nact; i++) r[iact[i]] = ZERO; \
579 }
580 
581 OP_ROW_SHORT_CIRCUIT (mx_inline_any, xis_true, false)
582 OP_ROW_SHORT_CIRCUIT (mx_inline_all, xis_false, true)
583 
584 #define OP_RED_FCNN(F, TSRC, TRES) \
585 template <class T> \
586 inline void \
587 F (const TSRC *v, TRES *r, octave_idx_type l, \
588  octave_idx_type n, octave_idx_type u) \
589 { \
590  if (l == 1) \
591  { \
592  for (octave_idx_type i = 0; i < u; i++) \
593  { \
594  r[i] = F<T> (v, n); \
595  v += n; \
596  } \
597  } \
598  else \
599  { \
600  for (octave_idx_type i = 0; i < u; i++) \
601  { \
602  F (v, r, l, n); \
603  v += l*n; \
604  r += l; \
605  } \
606  } \
607 }
608 
610 OP_RED_FCNN (mx_inline_dsum, T, PROMOTE_DOUBLE(T))
611 OP_RED_FCNN (mx_inline_count, bool, T)
612 OP_RED_FCNN (mx_inline_prod, T, T)
613 OP_RED_FCNN (mx_inline_sumsq, T, T)
614 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>, T)
615 OP_RED_FCNN (mx_inline_any, T, bool)
616 OP_RED_FCNN (mx_inline_all, T, bool)
617 
618 #define OP_CUM_FCN(F, TSRC, TRES, OP) \
619 template <class T> \
620 inline void \
621 F (const TSRC *v, TRES *r, octave_idx_type n) \
622 { \
623  if (n) \
624  { \
625  TRES t = r[0] = v[0]; \
626  for (octave_idx_type i = 1; i < n; i++) \
627  r[i] = t = t OP v[i]; \
628  } \
629 }
630 
631 OP_CUM_FCN (mx_inline_cumsum, T, T, +)
632 OP_CUM_FCN (mx_inline_cumprod, T, T, *)
633 OP_CUM_FCN (mx_inline_cumcount, bool, T, +)
634 
635 #define OP_CUM_FCN2(F, TSRC, TRES, OP) \
636 template <class T> \
637 inline void \
638 F (const TSRC *v, TRES *r, octave_idx_type m, octave_idx_type n) \
639 { \
640  if (n) \
641  { \
642  for (octave_idx_type i = 0; i < m; i++) \
643  r[i] = v[i]; \
644  const T *r0 = r; \
645  for (octave_idx_type j = 1; j < n; j++) \
646  { \
647  r += m; v += m; \
648  for (octave_idx_type i = 0; i < m; i++) \
649  r[i] = r0[i] OP v[i]; \
650  r0 += m; \
651  } \
652  } \
653 }
654 
655 OP_CUM_FCN2 (mx_inline_cumsum, T, T, +)
656 OP_CUM_FCN2 (mx_inline_cumprod, T, T, *)
657 OP_CUM_FCN2 (mx_inline_cumcount, bool, T, +)
658 
659 #define OP_CUM_FCNN(F, TSRC, TRES) \
660 template <class T> \
661 inline void \
662 F (const TSRC *v, TRES *r, octave_idx_type l, \
663  octave_idx_type n, octave_idx_type u) \
664 { \
665  if (l == 1) \
666  { \
667  for (octave_idx_type i = 0; i < u; i++) \
668  { \
669  F (v, r, n); \
670  v += n; r += n; \
671  } \
672  } \
673  else \
674  { \
675  for (octave_idx_type i = 0; i < u; i++) \
676  { \
677  F (v, r, l, n); \
678  v += l*n; \
679  r += l*n; \
680  } \
681  } \
682 }
683 
685 OP_CUM_FCNN (mx_inline_cumprod, T, T)
686 OP_CUM_FCNN (mx_inline_cumcount, bool, T)
687 
688 #define OP_MINMAX_FCN(F, OP) \
689 template <class T> \
690 void F (const T *v, T *r, octave_idx_type n) \
691 { \
692  if (! n) return; \
693  T tmp = v[0]; \
694  octave_idx_type i = 1; \
695  if (xisnan (tmp)) \
696  { \
697  for (; i < n && xisnan (v[i]); i++) ; \
698  if (i < n) tmp = v[i]; \
699  } \
700  for (; i < n; i++) \
701  if (v[i] OP tmp) tmp = v[i]; \
702  *r = tmp; \
703 } \
704 template <class T> \
705 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \
706 { \
707  if (! n) return; \
708  T tmp = v[0]; \
709  octave_idx_type tmpi = 0; \
710  octave_idx_type i = 1; \
711  if (xisnan (tmp)) \
712  { \
713  for (; i < n && xisnan (v[i]); i++) ; \
714  if (i < n) { tmp = v[i]; tmpi = i; } \
715  } \
716  for (; i < n; i++) \
717  if (v[i] OP tmp) { tmp = v[i]; tmpi = i; }\
718  *r = tmp; \
719  *ri = tmpi; \
720 }
721 
724 
725 // Row reductions will be slightly complicated. We will proceed with checks
726 // for NaNs until we detect that no row will yield a NaN, in which case we
727 // proceed to a faster code.
728 
729 #define OP_MINMAX_FCN2(F, OP) \
730 template <class T> \
731 inline void \
732 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
733 { \
734  if (! n) return; \
735  bool nan = false; \
736  octave_idx_type j = 0; \
737  for (octave_idx_type i = 0; i < m; i++) \
738  { \
739  r[i] = v[i]; \
740  if (xisnan (v[i])) nan = true; \
741  } \
742  j++; v += m; \
743  while (nan && j < n) \
744  { \
745  nan = false; \
746  for (octave_idx_type i = 0; i < m; i++) \
747  { \
748  if (xisnan (v[i])) \
749  nan = true; \
750  else if (xisnan (r[i]) || v[i] OP r[i]) \
751  r[i] = v[i]; \
752  } \
753  j++; v += m; \
754  } \
755  while (j < n) \
756  { \
757  for (octave_idx_type i = 0; i < m; i++) \
758  if (v[i] OP r[i]) r[i] = v[i]; \
759  j++; v += m; \
760  } \
761 } \
762 template <class T> \
763 inline void \
764 F (const T *v, T *r, octave_idx_type *ri, \
765  octave_idx_type m, octave_idx_type n) \
766 { \
767  if (! n) return; \
768  bool nan = false; \
769  octave_idx_type j = 0; \
770  for (octave_idx_type i = 0; i < m; i++) \
771  { \
772  r[i] = v[i]; ri[i] = j; \
773  if (xisnan (v[i])) nan = true; \
774  } \
775  j++; v += m; \
776  while (nan && j < n) \
777  { \
778  nan = false; \
779  for (octave_idx_type i = 0; i < m; i++) \
780  { \
781  if (xisnan (v[i])) \
782  nan = true; \
783  else if (xisnan (r[i]) || v[i] OP r[i]) \
784  { r[i] = v[i]; ri[i] = j; } \
785  } \
786  j++; v += m; \
787  } \
788  while (j < n) \
789  { \
790  for (octave_idx_type i = 0; i < m; i++) \
791  if (v[i] OP r[i]) \
792  { r[i] = v[i]; ri[i] = j; } \
793  j++; v += m; \
794  } \
795 }
796 
798 OP_MINMAX_FCN2 (mx_inline_max, >)
799 
800 #define OP_MINMAX_FCNN(F) \
801 template <class T> \
802 inline void \
803 F (const T *v, T *r, octave_idx_type l, \
804  octave_idx_type n, octave_idx_type u) \
805 { \
806  if (! n) return; \
807  if (l == 1) \
808  { \
809  for (octave_idx_type i = 0; i < u; i++) \
810  { \
811  F (v, r, n); \
812  v += n; r++; \
813  } \
814  } \
815  else \
816  { \
817  for (octave_idx_type i = 0; i < u; i++) \
818  { \
819  F (v, r, l, n); \
820  v += l*n; \
821  r += l; \
822  } \
823  } \
824 } \
825 template <class T> \
826 inline void \
827 F (const T *v, T *r, octave_idx_type *ri, \
828  octave_idx_type l, octave_idx_type n, octave_idx_type u) \
829 { \
830  if (! n) return; \
831  if (l == 1) \
832  { \
833  for (octave_idx_type i = 0; i < u; i++) \
834  { \
835  F (v, r, ri, n); \
836  v += n; r++; ri++; \
837  } \
838  } \
839  else \
840  { \
841  for (octave_idx_type i = 0; i < u; i++) \
842  { \
843  F (v, r, ri, l, n); \
844  v += l*n; \
845  r += l; ri += l; \
846  } \
847  } \
848 }
849 
851 OP_MINMAX_FCNN (mx_inline_max)
852 
853 #define OP_CUMMINMAX_FCN(F, OP) \
854 template <class T> \
855 void F (const T *v, T *r, octave_idx_type n) \
856 { \
857  if (! n) return; \
858  T tmp = v[0]; \
859  octave_idx_type i = 1, j = 0; \
860  if (xisnan (tmp)) \
861  { \
862  for (; i < n && xisnan (v[i]); i++) ; \
863  for (; j < i; j++) r[j] = tmp; \
864  if (i < n) tmp = v[i]; \
865  } \
866  for (; i < n; i++) \
867  if (v[i] OP tmp) \
868  { \
869  for (; j < i; j++) r[j] = tmp; \
870  tmp = v[i]; \
871  } \
872  for (; j < i; j++) r[j] = tmp; \
873 } \
874 template <class T> \
875 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \
876 { \
877  if (! n) return; \
878  T tmp = v[0]; octave_idx_type tmpi = 0; \
879  octave_idx_type i = 1, j = 0; \
880  if (xisnan (tmp)) \
881  { \
882  for (; i < n && xisnan (v[i]); i++) ; \
883  for (; j < i; j++) { r[j] = tmp; ri[j] = tmpi; } \
884  if (i < n) { tmp = v[i]; tmpi = i; } \
885  } \
886  for (; i < n; i++) \
887  if (v[i] OP tmp) \
888  { \
889  for (; j < i; j++) { r[j] = tmp; ri[j] = tmpi; } \
890  tmp = v[i]; tmpi = i; \
891  } \
892  for (; j < i; j++) { r[j] = tmp; ri[j] = tmpi; } \
893 }
894 
897 
898 // Row reductions will be slightly complicated. We will proceed with checks
899 // for NaNs until we detect that no row will yield a NaN, in which case we
900 // proceed to a faster code.
901 
902 #define OP_CUMMINMAX_FCN2(F, OP) \
903 template <class T> \
904 inline void \
905 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
906 { \
907  if (! n) return; \
908  bool nan = false; \
909  const T *r0; \
910  octave_idx_type j = 0; \
911  for (octave_idx_type i = 0; i < m; i++) \
912  { \
913  r[i] = v[i]; \
914  if (xisnan (v[i])) nan = true; \
915  } \
916  j++; v += m; r0 = r; r += m; \
917  while (nan && j < n) \
918  { \
919  nan = false; \
920  for (octave_idx_type i = 0; i < m; i++) \
921  { \
922  if (xisnan (v[i])) \
923  { r[i] = r0[i]; nan = true; } \
924  else if (xisnan (r0[i]) || v[i] OP r0[i]) \
925  r[i] = v[i]; \
926  } \
927  j++; v += m; r0 = r; r += m; \
928  } \
929  while (j < n) \
930  { \
931  for (octave_idx_type i = 0; i < m; i++) \
932  if (v[i] OP r0[i]) \
933  r[i] = v[i]; \
934  else \
935  r[i] = r0[i]; \
936  j++; v += m; r0 = r; r += m; \
937  } \
938 } \
939 template <class T> \
940 inline void \
941 F (const T *v, T *r, octave_idx_type *ri, \
942  octave_idx_type m, octave_idx_type n) \
943 { \
944  if (! n) return; \
945  bool nan = false; \
946  const T *r0; const octave_idx_type *r0i; \
947  octave_idx_type j = 0; \
948  for (octave_idx_type i = 0; i < m; i++) \
949  { \
950  r[i] = v[i]; ri[i] = 0; \
951  if (xisnan (v[i])) nan = true; \
952  } \
953  j++; v += m; r0 = r; r += m; r0i = ri; ri += m; \
954  while (nan && j < n) \
955  { \
956  nan = false; \
957  for (octave_idx_type i = 0; i < m; i++) \
958  { \
959  if (xisnan (v[i])) \
960  { r[i] = r0[i]; ri[i] = r0i[i]; nan = true; } \
961  else if (xisnan (r0[i]) || v[i] OP r0[i]) \
962  { r[i] = v[i]; ri[i] = j; }\
963  } \
964  j++; v += m; r0 = r; r += m; r0i = ri; ri += m; \
965  } \
966  while (j < n) \
967  { \
968  for (octave_idx_type i = 0; i < m; i++) \
969  if (v[i] OP r0[i]) \
970  { r[i] = v[i]; ri[i] = j; } \
971  else \
972  { r[i] = r0[i]; ri[i] = r0i[i]; } \
973  j++; v += m; r0 = r; r += m; r0i = ri; ri += m; \
974  } \
975 }
976 
978 OP_CUMMINMAX_FCN2 (mx_inline_cummax, >)
979 
980 #define OP_CUMMINMAX_FCNN(F) \
981 template <class T> \
982 inline void \
983 F (const T *v, T *r, octave_idx_type l, \
984  octave_idx_type n, octave_idx_type u) \
985 { \
986  if (! n) return; \
987  if (l == 1) \
988  { \
989  for (octave_idx_type i = 0; i < u; i++) \
990  { \
991  F (v, r, n); \
992  v += n; r += n; \
993  } \
994  } \
995  else \
996  { \
997  for (octave_idx_type i = 0; i < u; i++) \
998  { \
999  F (v, r, l, n); \
1000  v += l*n; \
1001  r += l*n; \
1002  } \
1003  } \
1004 } \
1005 template <class T> \
1006 inline void \
1007 F (const T *v, T *r, octave_idx_type *ri, \
1008  octave_idx_type l, octave_idx_type n, octave_idx_type u) \
1009 { \
1010  if (! n) return; \
1011  if (l == 1) \
1012  { \
1013  for (octave_idx_type i = 0; i < u; i++) \
1014  { \
1015  F (v, r, ri, n); \
1016  v += n; r += n; ri += n; \
1017  } \
1018  } \
1019  else \
1020  { \
1021  for (octave_idx_type i = 0; i < u; i++) \
1022  { \
1023  F (v, r, ri, l, n); \
1024  v += l*n; \
1025  r += l*n; ri += l*n; \
1026  } \
1027  } \
1028 }
1029 
1031 OP_CUMMINMAX_FCNN (mx_inline_cummax)
1032 
1033 template <class T>
1034 void mx_inline_diff (const T *v, T *r, octave_idx_type n,
1035  octave_idx_type order)
1036 {
1037  switch (order)
1038  {
1039  case 1:
1040  for (octave_idx_type i = 0; i < n-1; i++)
1041  r[i] = v[i+1] - v[i];
1042  break;
1043  case 2:
1044  if (n > 1)
1045  {
1046  T lst = v[1] - v[0];
1047  for (octave_idx_type i = 0; i < n-2; i++)
1048  {
1049  T dif = v[i+2] - v[i+1];
1050  r[i] = dif - lst;
1051  lst = dif;
1052  }
1053  }
1054  break;
1055  default:
1056  {
1057  OCTAVE_LOCAL_BUFFER (T, buf, n-1);
1058 
1059  for (octave_idx_type i = 0; i < n-1; i++)
1060  buf[i] = v[i+1] - v[i];
1061 
1062  for (octave_idx_type o = 2; o <= order; o++)
1063  {
1064  for (octave_idx_type i = 0; i < n-o; i++)
1065  buf[i] = buf[i+1] - buf[i];
1066  }
1067 
1068  for (octave_idx_type i = 0; i < n-order; i++)
1069  r[i] = buf[i];
1070  }
1071  }
1072 }
1073 
1074 template <class T>
1075 void mx_inline_diff (const T *v, T *r,
1077  octave_idx_type order)
1078 {
1079  switch (order)
1080  {
1081  case 1:
1082  for (octave_idx_type i = 0; i < m*(n-1); i++)
1083  r[i] = v[i+m] - v[i];
1084  break;
1085  case 2:
1086  for (octave_idx_type i = 0; i < n-2; i++)
1087  {
1088  for (octave_idx_type j = i*m; j < i*m+m; j++)
1089  r[j] = (v[j+m+m] - v[j+m]) - (v[j+m] - v[j]);
1090  }
1091  break;
1092  default:
1093  {
1094  OCTAVE_LOCAL_BUFFER (T, buf, n-1);
1095 
1096  for (octave_idx_type j = 0; j < m; j++)
1097  {
1098  for (octave_idx_type i = 0; i < n-1; i++)
1099  buf[i] = v[i*m+j+m] - v[i*m+j];
1100 
1101  for (octave_idx_type o = 2; o <= order; o++)
1102  {
1103  for (octave_idx_type i = 0; i < n-o; i++)
1104  buf[i] = buf[i+1] - buf[i];
1105  }
1106 
1107  for (octave_idx_type i = 0; i < n-order; i++)
1108  r[i*m+j] = buf[i];
1109  }
1110  }
1111  }
1112 }
1113 
1114 template <class T>
1115 inline void
1116 mx_inline_diff (const T *v, T *r,
1118  octave_idx_type order)
1119 {
1120  if (! n) return;
1121  if (l == 1)
1122  {
1123  for (octave_idx_type i = 0; i < u; i++)
1124  {
1125  mx_inline_diff (v, r, n, order);
1126  v += n; r += n-order;
1127  }
1128  }
1129  else
1130  {
1131  for (octave_idx_type i = 0; i < u; i++)
1132  {
1133  mx_inline_diff (v, r, l, n, order);
1134  v += l*n;
1135  r += l*(n-order);
1136  }
1137  }
1138 }
1139 
1140 // Assistant function
1141 
1142 inline void
1143 get_extent_triplet (const dim_vector& dims, int& dim,
1145  octave_idx_type& u)
1146 {
1147  octave_idx_type ndims = dims.length ();
1148  if (dim >= ndims)
1149  {
1150  l = dims.numel ();
1151  n = 1;
1152  u = 1;
1153  }
1154  else
1155  {
1156  if (dim < 0)
1157  dim = dims.first_non_singleton ();
1158 
1159  // calculate extent triplet.
1160  l = 1, n = dims(dim), u = 1;
1161  for (octave_idx_type i = 0; i < dim; i++)
1162  l *= dims (i);
1163  for (octave_idx_type i = dim + 1; i < ndims; i++)
1164  u *= dims (i);
1165  }
1166 }
1167 
1168 // Appliers.
1169 // FIXME: is this the best design? C++ gives a lot of options here...
1170 // maybe it can be done without an explicit parameter?
1171 
1172 template <class R, class T>
1173 inline Array<R>
1174 do_mx_red_op (const Array<T>& src, int dim,
1175  void (*mx_red_op) (const T *, R *, octave_idx_type,
1177 {
1178  octave_idx_type l, n, u;
1179  dim_vector dims = src.dims ();
1180  // M*b inconsistency: sum ([]) = 0 etc.
1181  if (dims.length () == 2 && dims(0) == 0 && dims(1) == 0)
1182  dims (1) = 1;
1183 
1184  get_extent_triplet (dims, dim, l, n, u);
1185 
1186  // Reduction operation reduces the array size.
1187  if (dim < dims.length ()) dims(dim) = 1;
1188  dims.chop_trailing_singletons ();
1190  Array<R> ret (dims);
1191  mx_red_op (src.data (), ret.fortran_vec (), l, n, u);
1192 
1193  return ret;
1194 }
1195 
1196 template <class R, class T>
1197 inline Array<R>
1198 do_mx_cum_op (const Array<T>& src, int dim,
1199  void (*mx_cum_op) (const T *, R *, octave_idx_type,
1201 {
1202  octave_idx_type l, n, u;
1203  dim_vector dims = src.dims ();
1204  get_extent_triplet (dims, dim, l, n, u);
1205 
1206  // Cumulative operation doesn't reduce the array size.
1207  Array<R> ret (dims);
1208  mx_cum_op (src.data (), ret.fortran_vec (), l, n, u);
1209 
1210  return ret;
1211 }
1212 
1213 template <class R>
1214 inline Array<R>
1215 do_mx_minmax_op (const Array<R>& src, int dim,
1216  void (*mx_minmax_op) (const R *, R *, octave_idx_type,
1218 {
1219  octave_idx_type l, n, u;
1220  dim_vector dims = src.dims ();
1221  get_extent_triplet (dims, dim, l, n, u);
1222 
1223  // If the dimension is zero, we don't do anything.
1224  if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1;
1225  dims.chop_trailing_singletons ();
1227  Array<R> ret (dims);
1228  mx_minmax_op (src.data (), ret.fortran_vec (), l, n, u);
1229 
1230  return ret;
1231 }
1232 
1233 template <class R>
1234 inline Array<R>
1235 do_mx_minmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim,
1236  void (*mx_minmax_op) (const R *, R *, octave_idx_type *,
1238 {
1239  octave_idx_type l, n, u;
1240  dim_vector dims = src.dims ();
1241  get_extent_triplet (dims, dim, l, n, u);
1242 
1243  // If the dimension is zero, we don't do anything.
1244  if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1;
1245  dims.chop_trailing_singletons ();
1246 
1247  Array<R> ret (dims);
1248  if (idx.dims () != dims) idx = Array<octave_idx_type> (dims);
1250  mx_minmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (),
1251  l, n, u);
1252 
1253  return ret;
1254 }
1255 
1256 template <class R>
1257 inline Array<R>
1258 do_mx_cumminmax_op (const Array<R>& src, int dim,
1259  void (*mx_cumminmax_op) (const R *, R *, octave_idx_type,
1261 {
1262  octave_idx_type l, n, u;
1263  dim_vector dims = src.dims ();
1264  get_extent_triplet (dims, dim, l, n, u);
1266  Array<R> ret (dims);
1267  mx_cumminmax_op (src.data (), ret.fortran_vec (), l, n, u);
1268 
1269  return ret;
1270 }
1271 
1272 template <class R>
1273 inline Array<R>
1274 do_mx_cumminmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim,
1275  void (*mx_cumminmax_op) (const R *, R *, octave_idx_type *,
1277 {
1278  octave_idx_type l, n, u;
1279  dim_vector dims = src.dims ();
1280  get_extent_triplet (dims, dim, l, n, u);
1281 
1282  Array<R> ret (dims);
1283  if (idx.dims () != dims) idx = Array<octave_idx_type> (dims);
1285  mx_cumminmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (),
1286  l, n, u);
1287 
1288  return ret;
1289 }
1290 
1291 template <class R>
1292 inline Array<R>
1293 do_mx_diff_op (const Array<R>& src, int dim, octave_idx_type order,
1294  void (*mx_diff_op) (const R *, R *,
1296  octave_idx_type))
1297 {
1298  octave_idx_type l, n, u;
1299  if (order <= 0)
1300  return src;
1301 
1302  dim_vector dims = src.dims ();
1303 
1304  get_extent_triplet (dims, dim, l, n, u);
1305  if (dim >= dims.length ())
1306  dims.resize (dim+1, 1);
1307 
1308  if (dims(dim) <= order)
1309  {
1310  dims (dim) = 0;
1311  return Array<R> (dims);
1312  }
1313  else
1314  {
1315  dims(dim) -= order;
1316  }
1317 
1318  Array<R> ret (dims);
1319  mx_diff_op (src.data (), ret.fortran_vec (), l, n, u, order);
1320 
1321  return ret;
1322 }
1323 
1324 // Fast extra-precise summation. According to
1325 // T. Ogita, S. M. Rump, S. Oishi:
1326 // Accurate Sum And Dot Product,
1327 // SIAM J. Sci. Computing, Vol. 26, 2005
1328 
1329 template <class T>
1330 inline void twosum_accum (T& s, T& e,
1331  const T& x)
1332 {
1333  T s1 = s + x, t = s1 - s, e1 = (s - (s1 - t)) + (x - t);
1334  s = s1;
1335  e += e1;
1336 }
1337 
1338 template <class T>
1339 inline T
1340 mx_inline_xsum (const T *v, octave_idx_type n)
1341 {
1342  T s = 0, e = 0;
1343  for (octave_idx_type i = 0; i < n; i++)
1344  twosum_accum (s, e, v[i]);
1345 
1346  return s + e;
1347 }
1348 
1349 template <class T>
1350 inline void
1351 mx_inline_xsum (const T *v, T *r,
1353 {
1354  OCTAVE_LOCAL_BUFFER (T, e, m);
1355  for (octave_idx_type i = 0; i < m; i++)
1356  e[i] = r[i] = T ();
1357 
1358  for (octave_idx_type j = 0; j < n; j++)
1359  {
1360  for (octave_idx_type i = 0; i < m; i++)
1361  twosum_accum (r[i], e[i], v[i]);
1362 
1363  v += m;
1364  }
1365 
1366  for (octave_idx_type i = 0; i < m; i++)
1367  r[i] += e[i];
1368 }
1369 
1371 
1372 #endif