GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
bsxfun-defs.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2009-2013 Jaroslav Hajek
4 Copyright (C) 2009 VZLU Prague
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if !defined (octave_bsxfun_defs_h)
25 #define octave_bsxfun_defs_h 1
26 
27 #include <algorithm>
28 #include <iostream>
29 
30 #include "dim-vector.h"
31 #include "oct-locbuf.h"
32 #include "lo-error.h"
33 
34 #include "mx-inlines.cc"
35 
36 template <class R, class X, class Y>
38 do_bsxfun_op (const Array<X>& x, const Array<Y>& y,
39  void (*op_vv) (size_t, R *, const X *, const Y *),
40  void (*op_sv) (size_t, R *, X, const Y *),
41  void (*op_vs) (size_t, R *, const X *, Y))
42 {
43  int nd = std::max (x.ndims (), y.ndims ());
44  dim_vector dvx = x.dims ().redim (nd), dvy = y.dims ().redim (nd);
45 
46  // Construct the result dimensions.
47  dim_vector dvr;
48  dvr.resize (nd);
49  for (int i = 0; i < nd; i++)
50  {
51  octave_idx_type xk = dvx(i), yk = dvy(i);
52  if (xk == 1)
53  dvr(i) = yk;
54  else if (yk == 1 || xk == yk)
55  dvr(i) = xk;
56  else
57  {
58  (*current_liboctave_error_handler)
59  ("bsxfun: nonconformant dimensions: %s and %s",
60  x.dims ().str ().c_str (), y.dims ().str ().c_str ());
61  break;
62  }
63  }
64 
65  Array<R> retval (dvr);
66 
67  const X *xvec = x.fortran_vec ();
68  const Y *yvec = y.fortran_vec ();
69  R *rvec = retval.fortran_vec ();
70 
71  // Fold the common leading dimensions.
72  octave_idx_type start, ldr = 1;
73  for (start = 0; start < nd; start++)
74  {
75  if (dvx(start) != dvy(start))
76  break;
77  ldr *= dvr(start);
78  }
79 
80  if (retval.is_empty ())
81  ; // do nothing
82  else if (start == nd)
83  op_vv (retval.numel (), rvec, xvec, yvec);
84  else
85  {
86  // Determine the type of the low-level loop.
87  bool xsing = false, ysing = false;
88  if (ldr == 1)
89  {
90  xsing = dvx(start) == 1;
91  ysing = dvy(start) == 1;
92  if (xsing || ysing)
93  {
94  ldr *= dvx(start) * dvy(start);
95  start++;
96  }
97  }
98  dim_vector cdvx = dvx.cumulative (), cdvy = dvy.cumulative ();
99  // Nullify singleton dims to achieve a spread effect.
100  for (int i = std::max (start, octave_idx_type (1)); i < nd; i++)
101  {
102  if (dvx(i) == 1)
103  cdvx(i-1) = 0;
104  if (dvy(i) == 1)
105  cdvy(i-1) = 0;
106  }
107 
108  octave_idx_type niter = dvr.numel (start);
109  // The index array.
111  for (octave_idx_type iter = 0; iter < niter; iter++)
112  {
113  octave_quit ();
114 
115  // Compute indices.
116  // FIXME: performance impact noticeable?
117  octave_idx_type xidx = cdvx.cum_compute_index (idx);
118  octave_idx_type yidx = cdvy.cum_compute_index (idx);
119  octave_idx_type ridx = dvr.compute_index (idx);
120 
121  // Apply the low-level loop.
122  if (xsing)
123  op_sv (ldr, rvec + ridx, xvec[xidx], yvec + yidx);
124  else if (ysing)
125  op_vs (ldr, rvec + ridx, xvec + xidx, yvec[yidx]);
126  else
127  op_vv (ldr, rvec + ridx, xvec + xidx, yvec + yidx);
128 
129  dvr.increment_index (idx + start, start);
130  }
131  }
132 
133  return retval;
134 }
135 
136 template <class R, class X>
137 void
139  void (*op_vv) (size_t, R *, const X *),
140  void (*op_vs) (size_t, R *, X))
141 {
142  dim_vector dvr = r.dims (), dvx = x.dims ();
143  octave_idx_type nd = r.ndims ();
144  dvx.redim (nd);
145 
146  const X* xvec = x.fortran_vec ();
147  R* rvec = r.fortran_vec ();
148 
149  // Fold the common leading dimensions.
150  octave_idx_type start, ldr = 1;
151  for (start = 0; start < nd; start++)
152  {
153  if (dvr(start) != dvx(start))
154  break;
155  ldr *= dvr(start);
156  }
157 
158  if (r.is_empty ())
159  ; // do nothing
160  else if (start == nd)
161  op_vv (r.numel (), rvec, xvec);
162  else
163  {
164  // Determine the type of the low-level loop.
165  bool xsing = false;
166  if (ldr == 1)
167  {
168  xsing = dvx(start) == 1;
169  if (xsing)
170  {
171  ldr *= dvr(start) * dvx(start);
172  start++;
173  }
174  }
175 
176  dim_vector cdvx = dvx.cumulative ();
177  // Nullify singleton dims to achieve a spread effect.
178  for (int i = std::max (start, octave_idx_type (1)); i < nd; i++)
179  {
180  if (dvx(i) == 1)
181  cdvx(i-1) = 0;
182  }
183 
184  octave_idx_type niter = dvr.numel (start);
185  // The index array.
187  for (octave_idx_type iter = 0; iter < niter; iter++)
188  {
189  octave_quit ();
190 
191  // Compute indices.
192  // FIXME: performance impact noticeable?
193  octave_idx_type xidx = cdvx.cum_compute_index (idx);
194  octave_idx_type ridx = dvr.compute_index (idx);
195 
196  // Apply the low-level loop.
197  if (xsing)
198  op_vs (ldr, rvec + ridx, xvec[xidx]);
199  else
200  op_vv (ldr, rvec + ridx, xvec + xidx);
201 
202  dvr.increment_index (idx + start, start);
203  }
204  }
205 }
206 
207 #define BSXFUN_OP_DEF(OP, ARRAY) \
208 ARRAY bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
209 
210 #define BSXFUN_OP2_DEF(OP, ARRAY, ARRAY1, ARRAY2) \
211 ARRAY bsxfun_ ## OP (const ARRAY1& x, const ARRAY2& y)
212 
213 #define BSXFUN_REL_DEF(OP, ARRAY) \
214 boolNDArray bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
215 
216 #define BSXFUN_OP_DEF_MXLOOP(OP, ARRAY, LOOP) \
217  BSXFUN_OP_DEF(OP, ARRAY) \
218  { return do_bsxfun_op<ARRAY::element_type, ARRAY::element_type, ARRAY::element_type> \
219  (x, y, LOOP, LOOP, LOOP); }
220 
221 #define BSXFUN_OP2_DEF_MXLOOP(OP, ARRAY, ARRAY1, ARRAY2, LOOP) \
222  BSXFUN_OP2_DEF(OP, ARRAY, ARRAY1, ARRAY2) \
223  { return do_bsxfun_op<ARRAY::element_type, ARRAY1::element_type, ARRAY2::element_type> \
224  (x, y, LOOP, LOOP, LOOP); }
225 
226 #define BSXFUN_REL_DEF_MXLOOP(OP, ARRAY, LOOP) \
227  BSXFUN_REL_DEF(OP, ARRAY) \
228  { return do_bsxfun_op<bool, ARRAY::element_type, ARRAY::element_type> \
229  (x, y, LOOP, LOOP, LOOP); }
230 
231 #define BSXFUN_STDOP_DEFS_MXLOOP(ARRAY) \
232  BSXFUN_OP_DEF_MXLOOP (add, ARRAY, mx_inline_add) \
233  BSXFUN_OP_DEF_MXLOOP (sub, ARRAY, mx_inline_sub) \
234  BSXFUN_OP_DEF_MXLOOP (mul, ARRAY, mx_inline_mul) \
235  BSXFUN_OP_DEF_MXLOOP (div, ARRAY, mx_inline_div) \
236  BSXFUN_OP_DEF_MXLOOP (min, ARRAY, mx_inline_xmin) \
237  BSXFUN_OP_DEF_MXLOOP (max, ARRAY, mx_inline_xmax) \
238 
239 #define BSXFUN_STDREL_DEFS_MXLOOP(ARRAY) \
240  BSXFUN_REL_DEF_MXLOOP (eq, ARRAY, mx_inline_eq) \
241  BSXFUN_REL_DEF_MXLOOP (ne, ARRAY, mx_inline_ne) \
242  BSXFUN_REL_DEF_MXLOOP (lt, ARRAY, mx_inline_lt) \
243  BSXFUN_REL_DEF_MXLOOP (le, ARRAY, mx_inline_le) \
244  BSXFUN_REL_DEF_MXLOOP (gt, ARRAY, mx_inline_gt) \
245  BSXFUN_REL_DEF_MXLOOP (ge, ARRAY, mx_inline_ge)
246 
247 //For bsxfun power with mixed integer/float types
248 #define BSXFUN_POW_MIXED_MXLOOP(INT_TYPE) \
249  BSXFUN_OP2_DEF_MXLOOP (pow, INT_TYPE, INT_TYPE, NDArray, mx_inline_pow) \
250  BSXFUN_OP2_DEF_MXLOOP (pow, INT_TYPE, INT_TYPE, FloatNDArray, mx_inline_pow)\
251  BSXFUN_OP2_DEF_MXLOOP (pow, INT_TYPE, NDArray, INT_TYPE, mx_inline_pow) \
252  BSXFUN_OP2_DEF_MXLOOP (pow, INT_TYPE, FloatNDArray, INT_TYPE, mx_inline_pow)
253 
254 #endif