GNU Octave  4.2.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
lookup.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2008-2017 VZLU Prague a.s., Czech Republic
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 // Author: Jaroslav Hajek <highegg@gmail.com>
24 
25 #if defined (HAVE_CONFIG_H)
26 # include "config.h"
27 #endif
28 
29 #include <cctype>
30 #include <functional>
31 #include <algorithm>
32 
33 #include "dNDArray.h"
34 #include "CNDArray.h"
35 
36 #include "Cell.h"
37 #include "defun.h"
38 #include "error.h"
39 #include "errwarn.h"
40 #include "ovl.h"
41 #include "ov.h"
42 
43 static
44 bool
46 {
47  return (str.find (c) != std::string::npos
48  || str.find (std::toupper (c)) != std::string::npos);
49 }
50 
51 // case-insensitive character comparison functors
52 struct icmp_char_lt : public std::binary_function<char, char, bool>
53 {
54  bool operator () (char x, char y) const
55  { return std::toupper (x) < std::toupper (y); }
56 };
57 
58 struct icmp_char_gt : public std::binary_function<char, char, bool>
59 {
60  bool operator () (char x, char y) const
61  { return std::toupper (x) > std::toupper (y); }
62 };
63 
64 // FIXME: maybe these should go elsewhere?
65 // FIXME: are they even needed now?
66 // case-insensitive ascending comparator
67 #if 0
68 static bool
69 stri_comp_lt (const std::string& a, const std::string& b)
70 {
71  return std::lexicographical_compare (a.begin (), a.end (),
72  b.begin (), b.end (),
73  icmp_char_lt ());
74 }
75 
76 // case-insensitive descending comparator
77 static bool
78 stri_comp_gt (const std::string& a, const std::string& b)
79 {
80  return std::lexicographical_compare (a.begin (), a.end (),
81  b.begin (), b.end (),
82  icmp_char_gt ());
83 }
84 #endif
85 
86 template <typename T>
87 inline sortmode
89  typename octave_sort<T>::compare_fcn_type desc_comp
91 {
92  octave_idx_type n = array.numel ();
93  if (n > 1 && desc_comp (array (0), array (n-1)))
94  return DESCENDING;
95  else
96  return ASCENDING;
97 }
98 
99 // FIXME: perhaps there should be octave_value::lookup?
100 // The question is, how should it behave w.r.t. the second argument's type.
101 // We'd need a dispatch on two arguments. Hmmm...
102 
103 #define INT_ARRAY_LOOKUP(TYPE) \
104  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
105  retval = do_numeric_lookup (table.TYPE ## _array_value (), \
106  y.TYPE ## _array_value (), \
107  left_inf, right_inf, \
108  match_idx, match_bool);
109 template <typename ArrayT>
110 static octave_value
111 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
112  bool left_inf, bool right_inf,
113  bool match_idx, bool match_bool)
114 {
116 
117  Array<octave_idx_type> idx = array.lookup (values);
118  octave_idx_type n = array.numel ();
119  octave_idx_type nval = values.numel ();
120 
121  // Post-process.
122  if (match_bool)
123  {
124  boolNDArray match (idx.dims ());
125  for (octave_idx_type i = 0; i < nval; i++)
126  {
127  octave_idx_type j = idx.xelem (i);
128  match.xelem (i) = j != 0 && values(i) == array(j-1);
129  }
130 
131  retval = match;
132  }
133  else if (match_idx || left_inf || right_inf)
134  {
135  if (match_idx)
136  {
137  NDArray ridx (idx.dims ());
138 
139  for (octave_idx_type i = 0; i < nval; i++)
140  {
141  octave_idx_type j = idx.xelem (i);
142  ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
143  }
144 
145  retval = ridx;
146  }
147  else if (left_inf && right_inf)
148  {
149  // Results in valid indices. Optimize using lazy index.
150  octave_idx_type zero = 0;
151  for (octave_idx_type i = 0; i < nval; i++)
152  {
153  octave_idx_type j = idx.xelem (i) - 1;
154  idx.xelem (i) = std::max (zero, std::min (j, n-2));
155  }
156 
157  retval = idx_vector (idx);
158  }
159  else if (left_inf)
160  {
161  // Results in valid indices. Optimize using lazy index.
162  octave_idx_type zero = 0;
163  for (octave_idx_type i = 0; i < nval; i++)
164  {
165  octave_idx_type j = idx.xelem (i) - 1;
166  idx.xelem (i) = std::max (zero, j);
167  }
168 
169  retval = idx_vector (idx);
170  }
171  else if (right_inf)
172  {
173  NDArray ridx (idx.dims ());
174 
175  for (octave_idx_type i = 0; i < nval; i++)
176  {
177  octave_idx_type j = idx.xelem (i);
178  ridx.xelem (i) = std::min (j, n-1);
179  }
180 
181  retval = ridx;
182  }
183  }
184  else
185  retval = idx;
186 
187  return retval;
188 }
189 
190 DEFUN (lookup, args, ,
191  doc: /* -*- texinfo -*-
192 @deftypefn {} {@var{idx} =} lookup (@var{table}, @var{y})
193 @deftypefnx {} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})
194 Lookup values in a sorted table.
195 
196 This function is usually used as a prelude to interpolation.
197 
198 If table is increasing and @code{idx = lookup (table, y)}, then
199 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} within
200 the table. If @code{y(i) < table(1)} then @code{idx(i)} is 0. If
201 @code{y(i) >= table(end)} or @code{isnan (y(i))} then @code{idx(i)} is
202 @code{n}.
203 
204 If the table is decreasing, then the tests are reversed.
205 For non-strictly monotonic tables, empty intervals are always skipped.
206 The result is undefined if @var{table} is not monotonic, or if
207 @var{table} contains a NaN.
208 
209 The complexity of the lookup is O(M*log(N)) where N is the size of
210 @var{table} and M is the size of @var{y}. In the special case when @var{y}
211 is also sorted, the complexity is O(min(M*log(N),M+N)).
212 
213 @var{table} and @var{y} can also be cell arrays of strings
214 (or @var{y} can be a single string). In this case, string lookup
215 is performed using lexicographical comparison.
216 
217 If @var{opts} is specified, it must be a string with letters indicating
218 additional options.
219 
220 @table @code
221 @item m
222 @code{table(idx(i)) == val(i)} if @code{val(i)}
223 occurs in table; otherwise, @code{idx(i)} is zero.
224 
225 @item b
226 @code{idx(i)} is a logical 1 or 0, indicating whether
227 @code{val(i)} is contained in table or not.
228 
229 @item l
230 For numeric lookups
231 the leftmost subinterval shall be extended to infinity (i.e., all indices
232 at least 1)
233 
234 @item r
235 For numeric lookups
236 the rightmost subinterval shall be extended to infinity (i.e., all indices
237 at most n-1).
238 @end table
239 @end deftypefn */)
240 {
241  int nargin = args.length ();
242 
243  if (nargin < 2 || nargin > 3)
244  print_usage ();
245 
246  octave_value table = args(0);
247  octave_value y = args(1);
248  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
249  warning ("lookup: table is not a vector");
250 
252 
253  bool num_case = ((table.is_numeric_type () && y.is_numeric_type ())
254  || (table.is_char_matrix () && y.is_char_matrix ()));
255  bool str_case = table.is_cellstr () && (y.is_string () || y.is_cellstr ());
256  bool left_inf = false;
257  bool right_inf = false;
258  bool match_idx = false;
259  bool match_bool = false;
260 
261  if (nargin == 3)
262  {
263  std::string opt = args(2).xstring_value ("lookup: OPT must be a string");
264  left_inf = contains_char (opt, 'l');
265  right_inf = contains_char (opt, 'r');
266  match_idx = contains_char (opt, 'm');
267  match_bool = contains_char (opt, 'b');
268  if (opt.find_first_not_of ("lrmb") != std::string::npos)
269  error ("lookup: unrecognized option: %c",
270  opt[opt.find_first_not_of ("lrmb")]);
271  }
272 
273  if ((match_idx || match_bool) && (left_inf || right_inf))
274  error ("lookup: m, b cannot be specified with l or r");
275  else if (match_idx && match_bool)
276  error ("lookup: only one of m or b can be specified");
277  else if (str_case && (left_inf || right_inf))
278  error ("lookup: l, r are not recognized for string lookups");
279 
280  if (num_case)
281  {
282  // In the case of a complex array, absolute values will be used for
283  // compatibility (though it's not too meaningful).
284  if (table.is_complex_type ())
285  table = table.abs ();
286 
287  if (y.is_complex_type ())
288  y = y.abs ();
289 
291 
292  // PS: I learned this from data.cc
293  if INT_ARRAY_LOOKUP (int8)
294  else if INT_ARRAY_LOOKUP (int16)
295  else if INT_ARRAY_LOOKUP (int32)
296  else if INT_ARRAY_LOOKUP (int64)
297  else if INT_ARRAY_LOOKUP (uint8)
298  else if INT_ARRAY_LOOKUP (uint16)
299  else if INT_ARRAY_LOOKUP (uint32)
300  else if INT_ARRAY_LOOKUP (uint64)
301  else if (table.is_char_matrix () && y.is_char_matrix ())
302  retval = do_numeric_lookup (table.char_array_value (),
303  y.char_array_value (),
304  left_inf, right_inf,
305  match_idx, match_bool);
306  else if (table.is_single_type () || y.is_single_type ())
307  retval = do_numeric_lookup (table.float_array_value (),
308  y.float_array_value (),
309  left_inf, right_inf,
310  match_idx, match_bool);
311  else
312  retval = do_numeric_lookup (table.array_value (),
313  y.array_value (),
314  left_inf, right_inf,
315  match_idx, match_bool);
316  }
317  else if (str_case)
318  {
319  Array<std::string> str_table = table.cellstr_value ();
320  Array<std::string> str_y (dim_vector (1, 1));
321 
322  if (y.is_cellstr ())
323  str_y = y.cellstr_value ();
324  else
325  str_y(0) = y.string_value ();
326 
327  Array<octave_idx_type> idx = str_table.lookup (str_y);
328  octave_idx_type nval = str_y.numel ();
329 
330  // Post-process.
331  if (match_bool)
332  {
333  boolNDArray match (idx.dims ());
334  for (octave_idx_type i = 0; i < nval; i++)
335  {
336  octave_idx_type j = idx.xelem (i);
337  match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
338  }
339 
340  retval = match;
341  }
342  else if (match_idx)
343  {
344  NDArray ridx (idx.dims ());
345  if (match_idx)
346  {
347  for (octave_idx_type i = 0; i < nval; i++)
348  {
349  octave_idx_type j = idx.xelem (i);
350  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1)) ? j
351  : 0;
352  }
353  }
354 
355  retval = ridx;
356  }
357  else
358  retval = idx;
359  }
360  else
361  print_usage ();
362 
363  return retval;
364 }
365 
366 /*
367 %!assert (lookup (1:3, 0.5), 0) # value before table
368 %!assert (lookup (1:3, 3.5), 3) # value after table error
369 %!assert (lookup (1:3, 1.5), 1) # value within table error
370 %!assert (lookup (1:3, [3,2,1]), [3,2,1])
371 %!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
372 %!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
373 %!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
374 %!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
375 %!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
376 %!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
377 %!assert (isempty (lookup ([1:3], [])))
378 %!assert (isempty (lookup ([1:3]', [])))
379 %!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
380 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
381 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
382 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
383 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
384 %!
385 %!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
386 %!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
387 %!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
388 */
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:831
int ndims(void) const
Definition: ov.h:495
octave_idx_type rows(void) const
Definition: ov.h:489
OCTINTERP_API void print_usage(void)
Definition: defun.cc:52
sortmode
Definition: oct-sort.h:105
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:363
OCTAVE_EXPORT octave_value_list uint16
Definition: ov.cc:1258
sortmode get_sort_mode(const Array< T > &array, typename octave_sort< T >::compare_fcn_type desc_comp=octave_sort< T >::descending_compare)
Definition: lookup.cc:88
bool is_numeric_type(void) const
Definition: ov.h:679
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:46
void error(const char *fmt,...)
Definition: error.cc:570
is greater than zero
Definition: load-path.cc:2339
octave_idx_type lookup(const T *x, octave_idx_type n, T y)
static octave_value do_numeric_lookup(const ArrayT &array, const ArrayT &values, bool left_inf, bool right_inf, bool match_idx, bool match_bool)
Definition: lookup.cc:111
octave_idx_type lookup(const T &value, sortmode mode=UNSORTED) const
Do a binary lookup in a sorted array.
Definition: Array.cc:2166
cell array If invoked with two or more scalar integer or a vector of integer values
Definition: ov-cell.cc:1205
calling an anonymous function involves an overhead quite comparable to the overhead of an m file function Passing a handle to a built in function is because the interpreter is not involved in the internal loop For a
Definition: cellfun.cc:398
JNIEnv void * args
Definition: ov-java.cc:67
Array< std::string > cellstr_value(void) const
Definition: ov.h:920
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:439
octave_idx_type columns(void) const
Definition: ov.h:491
FloatNDArray float_array_value(bool frc_str_conv=false) const
Definition: ov.h:796
bool is_char_matrix(void) const
Definition: ov.h:569
std::string string_value(bool force=false) const
Definition: ov.h:908
OCTAVE_EXPORT octave_value_list uint32
Definition: ov.cc:1258
then the function must return scalars which will be concatenated into the return array(s).If code
Definition: cellfun.cc:398
int nargin
Definition: graphics.cc:10115
bool is_string(void) const
Definition: ov.h:578
OCTAVE_EXPORT octave_value_list int16
Definition: ov.cc:1302
std::string str
Definition: hash.cc:118
bool is_complex_type(void) const
Definition: ov.h:670
octave_value retval
Definition: data.cc:6294
bool is_cellstr(void) const
Definition: ov.h:548
OCTAVE_EXPORT octave_value_list int32
Definition: ov.cc:1258
octave_value abs(void) const
Definition: ov.h:1346
the sparsity preserving column transformation such that that defines the pivoting threshold can be given in which case it defines the c
Definition: lu.cc:138
T & xelem(octave_idx_type n)
Definition: Array.h:455
OCTAVE_EXPORT octave_value_list int64
Definition: ov.cc:1258
void warning(const char *fmt,...)
Definition: error.cc:788
N Dimensional Array with copy-on-write semantics.
Definition: Array.h:126
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:228
static bool contains_char(const std::string &str, char c)
Definition: lookup.cc:45
NDArray array_value(bool frc_str_conv=false) const
Definition: ov.h:793
bool operator()(char x, char y) const
Definition: lookup.cc:60
=val(i)}if ode{val(i)}occurs in table i
Definition: lookup.cc:239
the element is set to zero In other the statement xample y
Definition: data.cc:5342
b
Definition: cellfun.cc:398
#define INT_ARRAY_LOOKUP(TYPE)
Definition: lookup.cc:103
bool is_single_type(void) const
Definition: ov.h:627
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:854
F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T const F77_REAL const F77_REAL F77_REAL &F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T const F77_DBLE F77_DBLE &F77_RET_T const F77_REAL F77_REAL &F77_RET_T F77_REAL F77_REAL &F77_RET_T F77_DBLE F77_DBLE &F77_RET_T const F77_DBLE * x
bool operator()(char x, char y) const
Definition: lookup.cc:54
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:205