GNU Octave  4.0.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
lookup.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2008-2015 VZLU Prague a.s., Czech Republic
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 // Author: Jaroslav Hajek <highegg@gmail.com>
24 
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28 
29 #include <cctype>
30 #include <functional>
31 #include <algorithm>
32 
33 #include "dNDArray.h"
34 #include "CNDArray.h"
35 
36 #include "Cell.h"
37 #include "defun.h"
38 #include "error.h"
39 #include "gripes.h"
40 #include "oct-obj.h"
41 #include "ov.h"
42 
43 static
44 bool
45 contains_char (const std::string& str, char c)
46 {
47  return (str.find (c) != std::string::npos
48  || str.find (std::toupper (c)) != std::string::npos);
49 }
50 
51 // case-insensitive character comparison functors
52 struct icmp_char_lt : public std::binary_function<char, char, bool>
53 {
54  bool operator () (char x, char y) const
55  { return std::toupper (x) < std::toupper (y); }
56 };
57 
58 struct icmp_char_gt : public std::binary_function<char, char, bool>
59 {
60  bool operator () (char x, char y) const
61  { return std::toupper (x) > std::toupper (y); }
62 };
63 
64 // FIXME: maybe these should go elsewhere?
65 // FIXME: are they even needed now?
66 // case-insensitive ascending comparator
67 #if 0
68 static bool
69 stri_comp_lt (const std::string& a, const std::string& b)
70 {
71  return std::lexicographical_compare (a.begin (), a.end (),
72  b.begin (), b.end (),
73  icmp_char_lt ());
74 }
75 
76 // case-insensitive descending comparator
77 static bool
78 stri_comp_gt (const std::string& a, const std::string& b)
79 {
80  return std::lexicographical_compare (a.begin (), a.end (),
81  b.begin (), b.end (),
82  icmp_char_gt ());
83 }
84 #endif
85 
86 template <class T>
87 inline sortmode
88 get_sort_mode (const Array<T>& array,
89  typename octave_sort<T>::compare_fcn_type desc_comp
91 {
92  octave_idx_type n = array.numel ();
93  if (n > 1 && desc_comp (array (0), array (n-1)))
94  return DESCENDING;
95  else
96  return ASCENDING;
97 }
98 
99 // FIXME: perhaps there should be octave_value::lookup?
100 // The question is, how should it behave w.r.t. the second argument's type.
101 // We'd need a dispatch on two arguments. Hmmm...
102 
103 #define INT_ARRAY_LOOKUP(TYPE) \
104  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ()) \
105  retval = do_numeric_lookup (table.TYPE ## _array_value (), \
106  y.TYPE ## _array_value (), \
107  left_inf, right_inf, \
108  match_idx, match_bool);
109 template <class ArrayT>
110 static octave_value
111 do_numeric_lookup (const ArrayT& array, const ArrayT& values,
112  bool left_inf, bool right_inf,
113  bool match_idx, bool match_bool)
114 {
115  octave_value retval;
116 
117  Array<octave_idx_type> idx = array.lookup (values);
118  octave_idx_type n = array.numel ();
119  octave_idx_type nval = values.numel ();
120 
121  // Post-process.
122  if (match_bool)
123  {
124  boolNDArray match (idx.dims ());
125  for (octave_idx_type i = 0; i < nval; i++)
126  {
127  octave_idx_type j = idx.xelem (i);
128  match.xelem (i) = j != 0 && values(i) == array(j-1);
129  }
130 
131  retval = match;
132  }
133  else if (match_idx || left_inf || right_inf)
134  {
135  if (match_idx)
136  {
137  NDArray ridx (idx.dims ());
138 
139  for (octave_idx_type i = 0; i < nval; i++)
140  {
141  octave_idx_type j = idx.xelem (i);
142  ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
143  }
144 
145  retval = ridx;
146  }
147  else if (left_inf && right_inf)
148  {
149  // Results in valid indices. Optimize using lazy index.
150  octave_idx_type zero = 0;
151  for (octave_idx_type i = 0; i < nval; i++)
152  {
153  octave_idx_type j = idx.xelem (i) - 1;
154  idx.xelem (i) = std::max (zero, std::min (j, n-2));
155  }
156 
157  retval = idx_vector (idx);
158  }
159  else if (left_inf)
160  {
161  // Results in valid indices. Optimize using lazy index.
162  octave_idx_type zero = 0;
163  for (octave_idx_type i = 0; i < nval; i++)
164  {
165  octave_idx_type j = idx.xelem (i) - 1;
166  idx.xelem (i) = std::max (zero, j);
167  }
168 
169  retval = idx_vector (idx);
170  }
171  else if (right_inf)
172  {
173  NDArray ridx (idx.dims ());
174 
175  for (octave_idx_type i = 0; i < nval; i++)
176  {
177  octave_idx_type j = idx.xelem (i);
178  ridx.xelem (i) = std::min (j, n-1);
179  }
180 
181  retval = ridx;
182  }
183  }
184  else
185  retval = idx;
186 
187  return retval;
188 }
189 
190 DEFUN (lookup, args, ,
191  "-*- texinfo -*-\n\
192 @deftypefn {Built-in Function} {@var{idx} =} lookup (@var{table}, @var{y})\n\
193 @deftypefnx {Built-in Function} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})\n\
194 Lookup values in a sorted table.\n\
195 \n\
196 This function is usually used as a prelude to interpolation.\n\
197 \n\
198 If table is increasing and @code{idx = lookup (table, y)}, then\n\
199 @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} within\n\
200 the table. If @code{y(i) < table(1)} then @code{idx(i)} is 0. If\n\
201 @code{y(i) >= table(end)} or @code{isnan (y(i))} then @code{idx(i)} is\n\
202 @code{n}.\n\
203 \n\
204 If the table is decreasing, then the tests are reversed.\n\
205 For non-strictly monotonic tables, empty intervals are always skipped.\n\
206 The result is undefined if @var{table} is not monotonic, or if\n\
207 @var{table} contains a NaN.\n\
208 \n\
209 The complexity of the lookup is O(M*log(N)) where N is the size of\n\
210 @var{table} and M is the size of @var{y}. In the special case when @var{y}\n\
211 is also sorted, the complexity is O(min(M*log(N),M+N)).\n\
212 \n\
213 @var{table} and @var{y} can also be cell arrays of strings\n\
214 (or @var{y} can be a single string). In this case, string lookup\n\
215 is performed using lexicographical comparison.\n\
216 \n\
217 If @var{opts} is specified, it must be a string with letters indicating\n\
218 additional options.\n\
219 \n\
220 @table @code\n\
221 @item m\n\
222 @code{table(idx(i)) == val(i)} if @code{val(i)}\n\
223 occurs in table; otherwise, @code{idx(i)} is zero.\n\
224 \n\
225 @item b\n\
226 @code{idx(i)} is a logical 1 or 0, indicating whether\n\
227 @code{val(i)} is contained in table or not.\n\
228 \n\
229 @item l\n\
230 For numeric lookups\n\
231 the leftmost subinterval shall be extended to infinity (i.e., all indices\n\
232 at least 1)\n\
233 \n\
234 @item r\n\
235 For numeric lookups\n\
236 the rightmost subinterval shall be extended to infinity (i.e., all indices\n\
237 at most n-1).\n\
238 @end table\n\
239 @end deftypefn")
240 {
241  octave_value retval;
242 
243  int nargin = args.length ();
244 
245  if (nargin < 2 || nargin > 3 || (nargin == 3 && ! args(2).is_string ()))
246  {
247  print_usage ();
248  return retval;
249  }
250 
251  octave_value table = args(0);
252  octave_value y = args(1);
253  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
254  warning ("lookup: table is not a vector");
255 
256  bool num_case = ((table.is_numeric_type () && y.is_numeric_type ())
257  || (table.is_char_matrix () && y.is_char_matrix ()));
258  bool str_case = table.is_cellstr () && (y.is_string () || y.is_cellstr ());
259  bool left_inf = false;
260  bool right_inf = false;
261  bool match_idx = false;
262  bool match_bool = false;
263 
264  if (nargin == 3)
265  {
266  std::string opt = args(2).string_value ();
267  left_inf = contains_char (opt, 'l');
268  right_inf = contains_char (opt, 'r');
269  match_idx = contains_char (opt, 'm');
270  match_bool = contains_char (opt, 'b');
271  if (opt.find_first_not_of ("lrmb") != std::string::npos)
272  {
273  error ("lookup: unrecognized option: %c",
274  opt[opt.find_first_not_of ("lrmb")]);
275  return retval;
276  }
277  }
278 
279  if ((match_idx || match_bool) && (left_inf || right_inf))
280  error ("lookup: m, b cannot be specified with l or r");
281  else if (match_idx && match_bool)
282  error ("lookup: only one of m or b can be specified");
283  else if (str_case && (left_inf || right_inf))
284  error ("lookup: l, r are not recognized for string lookups");
285 
286  if (error_state)
287  return retval;
288 
289  if (num_case)
290  {
291 
292  // In the case of a complex array, absolute values will be used for
293  // compatibility (though it's not too meaningful).
294 
295  if (table.is_complex_type ())
296  table = table.abs ();
297 
298  if (y.is_complex_type ())
299  y = y.abs ();
300 
302 
303  // PS: I learned this from data.cc
304  if INT_ARRAY_LOOKUP (int8)
305  else if INT_ARRAY_LOOKUP (int16)
306  else if INT_ARRAY_LOOKUP (int32)
307  else if INT_ARRAY_LOOKUP (int64)
308  else if INT_ARRAY_LOOKUP (uint8)
309  else if INT_ARRAY_LOOKUP (uint16)
310  else if INT_ARRAY_LOOKUP (uint32)
311  else if INT_ARRAY_LOOKUP (uint64)
312  else if (table.is_char_matrix () && y.is_char_matrix ())
313  retval = do_numeric_lookup (table.char_array_value (),
314  y.char_array_value (),
315  left_inf, right_inf,
316  match_idx, match_bool);
317  else if (table.is_single_type () || y.is_single_type ())
318  retval = do_numeric_lookup (table.float_array_value (),
319  y.float_array_value (),
320  left_inf, right_inf,
321  match_idx, match_bool);
322  else
323  retval = do_numeric_lookup (table.array_value (),
324  y.array_value (),
325  left_inf, right_inf,
326  match_idx, match_bool);
327 
328  }
329  else if (str_case)
330  {
331  Array<std::string> str_table = table.cellstr_value ();
332  Array<std::string> str_y (dim_vector (1, 1));
333 
334  if (y.is_cellstr ())
335  str_y = y.cellstr_value ();
336  else
337  str_y(0) = y.string_value ();
338 
339  Array<octave_idx_type> idx = str_table.lookup (str_y);
340  octave_idx_type nval = str_y.numel ();
341 
342  // Post-process.
343  if (match_bool)
344  {
345  boolNDArray match (idx.dims ());
346  for (octave_idx_type i = 0; i < nval; i++)
347  {
348  octave_idx_type j = idx.xelem (i);
349  match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
350  }
351 
352  retval = match;
353  }
354  else if (match_idx)
355  {
356  NDArray ridx (idx.dims ());
357  if (match_idx)
358  {
359  for (octave_idx_type i = 0; i < nval; i++)
360  {
361  octave_idx_type j = idx.xelem (i);
362  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1)) ? j
363  : 0;
364  }
365  }
366 
367  retval = ridx;
368  }
369  else
370  retval = idx;
371  }
372  else
373  print_usage ();
374 
375  return retval;
376 
377 }
378 
379 /*
380 %!assert (lookup (1:3, 0.5), 0) # value before table
381 %!assert (lookup (1:3, 3.5), 3) # value after table error
382 %!assert (lookup (1:3, 1.5), 1) # value within table error
383 %!assert (lookup (1:3, [3,2,1]), [3,2,1])
384 %!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
385 %!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
386 %!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
387 %!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
388 %!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
389 %!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
390 %!assert (isempty (lookup ([1:3], [])))
391 %!assert (isempty (lookup ([1:3]', [])))
392 %!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
393 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
394 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
395 %!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
396 %!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
397 %!
398 %!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
399 %!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
400 %!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
401 */
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:817
int ndims(void) const
Definition: ov.h:479
octave_idx_type rows(void) const
Definition: ov.h:473
OCTINTERP_API void print_usage(void)
Definition: defun.cc:51
sortmode
Definition: oct-sort.h:103
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:275
sortmode get_sort_mode(const Array< T > &array, typename octave_sort< T >::compare_fcn_type desc_comp=octave_sort< T >::descending_compare)
Definition: lookup.cc:88
bool is_numeric_type(void) const
Definition: ov.h:663
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:44
void error(const char *fmt,...)
Definition: error.cc:476
octave_idx_type lookup(const T *x, octave_idx_type n, T y)
static octave_value do_numeric_lookup(const ArrayT &array, const ArrayT &values, bool left_inf, bool right_inf, bool match_idx, bool match_bool)
Definition: lookup.cc:111
octave_idx_type lookup(const T &value, sortmode mode=UNSORTED) const
Do a binary lookup in a sorted array.
Definition: Array.cc:2166
Array< std::string > cellstr_value(void) const
Definition: ov.h:900
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:337
octave_idx_type columns(void) const
Definition: ov.h:475
FloatNDArray float_array_value(bool frc_str_conv=false) const
Definition: ov.h:782
bool is_char_matrix(void) const
Definition: ov.h:553
std::string string_value(bool force=false) const
Definition: ov.h:897
bool is_string(void) const
Definition: ov.h:562
int error_state
Definition: error.cc:101
bool is_complex_type(void) const
Definition: ov.h:654
bool is_cellstr(void) const
Definition: ov.h:532
octave_value abs(void) const
Definition: ov.h:1158
octave_idx_type length(void) const
Definition: ov.cc:1525
T & xelem(octave_idx_type n)
Definition: Array.h:353
void warning(const char *fmt,...)
Definition: error.cc:681
Handles the reference counting for all the derived classes.
Definition: Array.h:45
charNDArray max(char d, const charNDArray &m)
Definition: chNDArray.cc:233
static bool contains_char(const std::string &str, char c)
Definition: lookup.cc:45
NDArray array_value(bool frc_str_conv=false) const
Definition: ov.h:779
bool operator()(char x, char y) const
Definition: lookup.cc:60
#define INT_ARRAY_LOOKUP(TYPE)
Definition: lookup.cc:103
static bool match(const std::string &filename_arg, const std::string &path_elt_arg)
Definition: kpse.cc:1738
bool is_single_type(void) const
Definition: ov.h:611
bool operator()(char x, char y) const
Definition: lookup.cc:54
F77_RET_T const double * x
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:210