GNU Octave  4.2.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
strfind.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2009-2017 Jaroslav Hajek
4 Copyright (C) 2009-2010 VZLU Prague
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include <algorithm>
29 #include <deque>
30 #include <limits>
31 #include <string>
32 
33 #include "oct-locbuf.h"
34 
35 #include "Cell.h"
36 #include "ov.h"
37 #include "defun.h"
38 #include "unwind-prot.h"
39 #include "errwarn.h"
40 #include "utils.h"
41 
42 // This allows safe indexing with char.
43 // In C++, char may be (and often is) signed!
44 #define ORD(ch) static_cast<unsigned char>(ch)
45 #define TABSIZE (std::numeric_limits<unsigned char>::max () + 1)
46 
47 // This is the quick search algorithm, as described at
48 // http://www-igm.univ-mlv.fr/~lecroq/string/node19.html
49 static void
50 qs_preprocess (const Array<char>& needle,
51  octave_idx_type *table)
52 {
53  const char *x = needle.data ();
54  octave_idx_type m = needle.numel ();
55 
56  for (octave_idx_type i = 0; i < TABSIZE; i++)
57  table[i] = m + 1;
58  for (octave_idx_type i = 0; i < m; i++)
59  table[ORD(x[i])] = m - i;
60 }
61 
63 qs_search (const Array<char>& needle,
64  const Array<char>& haystack,
65  const octave_idx_type *table,
66  bool overlaps = true)
67 {
68  const char *x = needle.data ();
69  octave_idx_type m = needle.numel ();
70  const char *y = haystack.data ();
71  octave_idx_type n = haystack.numel ();
72 
73  // We'll use deque because it typically has the most favorable properties for
74  // the operation we need.
75  std::deque<octave_idx_type> accum;
76  if (m == 1)
77  {
78  // Looking for a single character.
79  for (octave_idx_type i = 0; i < n; i++)
80  {
81  if (y[i] == x[0])
82  accum.push_back (i);
83  }
84  }
85  else if (m == 2)
86  {
87  // Two characters.
88  if (overlaps)
89  {
90  for (octave_idx_type i = 0; i < n-1; i++)
91  {
92  if (y[i] == x[0] && y[i+1] == x[1])
93  accum.push_back (i);
94  }
95  }
96  else
97  {
98  for (octave_idx_type i = 0; i < n-1; i++)
99  {
100  if (y[i] == x[0] && y[i+1] == x[1])
101  accum.push_back (i++);
102  }
103  }
104  }
105  else if (n >= m)
106  {
107  // General case.
108  octave_idx_type j = 0;
109 
110  if (overlaps)
111  {
112  while (j < n - m)
113  {
114  if (std::equal (x, x + m, y + j))
115  accum.push_back (j);
116  j += table[ORD(y[j + m])];
117  }
118  }
119  else
120  {
121  while (j < n - m)
122  {
123  if (std::equal (x, x + m, y + j))
124  {
125  accum.push_back (j);
126  j += m;
127  }
128  else
129  j += table[ORD(y[j + m])];
130  }
131  }
132 
133  if (j == n - m && std::equal (x, x + m, y + j))
134  accum.push_back (j);
135  }
136 
137  octave_idx_type nmatch = accum.size ();
138  octave_idx_type one = 1;
139  Array<octave_idx_type> result (dim_vector (std::min (one, nmatch), nmatch));
140  octave_idx_type k = 0;
141  for (std::deque<octave_idx_type>::const_iterator iter = accum.begin ();
142  iter != accum.end (); iter++)
143  {
144  result.xelem (k++) = *iter;
145  }
146 
147  return result;
148 }
149 
150 DEFUN (strfind, args, ,
151  doc: /* -*- texinfo -*-
152 @deftypefn {} {@var{idx} =} strfind (@var{str}, @var{pattern})
153 @deftypefnx {} {@var{idx} =} strfind (@var{cellstr}, @var{pattern})
154 @deftypefnx {} {@var{idx} =} strfind (@dots{}, "overlaps", @var{val})
155 Search for @var{pattern} in the string @var{str} and return the starting
156 index of every such occurrence in the vector @var{idx}.
157 
158 If there is no such occurrence, or if @var{pattern} is longer than
159 @var{str}, or if @var{pattern} itself is empty, then @var{idx} is the empty
160 array @code{[]}.
161 
162 The optional argument @qcode{"overlaps"} determines whether the pattern
163 can match at every position in @var{str} (true), or only for unique
164 occurrences of the complete pattern (false). The default is true.
165 
166 If a cell array of strings @var{cellstr} is specified then @var{idx} is a
167 cell array of vectors, as specified above.
168 
169 Examples:
170 
171 @example
172 @group
173 strfind ("abababa", "aba")
174  @result{} [1, 3, 5]
175 
176 strfind ("abababa", "aba", "overlaps", false)
177  @result{} [1, 5]
178 
179 strfind (@{"abababa", "bebebe", "ab"@}, "aba")
180  @result{}
181  @{
182  [1,1] =
183 
184  1 3 5
185 
186  [1,2] = [](1x0)
187  [1,3] = [](1x0)
188  @}
189 @end group
190 @end example
191 @seealso{findstr, strmatch, regexp, regexpi, find}
192 @end deftypefn */)
193 {
194  int nargin = args.length ();
195 
196  if (nargin != 4 && nargin != 2)
197  print_usage ();
198 
199  bool overlaps = true;
200  if (nargin == 4)
201  {
202  if (! args(2).is_string () || ! args(3).is_scalar_type ())
203  error ("strfind: invalid optional arguments");
204 
205  std::string opt = args(2).string_value ();
206 
207  if (opt == "overlaps")
208  overlaps = args(3).bool_value ();
209  else
210  error ("strfind: unknown option: %s", opt.c_str ());
211  }
212 
214 
215  octave_value argstr = args(0);
216  octave_value argpat = args(1);
217 
218  if (argpat.is_string ())
219  {
220  Array<char> needle = argpat.char_array_value ();
222  qs_preprocess (needle, table);
223 
224  if (argstr.is_string ())
225  if (argpat.is_empty ())
226  // Return a null matrix for null pattern for MW compatibility
227  retval = Matrix ();
228  else
229  retval = octave_value (qs_search (needle,
230  argstr.char_array_value (),
231  table, overlaps),
232  true, true);
233  else if (argstr.is_cell ())
234  {
235  const Cell argsc = argstr.cell_value ();
236  Cell retc (argsc.dims ());
237  octave_idx_type ns = argsc.numel ();
238 
239  for (octave_idx_type i = 0; i < ns; i++)
240  {
241  octave_value argse = argsc(i);
242  if (! argse.is_string ())
243  error ("strfind: each element of CELLSTR must be a string");
244 
245  if (argpat.is_empty ())
246  retc(i) = Matrix ();
247  else
248  retc(i) = octave_value (qs_search (needle,
249  argse.char_array_value (),
250  table, overlaps),
251  true, true);
252  }
253 
254  retval = retc;
255  }
256  else
257  error ("strfind: first argument must be a string or cell array of strings");
258  }
259  else if (argpat.is_cell ())
260  retval = do_simple_cellfun (Fstrfind, "strfind", args);
261  else
262  error ("strfind: PATTERN must be a string or cell array of strings");
263 
264  return retval;
265 }
266 
267 /*
268 %!assert (strfind ("abababa", "aba"), [1, 3, 5])
269 %!assert (strfind ("abababa", "aba", "overlaps", false), [1, 5])
270 %!assert (strfind ({"abababa", "bla", "bla"}, "a"), {[1, 3, 5, 7], 3, 3})
271 %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68])
272 %!assert (strfind ("abc", ""), [])
273 %!assert (strfind ("abc", {"", "b", ""}), {[], 2, []})
274 %!assert (strfind ({"abc", "def"}, ""), {[], []})
275 
276 %!error strfind ()
277 %!error strfind ("foo", "bar", 1)
278 %!error <unknown option: foobar> strfind ("foo", 100, "foobar", 1)
279 %!error <each element of CELLSTR must be a string> strfind ({"A", 1}, "foo")
280 %!error <first argument must be a string> strfind (100, "foo")
281 %!error <PATTERN must be a string> strfind ("foo", 100)
282 */
283 
284 static Array<char>
285 qs_replace (const Array<char>& str, const Array<char>& pat,
286  const Array<char>& rep,
287  const octave_idx_type *table,
288  bool overlaps = true)
289 {
290  Array<char> ret = str;
291 
292  octave_idx_type siz = str.numel ();
293  octave_idx_type psiz = pat.numel ();
294  octave_idx_type rsiz = rep.numel ();
295 
296  if (psiz != 0)
297  {
298  // Look up matches, without overlaps.
299  const Array<octave_idx_type> idx = qs_search (pat, str, table, overlaps);
300  octave_idx_type nidx = idx.numel ();
301 
302  if (nidx)
303  {
304  // Compute result size.
305  octave_idx_type retsiz;
306  if (overlaps)
307  {
308  retsiz = 0;
309  // OMG. Is this the "right answer" MW always looks for, or
310  // someone was just lazy?
311  octave_idx_type k = 0;
312  for (octave_idx_type i = 0; i < nidx; i++)
313  {
314  octave_idx_type j = idx(i);
315  if (j >= k)
316  retsiz += j - k;
317  retsiz += rsiz;
318  k = j + psiz;
319  }
320 
321  retsiz += siz - k;
322  }
323  else
324  retsiz = siz + nidx * (rsiz - psiz);
325 
326  if (retsiz == 0)
327  ret.clear (dim_vector (0, 0));
328  else
329  {
330  ret.clear (dim_vector (1, retsiz));
331  const char *src = str.data ();
332  const char *reps = rep.data ();
333  char *dest = ret.fortran_vec ();
334 
335  octave_idx_type k = 0;
336  for (octave_idx_type i = 0; i < nidx; i++)
337  {
338  octave_idx_type j = idx(i);
339  if (j >= k)
340  dest = std::copy (src + k, src + j, dest);
341  dest = std::copy (reps, reps + rsiz, dest);
342  k = j + psiz;
343  }
344 
345  std::copy (src + k, src + siz, dest);
346  }
347  }
348  }
349 
350  return ret;
351 }
352 
353 DEFUN (strrep, args, ,
354  doc: /* -*- texinfo -*-
355 @deftypefn {} {@var{newstr} =} strrep (@var{str}, @var{ptn}, @var{rep})
356 @deftypefnx {} {@var{newstr} =} strrep (@var{cellstr}, @var{ptn}, @var{rep})
357 @deftypefnx {} {@var{newstr} =} strrep (@dots{}, "overlaps", @var{val})
358 Replace all occurrences of the pattern @var{ptn} in the string @var{str}
359 with the string @var{rep} and return the result.
360 
361 The optional argument @qcode{"overlaps"} determines whether the pattern
362 can match at every position in @var{str} (true), or only for unique
363 occurrences of the complete pattern (false). The default is true.
364 
365 @var{s} may also be a cell array of strings, in which case the replacement
366 is done for each element and a cell array is returned.
367 
368 Example:
369 
370 @example
371 @group
372 strrep ("This is a test string", "is", "&%$")
373  @result{} "Th&%$ &%$ a test string"
374 @end group
375 @end example
376 
377 @seealso{regexprep, strfind, findstr}
378 @end deftypefn */)
379 {
380  int nargin = args.length ();
381 
382  if (nargin != 3 && nargin != 5)
383  print_usage ();
384 
385  bool overlaps = true;
386 
387  if (nargin == 5)
388  {
389  if (! args(3).is_string () || ! args(4).is_scalar_type ())
390  error ("strrep: invalid optional arguments");
391 
392  std::string opt = args(3).string_value ();
393  if (opt == "overlaps")
394  overlaps = args(4).bool_value ();
395  else
396  error ("strrep: unknown option: %s", opt.c_str ());
397  }
398 
400 
401  octave_value argstr = args(0);
402  octave_value argpat = args(1);
403  octave_value argrep = args(2);
404 
405  if (argpat.is_string () && argrep.is_string ())
406  {
407  const Array<char> pat = argpat.char_array_value ();
408  const Array<char> rep = argrep.char_array_value ();
409 
411  qs_preprocess (pat, table);
412 
413  if (argstr.is_string ())
414  retval = qs_replace (argstr.char_array_value (), pat, rep,
415  table, overlaps);
416  else if (argstr.is_cell ())
417  {
418  const Cell argsc = argstr.cell_value ();
419  Cell retc (argsc.dims ());
420  octave_idx_type ns = argsc.numel ();
421 
422  for (octave_idx_type i = 0; i < ns; i++)
423  {
424  octave_value argse = argsc(i);
425  if (argse.is_string ())
426  retc(i) = qs_replace (argse.char_array_value (), pat, rep,
427  table, overlaps);
428  else
429  error ("strrep: each element of S must be a string");
430  }
431 
432  retval = retc;
433  }
434  else
435  error ("strrep: S must be a string or cell array of strings");
436  }
437  else if (argpat.is_cell () || argrep.is_cell ())
438  retval = do_simple_cellfun (Fstrrep, "strrep", args);
439  else
440  error ("strrep: PTN and REP arguments must be strings or cell arrays of strings");
441 
442  return retval;
443 }
444 
445 /*
446 %!assert (strrep ("This is a test string", "is", "&%$"),
447 %! "Th&%$ &%$ a test string")
448 %!assert (strrep ("abababc", "abab", "xyz"), "xyzxyzc")
449 %!assert (strrep ("abababc", "abab", "xyz", "overlaps", false), "xyzabc")
450 
451 %!assert (size (strrep ("a", "a", "")), [0 0])
452 
453 %!error strrep ()
454 %!error strrep ("foo", "bar", 3, 4)
455 */
Definition: Cell.h:37
charNDArray char_array_value(bool frc_str_conv=false) const
Definition: ov.h:831
OCTINTERP_API void print_usage(void)
Definition: defun.cc:52
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:363
bool is_scalar_type(void) const
Definition: ov.h:673
for large enough k
Definition: lu.cc:606
static Array< octave_idx_type > qs_search(const Array< char > &needle, const Array< char > &haystack, const octave_idx_type *table, bool overlaps=true)
Definition: strfind.cc:63
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:46
void error(const char *fmt,...)
Definition: error.cc:570
bool is_cell(void) const
Definition: ov.h:545
OCTAVE_EXPORT octave_value_list Fstrfind(const octave_value_list &args, int) or if ar
Definition: strfind.cc:192
octave_base_value * rep
Definition: ov.h:1437
Cell cell_value(void) const
Definition: ov.cc:1687
JNIEnv void * args
Definition: ov-java.cc:67
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:439
nd deftypefn *octave_map m
Definition: ov-struct.cc:2058
int nargin
Definition: graphics.cc:10115
bool is_string(void) const
Definition: ov.h:578
std::string str
Definition: hash.cc:118
const T * data(void) const
Definition: Array.h:582
octave_value retval
Definition: data.cc:6294
Definition: dMatrix.h:37
#define ORD(ch)
Definition: strfind.cc:44
With real return the complex result
Definition: data.cc:3375
T & xelem(octave_idx_type n)
Definition: Array.h:455
void clear(void)
Definition: Array.cc:95
bool is_empty(void) const
Definition: ov.h:542
=val(i)}if ode{val(i)}occurs in table i
Definition: lookup.cc:239
OCTINTERP_API octave_value_list do_simple_cellfun(octave_value_list(*fun)(const octave_value_list &, int), const char *fun_name, const octave_value_list &args, int nargout)
the element is set to zero In other the statement xample y
Definition: data.cc:5342
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:200
static void qs_preprocess(const Array< char > &needle, octave_idx_type *table)
Definition: strfind.cc:50
const T * fortran_vec(void) const
Definition: Array.h:584
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:854
#define TABSIZE
Definition: strfind.cc:45
OCTINTERP_API octave_value_list Fstrrep(const octave_value_list &=octave_value_list(), int=0)
F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T const F77_REAL const F77_REAL F77_REAL &F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T const F77_DBLE F77_DBLE &F77_RET_T const F77_REAL F77_REAL &F77_RET_T F77_REAL F77_REAL &F77_RET_T F77_DBLE F77_DBLE &F77_RET_T const F77_DBLE * x
return octave_value(v1.char_array_value().concat(v2.char_array_value(), ra_idx),((a1.is_sq_string()||a2.is_sq_string())? '\'': '"'))
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:205