GNU Octave  4.4.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
regexp.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2005-2018 David Bateman
4 Copyright (C) 2002-2005 Paul Kienzle
5 
6 This file is part of Octave.
7 
8 Octave is free software: you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <https://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include <list>
29 #include <sstream>
30 
31 #include <pcre.h>
32 
33 #include "base-list.h"
34 #include "oct-locbuf.h"
35 #include "quit.h"
36 #include "lo-regexp.h"
37 #include "str-vec.h"
38 
39 #include "defun.h"
40 #include "Cell.h"
41 #include "error.h"
42 #include "errwarn.h"
43 #include "oct-map.h"
44 #include "ovl.h"
45 #include "utils.h"
46 
47 // Replace backslash escapes in a string with the real values. We need
48 // two special functions instead of the one in utils.cc because the set
49 // of escape sequences used for regexp patterns and replacement strings
50 // is different from those used in the *printf functions.
51 
52 static std::string
53 do_regexp_ptn_string_escapes (const std::string& s, bool is_sq_str)
54 {
56 
57  size_t i = 0;
58  size_t j = 0;
59  size_t len = s.length ();
60 
61  retval.resize (len);
62 
63  while (j < len)
64  {
65  if (s[j] == '\\' && j+1 < len)
66  {
67  switch (s[++j])
68  {
69  case 'b': // backspace
70  if (is_sq_str)
71  retval[i] = '\b';
72  else
73  {
74  // Pass escape sequence through
75  retval[i] = '\\';
76  retval[++i] = 'b';
77  }
78  break;
79 
80  // Translate < and > to PCRE word boundary
81  case '<': // begin word boundary
82  case '>': // end word boundary
83  retval[i] = '\\';
84  retval[++i] = 'b';
85  break;
86 
87  case 'o': // octal input
88  {
89  bool bad_esc_seq = (j+1 >= len);
90 
91  bool brace = false;
92  if (! bad_esc_seq && s[++j] == '{')
93  {
94  brace = true;
95  j++;
96  }
97 
98  int tmpi = 0;
99  size_t k;
100  for (k = j; k < std::min (j+3+brace, len); k++)
101  {
102  int digit = s[k] - '0';
103  if (digit < 0 || digit > 7)
104  break;
105  tmpi <<= 3;
106  tmpi += digit;
107  }
108  if (bad_esc_seq || (brace && s[k++] != '}'))
109  {
110  bad_esc_seq = true;
111  tmpi = 0;
112  warning (R"(malformed octal escape sequence '\o' -- converting to '\0')");
113  }
114  retval[i] = tmpi;
115  j = k - 1;
116  break;
117  }
118 
119  default: // pass escape sequence through
120  retval[i] = '\\';
121  retval[++i] = s[j];
122  break;
123  }
124  }
125  else
126  {
127  retval[i] = s[j];
128  }
129 
130  i++;
131  j++;
132  }
133 
134  retval.resize (i);
135 
136  return retval;
137 }
138 
139 static std::string
141 {
143 
144  size_t i = 0;
145  size_t j = 0;
146  size_t len = s.length ();
147 
148  retval.resize (len);
149 
150  while (j < len)
151  {
152  if (s[j] == '\\' && j+1 < len)
153  {
154  switch (s[++j])
155  {
156  case 'a': // alarm
157  retval[i] = '\a';
158  break;
159 
160  case 'b': // backspace
161  retval[i] = '\b';
162  break;
163 
164  case 'f': // formfeed
165  retval[i] = '\f';
166  break;
167 
168  case 'n': // newline
169  retval[i] = '\n';
170  break;
171 
172  case 'r': // carriage return
173  retval[i] = '\r';
174  break;
175 
176  case 't': // horizontal tab
177  retval[i] = '\t';
178  break;
179 
180  case 'v': // vertical tab
181  retval[i] = '\v';
182  break;
183 
184  case '0':
185  case '1':
186  case '2':
187  case '3':
188  case '4':
189  case '5':
190  case '6':
191  case '7': // octal input
192  {
193  size_t k;
194  int tmpi = s[j] - '0';
195  for (k = j+1; k < std::min (j+3, len); k++)
196  {
197  int digit = s[k] - '0';
198  if (digit < 0 || digit > 7)
199  break;
200  tmpi <<= 3;
201  tmpi += digit;
202  }
203  retval[i] = tmpi;
204  j = k - 1;
205  break;
206  }
207 
208  case 'o': // octal input
209  {
210  bool bad_esc_seq = (j+1 >= len);
211 
212  bool brace = false;
213  if (! bad_esc_seq && s[++j] == '{')
214  {
215  brace = true;
216  j++;
217  }
218 
219  int tmpi = 0;
220  size_t k;
221  for (k = j; k < std::min (j+3+brace, len); k++)
222  {
223  int digit = s[k] - '0';
224  if (digit < 0 || digit > 7)
225  break;
226  tmpi <<= 3;
227  tmpi += digit;
228  }
229  if (bad_esc_seq || (brace && s[k++] != '}'))
230  {
231  warning (R"(malformed octal escape sequence '\o' -- converting to '\0')");
232  tmpi = 0;
233  }
234  retval[i] = tmpi;
235  j = k - 1;
236  break;
237  }
238 
239  case 'x': // hex input
240  {
241  bool bad_esc_seq = (j+1 >= len);
242 
243  bool brace = false;
244  if (! bad_esc_seq && s[++j] == '{')
245  {
246  brace = true;
247  j++;
248  }
249 
250  int tmpi = 0;
251  size_t k;
252  for (k = j; k < std::min (j+2+brace, len); k++)
253  {
254  if (! isxdigit (s[k]))
255  break;
256 
257  tmpi <<= 4;
258  int digit = s[k];
259  if (digit >= 'a')
260  tmpi += digit - 'a' + 10;
261  else if (digit >= 'A')
262  tmpi += digit - 'A' + 10;
263  else
264  tmpi += digit - '0';
265  }
266  if (bad_esc_seq || (brace && s[k++] != '}'))
267  {
268  warning (R"(malformed hex escape sequence '\x' -- converting to '\0')");
269  tmpi = 0;
270  }
271  retval[i] = tmpi;
272  j = k - 1;
273  break;
274  }
275 
276  // Both dollar sign (for capture buffer) and backslash are
277  // passed through with their escape backslash. The processing
278  // for these must occur during the actual replacement operation
279  // in lo-regexp.cc.
280  case '$': // pass dollar sign through with escape
281  retval[i] = '\\'; retval[++i] = '$';
282  break;
283 
284  case '\\': // pass backslash through with escape
285  retval[i] = '\\'; retval[++i] = '\\';
286  break;
287 
288  default: // convert escaped character to unescaped char
289  retval[i] = s[j];
290  break;
291  }
292  }
293  else
294  {
295  retval[i] = s[j];
296  }
297 
298  i++;
299  j++;
300  }
301 
302  retval.resize (i);
303 
304  return retval;
305 }
306 
307 static void
309  const std::string& who, int skip, bool& extra_args)
310 {
311  extra_args = false;
312 
313  for (int i = skip; i < args.length (); i++)
314  {
316 
317  str = args(i).xstring_value ("%s: optional arguments must be strings", who.c_str ());
318 
319  std::transform (str.begin (), str.end (), str.begin (), tolower);
320 
321  if (str.find ("once", 0) == 0)
322  options.once (true);
323  else if (str.find ("matchcase", 0) == 0)
324  options.case_insensitive (false);
325  else if (str.find ("ignorecase", 0) == 0)
326  options.case_insensitive (true);
327  else if (str.find ("dotall", 0) == 0)
328  options.dotexceptnewline (false);
329  else if (str.find ("stringanchors", 0) == 0)
330  options.lineanchors (false);
331  else if (str.find ("literalspacing", 0) == 0)
332  options.freespacing (false);
333  else if (str.find ("noemptymatch", 0) == 0)
334  options.emptymatch (false);
335  else if (str.find ("dotexceptnewline", 0) == 0)
336  options.dotexceptnewline (true);
337  else if (str.find ("lineanchors", 0) == 0)
338  options.lineanchors (true);
339  else if (str.find ("freespacing", 0) == 0)
340  options.freespacing (true);
341  else if (str.find ("emptymatch", 0) == 0)
342  options.emptymatch (true);
343  else if (str.find ("start", 0) == 0
344  || str.find ("end", 0) == 0
345  || str.find ("tokenextents", 0) == 0
346  || str.find ("match", 0) == 0
347  || str.find ("tokens", 0) == 0
348  || str.find ("names", 0) == 0
349  || str.find ("split", 0) == 0)
350  extra_args = true;
351  else
352  error ("%s: unrecognized option", who.c_str ());
353  }
354 }
355 
356 static octave_value_list
358  const std::string& who, bool case_insensitive = false)
359 {
361 
362  int nargin = args.length ();
363 
364  // Make sure we have string, pattern
365  const std::string buffer = args(0).string_value ();
366 
367  std::string pattern = args(1).string_value ();
368 
369  // Rewrite pattern for PCRE
370  pattern = do_regexp_ptn_string_escapes (pattern, args(1).is_sq_string ());
371 
373  options.case_insensitive (case_insensitive);
374  bool extra_options = false;
375  parse_options (options, args, who, 2, extra_options);
376 
378  = octave::regexp::match (pattern, buffer, options, who);
379 
380  string_vector named_pats = rx_lst.named_patterns ();
381 
382  size_t sz = rx_lst.size ();
383 
384  // Converted the linked list in the correct form for the return values
385 
386  octave_map nmap (dim_vector ((sz == 0 ? 0 : 1), sz), named_pats);
387 
388  retval.resize (7);
389 
390  if (sz != 0)
391  {
392  for (int j = 0; j < named_pats.numel (); j++)
393  {
394  Cell ctmp (dim_vector (1, sz));
395  octave_idx_type i = 0;
396 
397  for (const auto& match_data : rx_lst)
398  {
399  string_vector named_tokens = match_data.named_tokens ();
400 
401  ctmp(i++) = named_tokens(j);
402  }
403 
404  nmap.assign (named_pats(j), ctmp);
405  }
406  }
407  retval(5) = nmap;
408 
409  if (options.once ())
410  {
412 
413  retval(4) = (sz ? p->tokens () : Cell ());
414  retval(3) = (sz ? p->match_string () : "");
415  retval(2) = (sz ? p->token_extents () : Matrix ());
416 
417  if (sz)
418  {
419  double start = p->start ();
420  double end = p->end ();
421 
422  Cell split (dim_vector (1, 2));
423  split(0) = buffer.substr (0, start-1);
424  split(1) = buffer.substr (end);
425 
426  retval(6) = split;
427  retval(1) = end;
428  retval(0) = start;
429  }
430  else
431  {
432  retval(6) = buffer;
433  retval(1) = Matrix ();
434  retval(0) = Matrix ();
435  }
436  }
437  else
438  {
439  Cell tokens (dim_vector (1, sz));
440  Cell match_string (dim_vector (1, sz));
441  Cell token_extents (dim_vector (1, sz));
442  NDArray end (dim_vector (1, sz));
443  NDArray start (dim_vector (1, sz));
444  Cell split (dim_vector (1, sz+1));
445  size_t sp_start = 0;
446 
447  octave_idx_type i = 0;
448  for (const auto& match_data : rx_lst)
449  {
450  double s = match_data.start ();
451  double e = match_data.end ();
452 
453  string_vector tmp = match_data.tokens ();
454  tokens(i) = Cell (dim_vector (1, tmp.numel ()), tmp);
455  match_string(i) = match_data.match_string ();
456  token_extents(i) = match_data.token_extents ();
457  end(i) = e;
458  start(i) = s;
459  split(i) = buffer.substr (sp_start, s-sp_start-1);
460  sp_start = e;
461  i++;
462  }
463 
464  split(i) = buffer.substr (sp_start);
465 
466  retval(6) = split;
467  retval(4) = tokens;
468  retval(3) = match_string;
469  retval(2) = token_extents;
470  retval(1) = end;
471  retval(0) = start;
472  }
473 
474  // Alter the order of the output arguments
475 
476  if (extra_options)
477  {
478  int n = 0;
479  octave_value_list new_retval;
480  new_retval.resize (nargout);
481 
482  int arg_used[7] {};
483 
484  for (int j = 2; j < nargin; j++)
485  {
486  int k = 0;
487  std::string str = args(j).string_value ();
488  std::transform (str.begin (), str.end (), str.begin (), tolower);
489 
490  if (str.find ("once", 0) == 0
491  || str.find ("stringanchors", 0) == 0
492  || str.find ("lineanchors", 0) == 0
493  || str.find ("matchcase", 0) == 0
494  || str.find ("ignorecase", 0) == 0
495  || str.find ("dotall", 0) == 0
496  || str.find ("dotexceptnewline", 0) == 0
497  || str.find ("literalspacing", 0) == 0
498  || str.find ("freespacing", 0) == 0
499  || str.find ("noemptymatch", 0) == 0
500  || str.find ("emptymatch", 0) == 0)
501  continue;
502  else if (str.find ("start", 0) == 0)
503  k = 0;
504  else if (str.find ("end", 0) == 0)
505  k = 1;
506  else if (str.find ("tokenextents", 0) == 0)
507  k = 2;
508  else if (str.find ("match", 0) == 0)
509  k = 3;
510  else if (str.find ("tokens", 0) == 0)
511  k = 4;
512  else if (str.find ("names", 0) == 0)
513  k = 5;
514  else if (str.find ("split", 0) == 0)
515  k = 6;
516 
517  new_retval(n++) = retval(k);
518  arg_used[k] = true;
519 
520  if (n == nargout)
521  break;
522  }
523 
524  // Fill in the rest of the arguments
525  if (n < nargout)
526  {
527  for (int j = 0; j < 7; j++)
528  {
529  if (! arg_used[j])
530  new_retval(n++) = retval(j);
531  }
532  }
533 
534  retval = new_retval;
535  }
536 
537  return retval;
538 }
539 
540 static octave_value_list
542  const std::string& who, bool case_insensitive = false)
543 {
545 
546  if (args(0).iscell ())
547  {
548  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
549  octave_value_list new_args = args;
550  Cell cellstr = args(0).cell_value ();
551  if (args(1).iscell ())
552  {
553  Cell cellpat = args(1).cell_value ();
554 
555  if (cellpat.numel () == 1)
556  {
557  for (int j = 0; j < nargout; j++)
558  newretval[j].resize (cellstr.dims ());
559 
560  new_args(1) = cellpat(0);
561 
562  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
563  {
564  new_args(0) = cellstr(i);
565  octave_value_list tmp = octregexp (new_args, nargout, who,
566  case_insensitive);
567 
568  for (int j = 0; j < nargout; j++)
569  newretval[j](i) = tmp(j);
570  }
571  }
572  else if (cellstr.numel () == 1)
573  {
574  for (int j = 0; j < nargout; j++)
575  newretval[j].resize (cellpat.dims ());
576 
577  new_args(0) = cellstr(0);
578 
579  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
580  {
581  new_args(1) = cellpat(i);
582  octave_value_list tmp = octregexp (new_args, nargout, who,
583  case_insensitive);
584 
585  for (int j = 0; j < nargout; j++)
586  newretval[j](i) = tmp(j);
587  }
588  }
589  else if (cellstr.numel () == cellpat.numel ())
590  {
591  if (cellstr.dims () != cellpat.dims ())
592  error ("%s: inconsistent cell array dimensions", who.c_str ());
593 
594  for (int j = 0; j < nargout; j++)
595  newretval[j].resize (cellstr.dims ());
596 
597  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
598  {
599  new_args(0) = cellstr(i);
600  new_args(1) = cellpat(i);
601 
602  octave_value_list tmp = octregexp (new_args, nargout, who,
603  case_insensitive);
604 
605  for (int j = 0; j < nargout; j++)
606  newretval[j](i) = tmp(j);
607  }
608  }
609  else
610  error ("regexp: cell array arguments must be scalar or equal size");
611  }
612  else
613  {
614  for (int j = 0; j < nargout; j++)
615  newretval[j].resize (cellstr.dims ());
616 
617  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
618  {
619  new_args(0) = cellstr(i);
620  octave_value_list tmp = octregexp (new_args, nargout, who,
621  case_insensitive);
622 
623  for (int j = 0; j < nargout; j++)
624  newretval[j](i) = tmp(j);
625  }
626  }
627 
628  for (int j = 0; j < nargout; j++)
629  retval(j) = octave_value (newretval[j]);
630  }
631  else if (args(1).iscell ())
632  {
633  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
634  octave_value_list new_args = args;
635  Cell cellpat = args(1).cell_value ();
636 
637  for (int j = 0; j < nargout; j++)
638  newretval[j].resize (cellpat.dims ());
639 
640  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
641  {
642  new_args(1) = cellpat(i);
643  octave_value_list tmp = octregexp (new_args, nargout, who,
644  case_insensitive);
645 
646  for (int j = 0; j < nargout; j++)
647  newretval[j](i) = tmp(j);
648  }
649 
650  for (int j = 0; j < nargout; j++)
651  retval(j) = octave_value (newretval[j]);
652  }
653  else
654  retval = octregexp (args, nargout, who, case_insensitive);
655 
656  return retval;
657 
658 }
659 
660 DEFUN (regexp, args, nargout,
661  doc: /* -*- texinfo -*-
662 @deftypefn {} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexp (@var{str}, @var{pat})
663 @deftypefnx {} {[@dots{}] =} regexp (@var{str}, @var{pat}, "@var{opt1}", @dots{})
664 Regular expression string matching.
665 
666 Search for @var{pat} in @var{str} and return the positions and substrings of
667 any matches, or empty values if there are none.
668 
669 The matched pattern @var{pat} can include any of the standard regex
670 operators, including:
671 
672 @table @code
673 @item .
674 Match any character
675 
676 @item * + ? @{@}
677 Repetition operators, representing
678 
679 @table @code
680 @item *
681 Match zero or more times
682 
683 @item +
684 Match one or more times
685 
686 @item ?
687 Match zero or one times
688 
689 @item @{@var{n}@}
690 Match exactly @var{n} times
691 
692 @item @{@var{n},@}
693 Match @var{n} or more times
694 
695 @item @{@var{m},@var{n}@}
696 Match between @var{m} and @var{n} times
697 @end table
698 
699 @item [@dots{}] [^@dots{}]
700 
701 List operators. The pattern will match any character listed between
702 @qcode{"["} and @qcode{"]"}. If the first character is @qcode{"^"} then the
703 pattern is inverted and any character except those listed between brackets
704 will match.
705 
706 Escape sequences defined below can also be used inside list operators. For
707 example, a template for a floating point number might be @code{[-+.\d]+}.
708 
709 @item () (?:)
710 Grouping operator. The first form, parentheses only, also creates a token.
711 
712 @item |
713 Alternation operator. Match one of a choice of regular expressions. The
714 alternatives must be delimited by the grouping operator @code{()} above.
715 
716 @item ^ $
717 Anchoring operators. Requires pattern to occur at the start (@code{^}) or
718 end (@code{$}) of the string.
719 @end table
720 
721 In addition, the following escaped characters have special meaning.
722 
723 @table @code
724 
725 @item \d
726 Match any digit
727 
728 @item \D
729 Match any non-digit
730 
731 @item \s
732 Match any whitespace character
733 
734 @item \S
735 Match any non-whitespace character
736 
737 @item \w
738 Match any word character
739 
740 @item \W
741 Match any non-word character
742 
@item \<
744 Match the beginning of a word
745 
@item \>
747 Match the end of a word
748 
749 @item \B
750 Match within a word
751 @end table
752 
753 Implementation Note: For compatibility with @sc{matlab}, escape sequences
754 in @var{pat} (e.g., @qcode{"@xbackslashchar{}n"} => newline) are expanded
755 even when @var{pat} has been defined with single quotes. To disable
756 expansion use a second backslash before the escape sequence (e.g.,
757 "@xbackslashchar{}@xbackslashchar{}n") or use the @code{regexptranslate}
758 function.
759 
760 The outputs of @code{regexp} default to the order given below
761 
762 @table @var
763 @item s
764 The start indices of each matching substring
765 
766 @item e
767 The end indices of each matching substring
768 
769 @item te
770 The extents of each matched token surrounded by @code{(@dots{})} in
771 @var{pat}
772 
773 @item m
774 A cell array of the text of each match
775 
776 @item t
777 A cell array of the text of each token matched
778 
779 @item nm
780 A structure containing the text of each matched named token, with the name
781 being used as the fieldname. A named token is denoted by
782 @code{(?<name>@dots{})}.
783 
784 @item sp
785 A cell array of the text not returned by match, i.e., what remains if you
786 split the string based on @var{pat}.
787 @end table
788 
789 Particular output arguments, or the order of the output arguments, can be
790 selected by additional @var{opt} arguments. These are strings and the
791 correspondence between the output arguments and the optional argument
792 are
793 
794 @multitable @columnfractions 0.2 0.3 0.3 0.2
795 @item @tab @qcode{'start'} @tab @var{s} @tab
796 @item @tab @qcode{'end'} @tab @var{e} @tab
797 @item @tab @qcode{'tokenExtents'} @tab @var{te} @tab
798 @item @tab @qcode{'match'} @tab @var{m} @tab
799 @item @tab @qcode{'tokens'} @tab @var{t} @tab
800 @item @tab @qcode{'names'} @tab @var{nm} @tab
801 @item @tab @qcode{'split'} @tab @var{sp} @tab
802 @end multitable
803 
804 Additional arguments are summarized below.
805 
806 @table @samp
807 @item once
808 Return only the first occurrence of the pattern.
809 
810 @item matchcase
811 Make the matching case sensitive. (default)
812 
813 Alternatively, use (?-i) in the pattern.
814 
815 @item ignorecase
816 Ignore case when matching the pattern to the string.
817 
818 Alternatively, use (?i) in the pattern.
819 
820 @item stringanchors
821 Match the anchor characters at the beginning and end of the string.
822 (default)
823 
824 Alternatively, use (?-m) in the pattern.
825 
826 @item lineanchors
827 Match the anchor characters at the beginning and end of the line.
828 
829 Alternatively, use (?m) in the pattern.
830 
831 @item dotall
832 The pattern @code{.} matches all characters including the newline character.
833  (default)
834 
835 Alternatively, use (?s) in the pattern.
836 
837 @item dotexceptnewline
838 The pattern @code{.} matches all characters except the newline character.
839 
840 Alternatively, use (?-s) in the pattern.
841 
842 @item literalspacing
843 All characters in the pattern, including whitespace, are significant and are
844 used in pattern matching. (default)
845 
846 Alternatively, use (?-x) in the pattern.
847 
848 @item freespacing
849 The pattern may include arbitrary whitespace and also comments beginning
850 with the character @samp{#}.
851 
852 Alternatively, use (?x) in the pattern.
853 
854 @item noemptymatch
855 Zero-length matches are not returned. (default)
856 
857 @item emptymatch
858 Return zero-length matches.
859 
860 @code{regexp ('a', 'b*', 'emptymatch')} returns @code{[1 2]} because there
861 are zero or more @qcode{'b'} characters at positions 1 and end-of-string.
862 
863 @end table
864 
865 Stack Limitation Note: Pattern searches are done with a recursive function
866 which can overflow the program stack when there are a high number of matches.
867 For example,
868 
869 @example
870 @code{regexp (repmat ('a', 1, 1e5), '(a)+')}
871 @end example
872 
873 @noindent
874 may lead to a segfault. As an alternative, consider constructing pattern
875 searches that reduce the number of matches (e.g., by creatively using set
876 complement), and then further processing the return variables (now reduced in
877 size) with successive @code{regexp} searches.
878 @seealso{regexpi, strfind, regexprep}
879 @end deftypefn */)
880 {
881  if (args.length () < 2)
882  print_usage ();
883 
885 
886  if (args(0).iscell () || args(1).iscell ())
887  retval = (octcellregexp (args, (nargout > 0 ? nargout : 1), "regexp"));
888  else
889  retval = octregexp (args, nargout, "regexp");
890 
891  return retval;
892 }
893 
894 /*
895 ## PCRE_ERROR_MATCHLIMIT test
896 %!test
897 %! s = sprintf ('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-');
898 %! ws = warning ("query");
899 %! unwind_protect
900 %! warning ("off");
901 %! regexp (s, '(\s*-*\d+[.]*\d*\s*)+\n');
902 %! unwind_protect_cleanup
903 %! warning (ws);
904 %! end_unwind_protect
905 
906 ## segfault test
907 %!assert (regexp ("abcde", "."), [1,2,3,4,5])
908 ## Infinite loop test
909 %!assert (isempty (regexp ("abcde", "")))
910 
911 ## Check that anchoring of pattern works correctly
912 %!assert (regexp ('abcabc', '^abc'), 1)
913 %!assert (regexp ('abcabc', 'abc$'), 4)
914 %!assert (regexp ('abcabc', '^abc$'), zeros (1,0))
915 
916 %!test
917 %! [s, e, te, m, t] = regexp (' No Match ', 'f(.*)uck');
918 %! assert (s, zeros (1,0));
919 %! assert (e, zeros (1,0));
920 %! assert (te, cell (1,0));
921 %! assert (m, cell (1,0));
922 %! assert (t, cell (1,0));
923 
924 %!test
925 %! [s, e, te, m, t] = regexp (' FiRetrUck ', 'f(.*)uck');
926 %! assert (s, zeros (1,0));
927 %! assert (e, zeros (1,0));
928 %! assert (te, cell (1,0));
929 %! assert (m, cell (1,0));
930 %! assert (t, cell (1,0));
931 
932 %!test
933 %! [s, e, te, m, t] = regexp (' firetruck ', 'f(.*)uck');
934 %! assert (s, 2);
935 %! assert (e, 10);
936 %! assert (te{1}, [3, 7]);
937 %! assert (m{1}, 'firetruck');
938 %! assert (t{1}{1}, 'iretr');
939 
940 %!test
941 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*');
942 %! assert (s, [1, 12]);
943 %! assert (e, [5, 17]);
944 %! assert (size (te), [1, 2]);
945 %! assert (isempty (te{1}));
946 %! assert (isempty (te{2}));
947 %! assert (m{1}, 'short');
948 %! assert (m{2}, 'string');
949 %! assert (size (t), [1, 2]);
950 %! assert (isempty (t{1}));
951 %! assert (isempty (t{2}));
952 
953 %!test
954 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*', 'once');
955 %! assert (s, 1);
956 %! assert (e, 5);
957 %! assert (isempty (te));
958 %! assert (m, 'short');
959 %! assert (isempty (t));
960 
961 %!test
962 %! [m, te, e, s, t] = regexp ('short test string', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
963 %! assert (s, 1);
964 %! assert (e, 5);
965 %! assert (isempty (te));
966 %! assert (m, 'short');
967 %! assert (isempty (t));
968 
969 %!test
970 %! [s, e, te, m, t, nm] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)');
971 %! assert (s, 1);
972 %! assert (e, 10);
973 %! assert (size (te), [1, 1]);
974 %! assert (te{1}, [1,5; 7,10]);
975 %! assert (m{1}, 'short test');
976 %! assert (size (t), [1, 1]);
977 %! assert (t{1}{1}, 'short');
978 %! assert (t{1}{2}, 'test');
979 %! assert (size (nm), [1, 1]);
980 %! assert (! isempty (fieldnames (nm)));
981 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
982 %! assert (nm.word1, 'short');
983 %! assert (nm.word2, 'test');
984 
985 %!test
986 %! [nm, m, te, e, s, t] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
987 %! assert (s, 1);
988 %! assert (e, 10);
989 %! assert (size (te), [1, 1]);
990 %! assert (te{1}, [1,5; 7,10]);
991 %! assert (m{1}, 'short test');
992 %! assert (size (t), [1, 1]);
993 %! assert (t{1}{1}, 'short');
994 %! assert (t{1}{2}, 'test');
995 %! assert (size (nm), [1, 1]);
996 %! assert (! isempty (fieldnames (nm)));
997 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
998 %! assert (nm.word1, 'short');
999 %! assert (nm.word2, 'test');
1000 
1001 %!test
1002 %! [t, nm] = regexp ("John Davis\nRogers, James", '(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)', 'tokens', 'names');
1003 %! assert (size (t), [1, 2]);
1004 %! assert (t{1}{1}, "John");
1005 %! assert (t{1}{2}, "Davis");
1006 %! assert (t{2}{1}, "Rogers");
1007 %! assert (t{2}{2}, "James");
1008 %! assert (size (nm), [1, 2]);
1009 %! assert (nm(1).first, "John");
1010 %! assert (nm(1).last, "Davis");
1011 %! assert (nm(2).first, "James");
1012 %! assert (nm(2).last, "Rogers");
1013 
1014 ## Tests for nulls in strings properly matching
1015 %!test
1016 %! str = "A\0B\0\0C";
1017 %! ptn = '(\0+)'; # also test null in single-quote pattern
1018 %! M = regexp (str, ptn, "match");
1019 %! assert (size (M), [1, 2]);
1020 %! assert (double (M{1}), [0]);
1021 %! assert (double (M{2}), [0, 0]);
1022 
1023 %!test
1024 %! str = "A\0B\0\0C";
1025 %! ptn = "(\0+)"; # also test null in double-quote pattern
1026 %! T = regexp (str, ptn, "tokens");
1027 %! assert (size (T), [1, 2]);
1028 %! assert (double (T{1}{1}), [0]);
1029 %! assert (double (T{2}{1}), [0, 0]);
1030 
1031 %!test
1032 %! str = "A\0B\0\0C";
1033 %! ptn = '(?<namedtoken>\0+)';
1034 %! NT = regexp (str, ptn, "names");
1035 %! assert (size (NT), [1, 2]);
1036 %! assert (double (NT(1).namedtoken), [0]);
1037 %! assert (double (NT(2).namedtoken), [0, 0]);
1038 
1039 ## Tests for named tokens
1040 %!test
1041 %! ## Parenthesis in named token (ie (int)) causes a problem
1042 %! assert (regexp ('qwe int asd', ['(?<typestr>(int))'], 'names'),
1043 %! struct ('typestr', 'int'));
1044 
1045 %!test <*35683>
1046 %! ## Mix of named and unnamed tokens can cause segfault
1047 %! str = "abcde";
1048 %! ptn = '(?<T1>a)(\w+)(?<T2>d\w+)';
1049 %! tokens = regexp (str, ptn, "names");
1050 %! assert (isstruct (tokens) && numel (tokens) == 1);
1051 %! assert (tokens.T1, "a");
1052 %! assert (tokens.T2, "de");
1053 
1054 ## Test options to regexp
1055 %!assert (regexp ("abc\nabc", '.'), [1:7])
1056 %!assert (regexp ("abc\nabc", '.', 'dotall'), [1:7])
1057 %!test
1058 %! assert (regexp ("abc\nabc", '(?s).'), [1:7]);
1059 %! assert (regexp ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
1060 %! assert (regexp ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
1061 
1062 %!assert (regexp ("caseCaSe", 'case'), 1)
1063 %!assert (regexp ("caseCaSe", 'case', "matchcase"), 1)
1064 %!assert (regexp ("caseCaSe", 'case', "ignorecase"), [1,5])
1065 %!test
1066 %! assert (regexp ("caseCaSe", '(?-i)case'), 1);
1067 %! assert (regexp ("caseCaSe", '(?i)case'), [1, 5]);
1068 
1069 %!assert (regexp ("abc\nabc", 'c$'), 7)
1070 %!assert (regexp ("abc\nabc", 'c$', "stringanchors"), 7)
1071 %!test
1072 %! assert (regexp ("abc\nabc", '(?-m)c$'), 7);
1073 %! assert (regexp ("abc\nabc", 'c$',"lineanchors"), [3, 7]);
1074 %! assert (regexp ("abc\nabc", '(?m)c$'), [3,7]);
1075 
1076 %!assert (regexp ("this word", 's w'), 4)
1077 %!assert (regexp ("this word", 's w', 'literalspacing'), 4)
1078 %!test
1079 %! assert (regexp ("this word", '(?-x)s w', 'literalspacing'), 4);
1080 %! assert (regexp ("this word", 's w', 'freespacing'), zeros (1,0));
1081 %! assert (regexp ("this word", '(?x)s w'), zeros (1,0));
1082 
1083 %!test
1084 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'noemptymatch');
1085 %! assert (s, [1 5]);
1086 %! assert (e, [3 5]);
1087 %! assert (te, { zeros(0,2), zeros(0,2) });
1088 %! assert (m, { "OCT", "V" });
1089 %! assert (t, { cell(1,0), cell(1,0) });
1090 %! assert (isempty (fieldnames (nm)));
1091 %! assert (sp, { "", "A", "E" });
1092 
1093 %!test
1094 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'noemptymatch');
1095 %! assert (s, [1 5]);
1096 %! assert (e, [3 5]);
1097 %! assert (te, { [1 3], [5 5] });
1098 %! assert (m, { "OCT", "V" });
1099 %! assert (t, { {"OCT"}, {"V"} });
1100 %! assert (isempty (fieldnames (nm)));
1101 %! assert (sp, { "", "A", "E" });
1102 
1103 %!test
1104 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'emptymatch');
1105 %! assert (s, [1 4 5 6 7]);
1106 %! assert (e, [3 3 5 5 6]);
1107 %! assert (te, repmat ({zeros(0,2)}, [1, 5]));
1108 %! assert (m, { "OCT", "", "V", "", "" });
1109 %! assert (t, repmat({cell(1,0)}, [1, 5]));
1110 %! assert (isempty (fieldnames (nm)));
1111 %! assert (sp, { "", "", "A", "", "E", "" });
1112 
1113 %!test
1114 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'emptymatch');
1115 %! assert (s, [1 4 5 6 7]);
1116 %! assert (e, [3 3 5 5 6]);
1117 %! assert (te, { [1 3], [4 3], [5 5], [6 5], [7 6] });
1118 %! assert (m, { "OCT", "", "V", "", "" });
1119 %! assert (t, { {"OCT"}, {""}, {"V"}, {""}, {""} });
1120 %! assert (isempty (fieldnames (nm)));
1121 %! assert (sp, { "", "", "A", "", "E", "" });
1122 
1123 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1,0)})
1124 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1125 %!assert (regexp ('Strings', {'t','s'}), {2, 7})
1126 
1127 ## Test case for lookaround operators
1128 %!test
1129 %! assert (regexp ('Iraq', 'q(?!u)'), 4);
1130 %! assert (regexp ('quit', 'q(?!u)'), zeros (1, 0));
1131 %! assert (regexp ('quit', 'q(?=u)' , 'match'), {'q'});
1132 %! assert (regexp ("quit", 'q(?=u+)', 'match'), {'q'});
1133 %! assert (regexp ("qit", 'q(?=u+)', 'match'), cell (1, 0));
1134 %! assert (regexp ("qit", 'q(?=u*)', 'match'), {'q'});
1135 %! assert (regexp ('thingamabob', '(?<=a)b'), 9);
1136 
1137 ## Tests for split option.
1138 %!shared str
1139 %! str = "foo bar foo";
1140 %!test
1141 %! [a, b] = regexp (str, "f..", "match", "split");
1142 %! assert (a, {"foo", "foo"});
1143 %! assert (b, {"", " bar ", ""});
1144 %!test
1145 %! [a, b] = regexp (str, "f..", "match", "split", "once");
1146 %! assert (a, "foo");
1147 %! assert (b, {"", " bar foo"});
1148 %!test
1149 %! [a, b] = regexp (str, "fx.", "match", "split");
1150 %! assert (a, cell (1, 0));
1151 %! assert (b, {"foo bar foo"});
1152 %!test
1153 %! [a, b] = regexp (str, "fx.", "match", "split", "once");
1154 %! assert (a, "");
1155 %! assert (b, "foo bar foo");
1156 
1157 %!shared str
1158 %! str = "foo bar";
1159 %!test
1160 %! [a, b] = regexp (str, "f..", "match", "split");
1161 %! assert (a, {"foo"});
1162 %! assert (b, {"", " bar"});
1163 %!test
1164 %! [a, b] = regexp (str, "b..", "match", "split");
1165 %! assert (a, {"bar"});
1166 %! assert (b, {"foo ", ""});
1167 %!test
1168 %! [a, b] = regexp (str, "x", "match", "split");
1169 %! assert (a, cell (1, 0));
1170 %! assert (b, {"foo bar"});
1171 %!test
1172 %! [a, b] = regexp (str, "[o]+", "match", "split");
1173 %! assert (a, {"oo"});
1174 %! assert (b, {"f", " bar"});
1175 
1176 ## Test escape sequences are expanded even in single-quoted strings
1177 %!assert (regexp ("\n", '\n'), 1)
1178 %!assert (regexp ("\n", "\n"), 1)
1179 
1180 ## Test escape sequences are silently converted
1181 %!test <*45407>
1182 %! assert (regexprep ('s', 's', 'x\.y'), 'x.y');
1183 %! assert (regexprep ('s', '(s)', 'x\$1y'), 'x$1y');
1184 %! assert (regexprep ('s', '(s)', 'x\\$1y'), 'x\sy');
1185 
1186 ## Test input validation
1187 %!error regexp ('string', 'tri', 'BadArg')
1188 %!error regexp ('string')
1189 
1190 */
1191 
1192 DEFUN (regexpi, args, nargout,
1193  doc: /* -*- texinfo -*-
1194 @deftypefn {} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexpi (@var{str}, @var{pat})
1195 @deftypefnx {} {[@dots{}] =} regexpi (@var{str}, @var{pat}, "@var{opt1}", @dots{})
1196 
1197 Case insensitive regular expression string matching.
1198 
1199 Search for @var{pat} in @var{str} and return the positions and substrings of
1200 any matches, or empty values if there are none. @xref{XREFregexp,,regexp},
1201 for details on the syntax of the search pattern.
1202 @seealso{regexp}
1203 @end deftypefn */)
1204 {
1205  if (args.length () < 2)
1206  print_usage ();
1207 
1208  if (args(0).iscell () || args(1).iscell ())
1209  return octcellregexp (args, (nargout > 0 ? nargout : 1), "regexpi", true);
1210  else
1211  return octregexp (args, nargout, "regexpi", true);
1212 }
1213 
1214 /*
1215 ## segfault test
1216 %!assert (regexpi ("abcde", "."), [1,2,3,4,5])
1217 
1218 ## Check that anchoring of pattern works correctly
1219 %!assert (regexpi ('abcabc', '^ABC'), 1)
1220 %!assert (regexpi ('abcabc', 'ABC$'), 4)
1221 %!assert (regexpi ('abcabc', '^ABC$'), zeros (1,0))
1222 
1223 %!test
1224 %! [s, e, te, m, t] = regexpi (' No Match ', 'f(.*)uck');
1225 %! assert (s, zeros (1,0));
1226 %! assert (e, zeros (1,0));
1227 %! assert (te, cell (1,0));
1228 %! assert (m, cell (1,0));
1229 %! assert (t, cell (1,0));
1230 
1231 %!test
1232 %! [s, e, te, m, t] = regexpi (' FiRetrUck ', 'f(.*)uck');
1233 %! assert (s, 2);
1234 %! assert (e, 10);
1235 %! assert (te{1}, [3, 7]);
1236 %! assert (m{1}, 'FiRetrUck');
1237 %! assert (t{1}{1}, 'iRetr');
1238 
1239 %!test
1240 %! [s, e, te, m, t] = regexpi (' firetruck ', 'f(.*)uck');
1241 %! assert (s, 2);
1242 %! assert (e, 10);
1243 %! assert (te{1}, [3, 7]);
1244 %! assert (m{1}, 'firetruck');
1245 %! assert (t{1}{1}, 'iretr');
1246 
1247 %!test
1248 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*');
1249 %! assert (s, [1, 12]);
1250 %! assert (e, [5, 17]);
1251 %! assert (size (te), [1, 2]);
1252 %! assert (isempty (te{1}));
1253 %! assert (isempty (te{2}));
1254 %! assert (m{1}, 'ShoRt');
1255 %! assert (m{2}, 'String');
1256 %! assert (size (t), [1, 2]);
1257 %! assert (isempty (t{1}));
1258 %! assert (isempty (t{2}));
1259 
1260 %!test
1261 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once');
1262 %! assert (s, 1);
1263 %! assert (e, 5);
1264 %! assert (isempty (te));
1265 %! assert (m, 'ShoRt');
1266 %! assert (isempty (t));
1267 
1268 %!test
1269 %! [m, te, e, s, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1270 %! assert (s, 1);
1271 %! assert (e, 5);
1272 %! assert (isempty (te));
1273 %! assert (m, 'ShoRt');
1274 %! assert (isempty (t));
1275 
1276 %!test
1277 %! [s, e, te, m, t, nm] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)');
1278 %! assert (s, 1);
1279 %! assert (e, 10);
1280 %! assert (size (te), [1, 1]);
1281 %! assert (te{1}, [1,5; 7,10]);
1282 %! assert (m{1}, 'ShoRt Test');
1283 %! assert (size (t), [1, 1]);
1284 %! assert (t{1}{1}, 'ShoRt');
1285 %! assert (t{1}{2}, 'Test');
1286 %! assert (size (nm), [1, 1]);
1287 %! assert (! isempty (fieldnames (nm)));
1288 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1289 %! assert (nm.word1, 'ShoRt');
1290 %! assert (nm.word2, 'Test');
1291 
1292 %!test
1293 %! [nm, m, te, e, s, t] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1294 %! assert (s, 1);
1295 %! assert (e, 10);
1296 %! assert (size (te), [1, 1]);
1297 %! assert (te{1}, [1,5; 7,10]);
1298 %! assert (m{1}, 'ShoRt Test');
1299 %! assert (size (t), [1, 1]);
1300 %! assert (t{1}{1}, 'ShoRt');
1301 %! assert (t{1}{2}, 'Test');
1302 %! assert (size (nm), [1, 1]);
1303 %! assert (! isempty (fieldnames (nm)));
1304 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1305 %! assert (nm.word1, 'ShoRt');
1306 %! assert (nm.word2, 'Test');
1307 
1308 %!assert (regexpi ("abc\nabc", '.'), [1:7])
1309 %!assert (regexpi ("abc\nabc", '.', 'dotall'), [1:7])
1310 %!test
1311 %! assert (regexpi ("abc\nabc", '(?s).'), [1:7]);
1312 %! assert (regexpi ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
1313 %! assert (regexpi ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
1314 
1315 %!assert (regexpi ("caseCaSe", 'case'), [1, 5])
1316 %!assert (regexpi ("caseCaSe", 'case', "matchcase"), 1)
1317 %!assert (regexpi ("caseCaSe", 'case', "ignorecase"), [1, 5])
1318 %!test
1319 %! assert (regexpi ("caseCaSe", '(?-i)case'), 1);
1320 %! assert (regexpi ("caseCaSe", '(?i)case'), [1, 5]);
1321 
1322 %!assert (regexpi ("abc\nabc", 'C$'), 7)
1323 %!assert (regexpi ("abc\nabc", 'C$', "stringanchors"), 7)
1324 %!test
1325 %! assert (regexpi ("abc\nabc", '(?-m)C$'), 7);
1326 %! assert (regexpi ("abc\nabc", 'C$', "lineanchors"), [3, 7]);
1327 %! assert (regexpi ("abc\nabc", '(?m)C$'), [3, 7]);
1328 
1329 %!assert (regexpi ("this word", 'S w'), 4)
1330 %!assert (regexpi ("this word", 'S w', 'literalspacing'), 4)
1331 %!test
1332 %! assert (regexpi ("this word", '(?-x)S w', 'literalspacing'), 4);
1333 %! assert (regexpi ("this word", 'S w', 'freespacing'), zeros (1,0));
1334 %! assert (regexpi ("this word", '(?x)S w'), zeros (1,0));
1335 
1336 %!error regexpi ('string', 'tri', 'BadArg')
1337 %!error regexpi ('string')
1338 
1339 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1, 0)})
1340 %!assert (regexpi ({'asdfg-dfd', '-dfd-dfd-', 'qasfdfdaq'}, '-'), {6, [1,5,9], zeros(1,0)})
1341 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1342 %!assert (regexpi ('Strings', {'t', 's'}), {2, [1, 7]})
1343 
1344 %!assert (regexpi ("\n", '\n'), 1)
1345 %!assert (regexpi ("\n", "\n"), 1)
1346 */
1347 
1348 static octave_value
1349 octregexprep (const octave_value_list& args, const std::string& who)
1350 {
1351  int nargin = args.length ();
1352 
1353  // Make sure we have string, pattern, replacement
1354  const std::string buffer = args(0).string_value ();
1355 
1356  std::string pattern = args(1).string_value ();
1357 
1358  // Rewrite pattern for PCRE
1359  pattern = do_regexp_ptn_string_escapes (pattern, args(1).is_sq_string ());
1360 
1361  std::string replacement = args(2).string_value ();
1362 
1363  // Matlab compatibility.
1364  if (args(2).is_sq_string ())
1365  replacement = do_regexp_rep_string_escapes (replacement);
1366 
1367  // Pack options excluding 'tokenize' and various output
1368  // reordering strings into regexp arg list
1369  octave_value_list regexpargs (nargin-3, octave_value ());
1370 
1371  int len = 0;
1372  for (int i = 3; i < nargin; i++)
1373  {
1374  const std::string opt = args(i).string_value ();
1375  if (opt != "tokenize" && opt != "start" && opt != "end"
1376  && opt != "tokenextents" && opt != "match" && opt != "tokens"
1377  && opt != "names" && opt != "split" && opt != "warnings")
1378  {
1379  regexpargs(len++) = args(i);
1380  }
1381  }
1382  regexpargs.resize (len);
1383 
1385  bool extra_args = false;
1386  parse_options (options, regexpargs, who, 0, extra_args);
1387 
1388  return octave::regexp::replace (pattern, buffer, replacement, options, who);
1389 }
1390 
1391 DEFUN (regexprep, args, ,
1392  doc: /* -*- texinfo -*-
1393 @deftypefn {} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})
1394 @deftypefnx {} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, "@var{opt1}", @dots{})
1395 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.
1396 
1397 The pattern is a regular expression as documented for @code{regexp}.
1398 @xref{XREFregexp,,regexp}.
1399 
1400 The replacement string may contain @code{$i}, which substitutes for the ith
1401 set of parentheses in the match string. For example,
1402 
1403 @example
1404 regexprep ("Bill Dunn", '(\w+) (\w+)', '$2, $1')
1405 @end example
1406 
1407 @noindent
1408 returns @qcode{"Dunn, Bill"}
1409 
1410 Options in addition to those of @code{regexp} are
1411 
1412 @table @samp
1413 
1414 @item once
1415 Replace only the first occurrence of @var{pat} in the result.
1416 
1417 @item warnings
1418 This option is present for compatibility but is ignored.
1419 
1420 @end table
1421 
1422 Implementation Note: For compatibility with @sc{matlab}, escape sequences
1423 in @var{pat} (e.g., @qcode{"@xbackslashchar{}n"} => newline) are expanded
1424 even when @var{pat} has been defined with single quotes. To disable
1425 expansion use a second backslash before the escape sequence (e.g.,
1426 "@xbackslashchar{}@xbackslashchar{}n") or use the @code{regexptranslate}
1427 function.
1428 @seealso{regexp, regexpi, strrep}
1429 @end deftypefn */)
1430 {
1431  if (args.length () < 3)
1432  print_usage ();
1433 
1435 
1436  if (args(0).iscell () || args(1).iscell () || args(2).iscell ())
1437  {
1438  Cell str, pat, rep;
1439  dim_vector dv0;
1440  dim_vector dv1 (1, 1);
1441 
1442  if (args(0).iscell ())
1443  str = args(0).cell_value ();
1444  else
1445  str = Cell (args(0));
1446 
1447  if (args(1).iscell ())
1448  pat = args(1).cell_value ();
1449  else
1450  pat = Cell (args(1));
1451 
1452  if (args(2).iscell ())
1453  rep = args(2).cell_value ();
1454  else
1455  rep = Cell (args(2));
1456 
1457  dv0 = str.dims ();
1458  if (pat.numel () != 1)
1459  {
1460  dv1 = pat.dims ();
1461  if (rep.numel () != 1 && dv1 != rep.dims ())
1462  error ("regexprep: inconsistent cell array dimensions");
1463  }
1464  else if (rep.numel () != 1)
1465  dv1 = rep.dims ();
1466 
1467  Cell ret (dv0);
1468  octave_value_list new_args = args;
1469 
1470  for (octave_idx_type i = 0; i < dv0.numel (); i++)
1471  {
1472  new_args(0) = str(i);
1473  if (pat.numel () == 1)
1474  new_args(1) = pat(0);
1475  if (rep.numel () == 1)
1476  new_args(2) = rep(0);
1477 
1478  for (octave_idx_type j = 0; j < dv1.numel (); j++)
1479  {
1480  if (pat.numel () != 1)
1481  new_args(1) = pat(j);
1482  if (rep.numel () != 1)
1483  new_args(2) = rep(j);
1484  new_args(0) = octregexprep (new_args, "regexprep");
1485  }
1486 
1487  ret(i) = new_args(0);
1488  }
1489 
1490  retval = (args(0).iscell () ? ovl (ret) : ovl (ret(0)));
1491  }
1492  else
1493  retval = octregexprep (args, "regexprep");
1494 
1495  return retval;
1496 }
1497 
1498 /*
1499 %!test # Replace with empty
1500 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1501 %! t = regexprep (xml, '<[!?][^>]*>', '');
1502 %! assert (t, ' <tag v="hello">some stuff</tag>');
1503 
1504 %!test # Replace with non-empty
1505 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1506 %! t = regexprep (xml, '<[!?][^>]*>', '?');
1507 %! assert (t, '? <tag v="hello">some stuff?</tag>');
1508 
1509 %!test # Check that 'tokenize' is ignored
1510 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1511 %! t = regexprep (xml, '<[!?][^>]*>', '', 'tokenize');
1512 %! assert (t, ' <tag v="hello">some stuff</tag>');
1513 
1514 ## Test capture replacement
1515 %!test
1516 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
1517 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
1518 %! t = regexprep (data, '(?m)^(\w+)\s+(\w+)$', '$2, $1');
1519 %! assert (t, result);
1520 
1521 ## Return the original if no match
1522 %!assert (regexprep ('hello', 'world', 'earth'), 'hello')
1523 
1524 ## Test emptymatch option
1525 %!assert (regexprep ('World', '^', 'Hello '), 'World')
1526 %!assert (regexprep ('World', '^', 'Hello ', 'emptymatch'), 'Hello World')
1527 
1528 ## Test a general replacement
1529 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g")
1530 
1531 ## Make sure replacements work at the beginning and end of string
1532 %!assert (regexprep ("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g")
1533 %!assert (regexprep ("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_")
1534 
1535 ## Test options "once" and "ignorecase"
1536 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"),
1537 %! "a_b]c{d}e-f=g")
1538 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"),
1539 %! "a_b_c_d_e_f_g")
1540 
1541 ## Option combinations
1542 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"),
1543 %! "a_b]c{d}e-f=g")
1544 
1545 ## End conditions on replacement
1546 %!assert (regexprep ("abc", "(b)", ".$1"), "a.bc")
1547 %!assert (regexprep ("abc", "(b)", "$1"), "abc")
1548 %!assert (regexprep ("abc", "(b)", "$1."), "ab.c")
1549 %!assert (regexprep ("abc", "(b)", "$1.."), "ab..c")
1550 
1551 ## Test cell array arguments
1552 %!assert (regexprep ("abc", {"b","a"}, "?"), "??c")
1553 %!assert (regexprep ({"abc","cba"}, "b", "?"), {"a?c","c?a"})
1554 %!assert (regexprep ({"abc","cba"}, {"b","a"}, {"?","!"}), {"!?c","c?!"})
1555 
1556 ## Nasty lookbehind expression
1557 %!test
1558 %! warning ("off", "Octave:regexp-lookbehind-limit", "local");
1559 %! assert (regexprep ('x^(-1)+y(-1)+z(-1)=0', '(?<=[a-z]+)\(\-[1-9]*\)',
1560 %! '_minus1'),'x^(-1)+y_minus1+z_minus1=0');
1561 
1562 ## Verify escape sequences in pattern
1563 %!assert (regexprep ("\n", '\n', "X"), "X")
1564 %!assert (regexprep ("\n", "\n", "X"), "X")
1565 
1566 ## Verify NULLs in pattern and replacement string
1567 %!assert (regexprep ("A\0A", "\0", ","), "A,A")
1568 %!assert (regexprep ("A\0A", '\0', ","), "A,A")
1569 %!assert (regexprep ("A,A", "A", "B\0B"), "B\0B,B\0B")
1570 %!assert (regexprep ("A,A", "A", 'B\0B'), "B\0B,B\0B")
1571 
1572 ## Empty matches were broken on ARM architecture
1573 %!test <*52810>
1574 %! assert (strcmp (regexprep ("\nabc", "^(\t*)(abc)$", "$1$2", "lineanchors"), "\nabc"))
1575 */
Definition: Cell.h:37
void assign(const std::string &k, const Cell &val)
Definition: oct-map.h:351
OCTINTERP_API void print_usage(void)
Definition: defun.cc:54
for large enough k
Definition: lu.cc:617
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:53
void error(const char *fmt,...)
Definition: error.cc:578
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:442
static octave_value_list octcellregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
Definition: regexp.cc:541
s
Definition: file-io.cc:2729
i e
Definition: data.cc:2591
octave_idx_type numel(void) const
Definition: Array.h:174
static std::string do_regexp_rep_string_escapes(const std::string &s)
Definition: regexp.cc:140
octave_value resize(const dim_vector &dv, bool fill=false) const
Definition: ov.h:511
size_t size(void) const
Definition: base-list.h:49
static std::string do_regexp_ptn_string_escapes(const std::string &s, bool is_sq_str)
Definition: regexp.cc:53
OCTAVE_EXPORT octave_value_list return the number of command line arguments passed to Octave If called with the optional argument the function xample nargout(@histc)
Definition: ov-usr-fcn.cc:997
static octave_value_list octregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
Definition: regexp.cc:357
std::string str
Definition: hash.cc:118
void resize(const dim_vector &dv, const T &rfv)
Resizing (with fill).
Definition: Array.cc:1010
double tmp
Definition: data.cc:6252
octave_value retval
Definition: data.cc:6246
static void parse_options(octave::regexp::opts &options, const octave_value_list &args, const std::string &who, int skip, bool &extra_args)
Definition: regexp.cc:308
std::string replace(const std::string &buffer, const std::string &replacement)
Definition: lo-regexp.cc:458
Definition: dMatrix.h:36
sz
Definition: data.cc:5264
is longer than or if then or only for unique occurrences of the complete pattern(false). The default is true. If a cell array of strings ar
Definition: strfind.cc:190
void warning(const char *fmt,...)
Definition: error.cc:801
match_data match(const std::string &buffer)
Definition: lo-regexp.cc:247
return octave_value(v1.char_array_value() . concat(v2.char_array_value(), ra_idx),((a1.is_sq_string()||a2.is_sq_string()) ? '\'' :'"'))
std::list< match_element >::const_iterator const_iterator
Definition: base-list.h:41
octave::sys::time start
Definition: graphics.cc:12337
OCTAVE_EXPORT octave_value_list isa nd deftypefn *return ovl(args(0).isinteger())
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:362
p
Definition: lu.cc:138
string_vector named_patterns(void)
Definition: lo-regexp.h:261
OCTAVE_EXPORT octave_value_list only variables visible in the local scope are displayed The following are valid options
Definition: variables.cc:1862
Cell cell_value(void) const
Definition: ovl.h:88
octave_idx_type length(void) const
Definition: ovl.h:96
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:41
void resize(octave_idx_type n, const octave_value &rfv=octave_value())
Definition: ovl.h:100
args.length() nargin
Definition: file-io.cc:589
ColumnVector transform(const Matrix &m, double x, double y, double z)
Definition: graphics.cc:5410
for i
Definition: data.cc:5264
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:366
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:888
iterator begin(void)
Definition: base-list.h:83
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:204
Array< T >::ArrayRep * rep
Definition: Array.h:218