GNU Octave  4.2.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
regexp.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2005-2017 David Bateman
4 Copyright (C) 2002-2005 Paul Kienzle
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include <list>
29 #include <sstream>
30 
31 #include <pcre.h>
32 
33 #include "base-list.h"
34 #include "oct-locbuf.h"
35 #include "quit.h"
36 #include "lo-regexp.h"
37 #include "str-vec.h"
38 
39 #include "defun.h"
40 #include "Cell.h"
41 #include "error.h"
42 #include "errwarn.h"
43 #include "oct-map.h"
44 #include "ovl.h"
45 #include "utils.h"
46 
47 // Replace backslash escapes in a string with the real values. We need
48 // two special functions instead of the one in utils.cc because the set
49 // of escape sequences used for regexp patterns and replacement strings
50 // is different from those used in the *printf functions.
51 
52 static std::string
53 do_regexp_ptn_string_escapes (const std::string& s, bool is_sq_str)
54 {
56 
57  size_t i = 0;
58  size_t j = 0;
59  size_t len = s.length ();
60 
61  retval.resize (len);
62 
63  while (j < len)
64  {
65  if (s[j] == '\\' && j+1 < len)
66  {
67  switch (s[++j])
68  {
69  case 'b': // backspace
70  if (is_sq_str)
71  retval[i] = '\b';
72  else
73  {
74  // Pass escape sequence through
75  retval[i] = '\\';
76  retval[++i] = 'b';
77  }
78  break;
79 
80  // Translate < and > to PCRE word boundary
81  case '<': // begin word boundary
82  case '>': // end word boundary
83  retval[i] = '\\';
84  retval[++i] = 'b';
85  break;
86 
87  case 'o': // octal input
88  {
89  bool bad_esc_seq = (j+1 >= len);
90 
91  bool brace = false;
92  if (! bad_esc_seq && s[++j] == '{')
93  {
94  brace = true;
95  j++;
96  }
97 
98  int tmpi = 0;
99  size_t k;
100  for (k = j; k < std::min (j+3+brace, len); k++)
101  {
102  int digit = s[k] - '0';
103  if (digit < 0 || digit > 7)
104  break;
105  tmpi <<= 3;
106  tmpi += digit;
107  }
108  if (bad_esc_seq || (brace && s[k++] != '}'))
109  {
110  bad_esc_seq = true;
111  tmpi = 0;
112  warning ("malformed octal escape sequence '\\o' -- converting to '\\0'");
113  }
114  retval[i] = tmpi;
115  j = k - 1;
116  break;
117  }
118 
119  default: // pass escape sequence through
120  retval[i] = '\\';
121  retval[++i] = s[j];
122  break;
123  }
124  }
125  else
126  {
127  retval[i] = s[j];
128  }
129 
130  i++;
131  j++;
132  }
133 
134  retval.resize (i);
135 
136  return retval;
137 }
138 
139 static std::string
141 {
143 
144  size_t i = 0;
145  size_t j = 0;
146  size_t len = s.length ();
147 
148  retval.resize (len);
149 
150  while (j < len)
151  {
152  if (s[j] == '\\' && j+1 < len)
153  {
154  switch (s[++j])
155  {
156  case 'a': // alarm
157  retval[i] = '\a';
158  break;
159 
160  case 'b': // backspace
161  retval[i] = '\b';
162  break;
163 
164  case 'f': // formfeed
165  retval[i] = '\f';
166  break;
167 
168  case 'n': // newline
169  retval[i] = '\n';
170  break;
171 
172  case 'r': // carriage return
173  retval[i] = '\r';
174  break;
175 
176  case 't': // horizontal tab
177  retval[i] = '\t';
178  break;
179 
180  case 'v': // vertical tab
181  retval[i] = '\v';
182  break;
183 
184  case 'o': // octal input
185  {
186  bool bad_esc_seq = (j+1 >= len);
187 
188  bool brace = false;
189  if (! bad_esc_seq && s[++j] == '{')
190  {
191  brace = true;
192  j++;
193  }
194 
195  int tmpi = 0;
196  size_t k;
197  for (k = j; k < std::min (j+3+brace, len); k++)
198  {
199  int digit = s[k] - '0';
200  if (digit < 0 || digit > 7)
201  break;
202  tmpi <<= 3;
203  tmpi += digit;
204  }
205  if (bad_esc_seq || (brace && s[k++] != '}'))
206  {
207  warning ("malformed octal escape sequence '\\o' -- converting to '\\0'");
208  tmpi = 0;
209  }
210  retval[i] = tmpi;
211  j = k - 1;
212  break;
213  }
214 
215  case 'x': // hex input
216  {
217  bool bad_esc_seq = (j+1 >= len);
218 
219  bool brace = false;
220  if (! bad_esc_seq && s[++j] == '{')
221  {
222  brace = true;
223  j++;
224  }
225 
226  int tmpi = 0;
227  size_t k;
228  for (k = j; k < std::min (j+2+brace, len); k++)
229  {
230  if (! isxdigit (s[k]))
231  break;
232 
233  tmpi <<= 4;
234  int digit = s[k];
235  if (digit >= 'a')
236  tmpi += digit - 'a' + 10;
237  else if (digit >= 'A')
238  tmpi += digit - 'A' + 10;
239  else
240  tmpi += digit - '0';
241  }
242  if (bad_esc_seq || (brace && s[k++] != '}'))
243  {
244  warning ("malformed hex escape sequence '\\x' -- converting to '\\0'");
245  tmpi = 0;
246  }
247  retval[i] = tmpi;
248  j = k - 1;
249  break;
250  }
251 
252  // Both dollar sign (for capture buffer) and backslash are
253  // passed through with their escape backslash. The processing
254  // for these must occur during the actual replacement operation
255  // in lo-regexp.cc.
256  case '$': // pass dollar sign through with escape
257  retval[i] = '\\'; retval[++i] = '$';
258  break;
259 
260  case '\\': // pass backslash through with escape
261  retval[i] = '\\'; retval[++i] = '\\';
262  break;
263 
264  default: // convert escaped character to unescaped char
265  retval[i] = s[j];
266  break;
267  }
268  }
269  else
270  {
271  retval[i] = s[j];
272  }
273 
274  i++;
275  j++;
276  }
277 
278  retval.resize (i);
279 
280  return retval;
281 }
282 
283 static void
285  const std::string& who, int skip, bool& extra_args)
286 {
287  extra_args = false;
288 
289  for (int i = skip; i < args.length (); i++)
290  {
292 
293  str = args(i).xstring_value ("%s: optional arguments must be strings", who.c_str ());
294 
295  std::transform (str.begin (), str.end (), str.begin (), tolower);
296 
297  if (str.find ("once", 0) == 0)
298  options.once (true);
299  else if (str.find ("matchcase", 0) == 0)
300  options.case_insensitive (false);
301  else if (str.find ("ignorecase", 0) == 0)
302  options.case_insensitive (true);
303  else if (str.find ("dotall", 0) == 0)
304  options.dotexceptnewline (false);
305  else if (str.find ("stringanchors", 0) == 0)
306  options.lineanchors (false);
307  else if (str.find ("literalspacing", 0) == 0)
308  options.freespacing (false);
309  else if (str.find ("noemptymatch", 0) == 0)
310  options.emptymatch (false);
311  else if (str.find ("dotexceptnewline", 0) == 0)
312  options.dotexceptnewline (true);
313  else if (str.find ("lineanchors", 0) == 0)
314  options.lineanchors (true);
315  else if (str.find ("freespacing", 0) == 0)
316  options.freespacing (true);
317  else if (str.find ("emptymatch", 0) == 0)
318  options.emptymatch (true);
319  else if (str.find ("start", 0) == 0
320  || str.find ("end", 0) == 0
321  || str.find ("tokenextents", 0) == 0
322  || str.find ("match", 0) == 0
323  || str.find ("tokens", 0) == 0
324  || str.find ("names", 0) == 0
325  || str.find ("split", 0) == 0)
326  extra_args = true;
327  else
328  error ("%s: unrecognized option", who.c_str ());
329  }
330 }
331 
332 static octave_value_list
334  const std::string &who, bool case_insensitive = false)
335 {
337 
338  int nargin = args.length ();
339 
340  // Make sure we have string, pattern
341  const std::string buffer = args(0).string_value ();
342 
343  std::string pattern = args(1).string_value ();
344 
345  // Rewrite pattern for PCRE
346  pattern = do_regexp_ptn_string_escapes (pattern, args(1).is_sq_string ());
347 
349  options.case_insensitive (case_insensitive);
350  bool extra_options = false;
351  parse_options (options, args, who, 2, extra_options);
352 
354  = octave::regexp::match (pattern, buffer, options, who);
355 
356  string_vector named_pats = rx_lst.named_patterns ();
357 
358  size_t sz = rx_lst.size ();
359 
360  // Converted the linked list in the correct form for the return values
361 
362  octave_idx_type i = 0;
363  octave_scalar_map nmap;
364 
365  retval.resize (7);
366 
367  if (sz == 1)
368  {
369  string_vector named_tokens = rx_lst.begin ()->named_tokens ();
370 
371  for (int j = 0; j < named_pats.numel (); j++)
372  nmap.assign (named_pats(j), named_tokens(j));
373 
374  retval(5) = nmap;
375  }
376  else
377  {
378  for (int j = 0; j < named_pats.numel (); j++)
379  {
380  Cell tmp (dim_vector (1, sz));
381 
382  i = 0;
384  p != rx_lst.end (); p++)
385  {
386  string_vector named_tokens = p->named_tokens ();
387 
388  tmp(i++) = named_tokens(j);
389  }
390 
391  nmap.assign (named_pats(j), octave_value (tmp));
392  }
393 
394  retval(5) = nmap;
395  }
396 
397  if (options.once ())
398  {
400 
401  retval(4) = sz ? p->tokens () : Cell ();
402  retval(3) = sz ? p->match_string () : "";
403  retval(2) = sz ? p->token_extents () : Matrix ();
404 
405  if (sz)
406  {
407  double start = p->start ();
408  double end = p->end ();
409 
410  Cell split (dim_vector (1, 2));
411  split(0) = buffer.substr (0, start-1);
412  split(1) = buffer.substr (end);
413 
414  retval(6) = split;
415  retval(1) = end;
416  retval(0) = start;
417  }
418  else
419  {
420  retval(6) = buffer;
421  retval(1) = Matrix ();
422  retval(0) = Matrix ();
423  }
424  }
425  else
426  {
427  Cell tokens (dim_vector (1, sz));
428  Cell match_string (dim_vector (1, sz));
429  Cell token_extents (dim_vector (1, sz));
430  NDArray end (dim_vector (1, sz));
431  NDArray start (dim_vector (1, sz));
432  Cell split (dim_vector (1, sz+1));
433  size_t sp_start = 0;
434 
435  i = 0;
437  p != rx_lst.end (); p++)
438  {
439  double s = p->start ();
440  double e = p->end ();
441 
442  string_vector tmp = p->tokens ();
443  tokens(i) = Cell (dim_vector (1, tmp.numel ()), tmp);
444  match_string(i) = p->match_string ();
445  token_extents(i) = p->token_extents ();
446  end(i) = e;
447  start(i) = s;
448  split(i) = buffer.substr (sp_start, s-sp_start-1);
449  sp_start = e;
450  i++;
451  }
452 
453  split(i) = buffer.substr (sp_start);
454 
455  retval(6) = split;
456  retval(4) = tokens;
457  retval(3) = match_string;
458  retval(2) = token_extents;
459  retval(1) = end;
460  retval(0) = start;
461  }
462 
463  // Alter the order of the output arguments
464 
465  if (extra_options)
466  {
467  int n = 0;
468  octave_value_list new_retval;
469  new_retval.resize (nargout);
470 
471  OCTAVE_LOCAL_BUFFER (int, arg_used, 6);
472  for (int j = 0; j < 6; j++)
473  arg_used[j] = false;
474 
475  for (int j = 2; j < nargin; j++)
476  {
477  int k = 0;
478  std::string str = args(j).string_value ();
479  std::transform (str.begin (), str.end (), str.begin (), tolower);
480 
481  if (str.find ("once", 0) == 0
482  || str.find ("stringanchors", 0) == 0
483  || str.find ("lineanchors", 0) == 0
484  || str.find ("matchcase", 0) == 0
485  || str.find ("ignorecase", 0) == 0
486  || str.find ("dotall", 0) == 0
487  || str.find ("dotexceptnewline", 0) == 0
488  || str.find ("literalspacing", 0) == 0
489  || str.find ("freespacing", 0) == 0
490  || str.find ("noemptymatch", 0) == 0
491  || str.find ("emptymatch", 0) == 0)
492  continue;
493  else if (str.find ("start", 0) == 0)
494  k = 0;
495  else if (str.find ("end", 0) == 0)
496  k = 1;
497  else if (str.find ("tokenextents", 0) == 0)
498  k = 2;
499  else if (str.find ("match", 0) == 0)
500  k = 3;
501  else if (str.find ("tokens", 0) == 0)
502  k = 4;
503  else if (str.find ("names", 0) == 0)
504  k = 5;
505  else if (str.find ("split", 0) == 0)
506  k = 6;
507 
508  new_retval(n++) = retval(k);
509  arg_used[k] = true;
510 
511  if (n == nargout)
512  break;
513  }
514 
515  // Fill in the rest of the arguments
516  if (n < nargout)
517  {
518  for (int j = 0; j < 6; j++)
519  {
520  if (! arg_used[j])
521  new_retval(n++) = retval(j);
522  }
523  }
524 
525  retval = new_retval;
526  }
527 
528  return retval;
529 }
530 
531 static octave_value_list
533  const std::string &who, bool case_insensitive = false)
534 {
536 
537  if (args(0).is_cell ())
538  {
539  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
540  octave_value_list new_args = args;
541  Cell cellstr = args(0).cell_value ();
542  if (args(1).is_cell ())
543  {
544  Cell cellpat = args(1).cell_value ();
545 
546  if (cellpat.numel () == 1)
547  {
548  for (int j = 0; j < nargout; j++)
549  newretval[j].resize (cellstr.dims ());
550 
551  new_args(1) = cellpat(0);
552 
553  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
554  {
555  new_args(0) = cellstr(i);
556  octave_value_list tmp = octregexp (new_args, nargout, who,
557  case_insensitive);
558 
559  for (int j = 0; j < nargout; j++)
560  newretval[j](i) = tmp(j);
561  }
562  }
563  else if (cellstr.numel () == 1)
564  {
565  for (int j = 0; j < nargout; j++)
566  newretval[j].resize (cellpat.dims ());
567 
568  new_args(0) = cellstr(0);
569 
570  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
571  {
572  new_args(1) = cellpat(i);
573  octave_value_list tmp = octregexp (new_args, nargout, who,
574  case_insensitive);
575 
576  for (int j = 0; j < nargout; j++)
577  newretval[j](i) = tmp(j);
578  }
579  }
580  else if (cellstr.numel () == cellpat.numel ())
581  {
582  if (cellstr.dims () != cellpat.dims ())
583  error ("%s: inconsistent cell array dimensions", who.c_str ());
584 
585  for (int j = 0; j < nargout; j++)
586  newretval[j].resize (cellstr.dims ());
587 
588  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
589  {
590  new_args(0) = cellstr(i);
591  new_args(1) = cellpat(i);
592 
593  octave_value_list tmp = octregexp (new_args, nargout, who,
594  case_insensitive);
595 
596  for (int j = 0; j < nargout; j++)
597  newretval[j](i) = tmp(j);
598  }
599  }
600  else
601  error ("regexp: cell array arguments must be scalar or equal size");
602  }
603  else
604  {
605  for (int j = 0; j < nargout; j++)
606  newretval[j].resize (cellstr.dims ());
607 
608  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
609  {
610  new_args(0) = cellstr(i);
611  octave_value_list tmp = octregexp (new_args, nargout, who,
612  case_insensitive);
613 
614  for (int j = 0; j < nargout; j++)
615  newretval[j](i) = tmp(j);
616  }
617  }
618 
619  for (int j = 0; j < nargout; j++)
620  retval(j) = octave_value (newretval[j]);
621  }
622  else if (args(1).is_cell ())
623  {
624  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
625  octave_value_list new_args = args;
626  Cell cellpat = args(1).cell_value ();
627 
628  for (int j = 0; j < nargout; j++)
629  newretval[j].resize (cellpat.dims ());
630 
631  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
632  {
633  new_args(1) = cellpat(i);
634  octave_value_list tmp = octregexp (new_args, nargout, who,
635  case_insensitive);
636 
637  for (int j = 0; j < nargout; j++)
638  newretval[j](i) = tmp(j);
639  }
640 
641  for (int j = 0; j < nargout; j++)
642  retval(j) = octave_value (newretval[j]);
643  }
644  else
645  retval = octregexp (args, nargout, who, case_insensitive);
646 
647  return retval;
648 
649 }
650 
651 DEFUN (regexp, args, nargout,
652  doc: /* -*- texinfo -*-
653 @deftypefn {} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexp (@var{str}, @var{pat})
654 @deftypefnx {} {[@dots{}] =} regexp (@var{str}, @var{pat}, "@var{opt1}", @dots{})
655 Regular expression string matching.
656 
657 Search for @var{pat} in @var{str} and return the positions and substrings of
658 any matches, or empty values if there are none.
659 
660 The matched pattern @var{pat} can include any of the standard regex
661 operators, including:
662 
663 @table @code
664 @item .
665 Match any character
666 
667 @item * + ? @{@}
668 Repetition operators, representing
669 
670 @table @code
671 @item *
672 Match zero or more times
673 
674 @item +
675 Match one or more times
676 
677 @item ?
678 Match zero or one times
679 
680 @item @{@var{n}@}
681 Match exactly @var{n} times
682 
683 @item @{@var{n},@}
684 Match @var{n} or more times
685 
686 @item @{@var{m},@var{n}@}
687 Match between @var{m} and @var{n} times
688 @end table
689 
690 @item [@dots{}] [^@dots{}]
691 
692 List operators. The pattern will match any character listed between
693 @qcode{"["} and @qcode{"]"}. If the first character is @qcode{"^"} then the
694 pattern is inverted and any character except those listed between brackets
695 will match.
696 
697 Escape sequences defined below can also be used inside list operators. For
698 example, a template for a floating point number might be @code{[-+.\d]+}.
699 
700 @item () (?:)
701 Grouping operator. The first form, parentheses only, also creates a token.
702 
703 @item |
704 Alternation operator. Match one of a choice of regular expressions. The
705 alternatives must be delimited by the grouping operator @code{()} above.
706 
707 @item ^ $
708 Anchoring operators. Requires pattern to occur at the start (@code{^}) or
709 end (@code{$}) of the string.
710 @end table
711 
712 In addition, the following escaped characters have special meaning.
713 
714 @table @code
715 
716 @item \d
717 Match any digit
718 
719 @item \D
720 Match any non-digit
721 
722 @item \s
723 Match any whitespace character
724 
725 @item \S
726 Match any non-whitespace character
727 
728 @item \w
729 Match any word character
730 
731 @item \W
732 Match any non-word character
733 
734 @item <
735 Match the beginning of a word
736 
737 @item >
738 Match the end of a word
739 
740 @item \B
741 Match within a word
742 @end table
743 
744 Implementation Note: For compatibility with @sc{matlab}, escape sequences
745 in @var{pat} (e.g., @qcode{"@xbackslashchar{}n"} => newline) are expanded
746 even when @var{pat} has been defined with single quotes. To disable
747 expansion use a second backslash before the escape sequence (e.g.,
748 "@xbackslashchar{}@xbackslashchar{}n") or use the @code{regexptranslate}
749 function.
750 
751 The outputs of @code{regexp} default to the order given below
752 
753 @table @var
754 @item s
755 The start indices of each matching substring
756 
757 @item e
758 The end indices of each matching substring
759 
760 @item te
761 The extents of each matched token surrounded by @code{(@dots{})} in
762 @var{pat}
763 
764 @item m
765 A cell array of the text of each match
766 
767 @item t
768 A cell array of the text of each token matched
769 
770 @item nm
771 A structure containing the text of each matched named token, with the name
772 being used as the fieldname. A named token is denoted by
773 @code{(?<name>@dots{})}.
774 
775 @item sp
776 A cell array of the text not returned by match, i.e., what remains if you
777 split the string based on @var{pat}.
778 @end table
779 
780 Particular output arguments, or the order of the output arguments, can be
781 selected by additional @var{opt} arguments. These are strings and the
782 correspondence between the output arguments and the optional argument
783 are
784 
785 @multitable @columnfractions 0.2 0.3 0.3 0.2
786 @item @tab @qcode{'start'} @tab @var{s} @tab
787 @item @tab @qcode{'end'} @tab @var{e} @tab
788 @item @tab @qcode{'tokenExtents'} @tab @var{te} @tab
789 @item @tab @qcode{'match'} @tab @var{m} @tab
790 @item @tab @qcode{'tokens'} @tab @var{t} @tab
791 @item @tab @qcode{'names'} @tab @var{nm} @tab
792 @item @tab @qcode{'split'} @tab @var{sp} @tab
793 @end multitable
794 
795 Additional arguments are summarized below.
796 
797 @table @samp
798 @item once
799 Return only the first occurrence of the pattern.
800 
801 @item matchcase
802 Make the matching case sensitive. (default)
803 
804 Alternatively, use (?-i) in the pattern.
805 
806 @item ignorecase
807 Ignore case when matching the pattern to the string.
808 
809 Alternatively, use (?i) in the pattern.
810 
811 @item stringanchors
812 Match the anchor characters at the beginning and end of the string.
813 (default)
814 
815 Alternatively, use (?-m) in the pattern.
816 
817 @item lineanchors
818 Match the anchor characters at the beginning and end of the line.
819 
820 Alternatively, use (?m) in the pattern.
821 
822 @item dotall
823 The pattern @code{.} matches all characters including the newline character.
824  (default)
825 
826 Alternatively, use (?s) in the pattern.
827 
828 @item dotexceptnewline
829 The pattern @code{.} matches all characters except the newline character.
830 
831 Alternatively, use (?-s) in the pattern.
832 
833 @item literalspacing
834 All characters in the pattern, including whitespace, are significant and are
835 used in pattern matching. (default)
836 
837 Alternatively, use (?-x) in the pattern.
838 
839 @item freespacing
840 The pattern may include arbitrary whitespace and also comments beginning
841 with the character @samp{#}.
842 
843 Alternatively, use (?x) in the pattern.
844 
845 @item noemptymatch
846 Zero-length matches are not returned. (default)
847 
848 @item emptymatch
849 Return zero-length matches.
850 
851 @code{regexp ('a', 'b*', 'emptymatch')} returns @code{[1 2]} because there
852 are zero or more @qcode{'b'} characters at positions 1 and end-of-string.
853 
854 @end table
855 @seealso{regexpi, strfind, regexprep}
856 @end deftypefn */)
857 {
858  if (args.length () < 2)
859  print_usage ();
860 
862 
863  if (args(0).is_cell () || args(1).is_cell ())
864  retval = octcellregexp (args, (nargout > 0 ? nargout : 1), "regexp");
865  else
866  retval = octregexp (args, nargout, "regexp");
867 
868  return retval;
869 }
870 
871 /*
872 ## PCRE_ERROR_MATCHLIMIT test
873 %!test
874 %! s = sprintf ('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-');
875 %! ws = warning ("query");
876 %! unwind_protect
877 %! warning ("off");
878 %! regexp (s, '(\s*-*\d+[.]*\d*\s*)+\n');
879 %! unwind_protect_cleanup
880 %! warning (ws);
881 %! end_unwind_protect
882 
883 ## segfault test
884 %!assert (regexp ("abcde", "."), [1,2,3,4,5])
885 ## Infinite loop test
886 %!assert (isempty (regexp ("abcde", "")))
887 
888 ## Check that anchoring of pattern works correctly
889 %!assert (regexp ('abcabc', '^abc'), 1)
890 %!assert (regexp ('abcabc', 'abc$'), 4)
891 %!assert (regexp ('abcabc', '^abc$'), zeros (1,0))
892 
893 %!test
894 %! [s, e, te, m, t] = regexp (' No Match ', 'f(.*)uck');
895 %! assert (s, zeros (1,0));
896 %! assert (e, zeros (1,0));
897 %! assert (te, cell (1,0));
898 %! assert (m, cell (1,0));
899 %! assert (t, cell (1,0));
900 
901 %!test
902 %! [s, e, te, m, t] = regexp (' FiRetrUck ', 'f(.*)uck');
903 %! assert (s, zeros (1,0));
904 %! assert (e, zeros (1,0));
905 %! assert (te, cell (1,0));
906 %! assert (m, cell (1,0));
907 %! assert (t, cell (1,0));
908 
909 %!test
910 %! [s, e, te, m, t] = regexp (' firetruck ', 'f(.*)uck');
911 %! assert (s, 2);
912 %! assert (e, 10);
913 %! assert (te{1}, [3, 7]);
914 %! assert (m{1}, 'firetruck');
915 %! assert (t{1}{1}, 'iretr');
916 
917 %!test
918 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*');
919 %! assert (s, [1, 12]);
920 %! assert (e, [5, 17]);
921 %! assert (size (te), [1, 2]);
922 %! assert (isempty (te{1}));
923 %! assert (isempty (te{2}));
924 %! assert (m{1}, 'short');
925 %! assert (m{2}, 'string');
926 %! assert (size (t), [1, 2]);
927 %! assert (isempty (t{1}));
928 %! assert (isempty (t{2}));
929 
930 %!test
931 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*', 'once');
932 %! assert (s, 1);
933 %! assert (e, 5);
934 %! assert (isempty (te));
935 %! assert (m, 'short');
936 %! assert (isempty (t));
937 
938 %!test
939 %! [m, te, e, s, t] = regexp ('short test string', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
940 %! assert (s, 1);
941 %! assert (e, 5);
942 %! assert (isempty (te));
943 %! assert (m, 'short');
944 %! assert (isempty (t));
945 
946 %!test
947 %! [s, e, te, m, t, nm] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)');
948 %! assert (s, 1);
949 %! assert (e, 10);
950 %! assert (size (te), [1, 1]);
951 %! assert (te{1}, [1,5; 7,10]);
952 %! assert (m{1}, 'short test');
953 %! assert (size (t), [1, 1]);
954 %! assert (t{1}{1}, 'short');
955 %! assert (t{1}{2}, 'test');
956 %! assert (size (nm), [1, 1]);
957 %! assert (! isempty (fieldnames (nm)));
958 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
959 %! assert (nm.word1, 'short');
960 %! assert (nm.word2, 'test');
961 
962 %!test
963 %! [nm, m, te, e, s, t] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
964 %! assert (s, 1);
965 %! assert (e, 10);
966 %! assert (size (te), [1, 1]);
967 %! assert (te{1}, [1,5; 7,10]);
968 %! assert (m{1}, 'short test');
969 %! assert (size (t), [1, 1]);
970 %! assert (t{1}{1}, 'short');
971 %! assert (t{1}{2}, 'test');
972 %! assert (size (nm), [1, 1]);
973 %! assert (! isempty (fieldnames (nm)));
974 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
975 %! assert (nm.word1, 'short');
976 %! assert (nm.word2, 'test');
977 
978 %!test
979 %! [t, nm] = regexp ("John Davis\nRogers, James", '(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)', 'tokens', 'names');
980 %! assert (size (t), [1, 2]);
981 %! assert (t{1}{1}, 'John');
982 %! assert (t{1}{2}, 'Davis');
983 %! assert (t{2}{1}, 'Rogers');
984 %! assert (t{2}{2}, 'James');
985 %! assert (size (nm), [1, 1]);
986 %! assert (nm.first{1}, 'John');
987 %! assert (nm.first{2}, 'James');
988 %! assert (nm.last{1}, 'Davis');
989 %! assert (nm.last{2}, 'Rogers');
990 
991 ## Tests for named tokens
992 %!test
993 %! ## Parenthesis in named token (ie (int)) causes a problem
994 %! assert (regexp ('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
995 
996 %!test <35683>
997 %! ## Mix of named and unnamed tokens can cause segfault
998 %! str = "abcde";
999 %! ptn = '(?<T1>a)(\w+)(?<T2>d\w+)';
1000 %! tokens = regexp (str, ptn, "names");
1001 %! assert (isstruct (tokens) && numel (tokens) == 1);
1002 %! assert (tokens.T1, "a");
1003 %! assert (tokens.T2, "de");
1004 
1005 %!assert (regexp ("abc\nabc", '.'), [1:7])
1006 %!assert (regexp ("abc\nabc", '.', 'dotall'), [1:7])
1007 %!test
1008 %! assert (regexp ("abc\nabc", '(?s).'), [1:7]);
1009 %! assert (regexp ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
1010 %! assert (regexp ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
1011 
1012 %!assert (regexp ("caseCaSe", 'case'), 1)
1013 %!assert (regexp ("caseCaSe", 'case', "matchcase"), 1)
1014 %!assert (regexp ("caseCaSe", 'case', "ignorecase"), [1,5])
1015 %!test
1016 %! assert (regexp ("caseCaSe", '(?-i)case'), 1);
1017 %! assert (regexp ("caseCaSe", '(?i)case'), [1, 5]);
1018 
1019 %!assert (regexp ("abc\nabc", 'c$'), 7)
1020 %!assert (regexp ("abc\nabc", 'c$', "stringanchors"), 7)
1021 %!test
1022 %! assert (regexp ("abc\nabc", '(?-m)c$'), 7);
1023 %! assert (regexp ("abc\nabc", 'c$',"lineanchors"), [3, 7]);
1024 %! assert (regexp ("abc\nabc", '(?m)c$'), [3,7]);
1025 
1026 %!assert (regexp ("this word", 's w'), 4)
1027 %!assert (regexp ("this word", 's w', 'literalspacing'), 4)
1028 %!test
1029 %! assert (regexp ("this word", '(?-x)s w', 'literalspacing'), 4);
1030 %! assert (regexp ("this word", 's w', 'freespacing'), zeros (1,0));
1031 %! assert (regexp ("this word", '(?x)s w'), zeros (1,0));
1032 
1033 %!test
1034 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'noemptymatch');
1035 %! assert (s, [1 5]);
1036 %! assert (e, [3 5]);
1037 %! assert (te, { zeros(0,2), zeros(0,2) });
1038 %! assert (m, { "OCT", "V" });
1039 %! assert (t, { cell(1,0), cell(1,0) });
1040 %! assert (isempty (fieldnames (nm)));
1041 %! assert (sp, { "", "A", "E" });
1042 
1043 %!test
1044 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'noemptymatch');
1045 %! assert (s, [1 5]);
1046 %! assert (e, [3 5]);
1047 %! assert (te, { [1 3], [5 5] });
1048 %! assert (m, { "OCT", "V" });
1049 %! assert (t, { {"OCT"}, {"V"} });
1050 %! assert (isempty (fieldnames (nm)));
1051 %! assert (sp, { "", "A", "E" });
1052 
1053 %!test
1054 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'emptymatch');
1055 %! assert (s, [1 4 5 6 7]);
1056 %! assert (e, [3 3 5 5 6]);
1057 %! assert (te, repmat ({zeros(0,2)}, [1, 5]));
1058 %! assert (m, { "OCT", "", "V", "", "" });
1059 %! assert (t, repmat({cell(1,0)}, [1, 5]));
1060 %! assert (isempty (fieldnames (nm)));
1061 %! assert (sp, { "", "", "A", "", "E", "" });
1062 
1063 %!test
1064 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'emptymatch');
1065 %! assert (s, [1 4 5 6 7]);
1066 %! assert (e, [3 3 5 5 6]);
1067 %! assert (te, { [1 3], [4 3], [5 5], [6 5], [7 6] });
1068 %! assert (m, { "OCT", "", "V", "", "" });
1069 %! assert (t, { {"OCT"}, {""}, {"V"}, {""}, {""} });
1070 %! assert (isempty (fieldnames (nm)));
1071 %! assert (sp, { "", "", "A", "", "E", "" });
1072 
1073 %!error regexp ('string', 'tri', 'BadArg')
1074 %!error regexp ('string')
1075 
1076 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1,0)})
1077 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1078 %!assert (regexp ('Strings', {'t','s'}), {2, 7})
1079 
1080 ## Test case for lookaround operators
1081 %!test
1082 %! assert (regexp ('Iraq', 'q(?!u)'), 4);
1083 %! assert (regexp ('quit', 'q(?!u)'), zeros (1, 0));
1084 %! assert (regexp ('quit', 'q(?=u)' , 'match'), {'q'});
1085 %! assert (regexp ("quit", 'q(?=u+)', 'match'), {'q'});
1086 %! assert (regexp ("qit", 'q(?=u+)', 'match'), cell (1, 0));
1087 %! assert (regexp ("qit", 'q(?=u*)', 'match'), {'q'});
1088 %! assert (regexp ('thingamabob', '(?<=a)b'), 9);
1089 
1090 ## Tests for split option.
1091 %!shared str
1092 %! str = "foo bar foo";
1093 %!test
1094 %! [a, b] = regexp (str, "f..", "match", "split");
1095 %! assert (a, {"foo", "foo"});
1096 %! assert (b, {"", " bar ", ""});
1097 %!test
1098 %! [a, b] = regexp (str, "f..", "match", "split", "once");
1099 %! assert (a, "foo");
1100 %! assert (b, {"", " bar foo"});
1101 %!test
1102 %! [a, b] = regexp (str, "fx.", "match", "split");
1103 %! assert (a, cell (1, 0));
1104 %! assert (b, {"foo bar foo"});
1105 %!test
1106 %! [a, b] = regexp (str, "fx.", "match", "split", "once");
1107 %! assert (a, "");;
1108 %! assert (b, "foo bar foo");
1109 
1110 %!shared str
1111 %! str = "foo bar";
1112 %!test
1113 %! [a, b] = regexp (str, "f..", "match", "split");
1114 %! assert (a, {"foo"});
1115 %! assert (b, {"", " bar"});
1116 %!test
1117 %! [a, b] = regexp (str, "b..", "match", "split");
1118 %! assert (a, {"bar"});
1119 %! assert (b, {"foo ", ""});
1120 %!test
1121 %! [a, b] = regexp (str, "x", "match", "split");
1122 %! assert (a, cell (1, 0));
1123 %! assert (b, {"foo bar"});
1124 %!test
1125 %! [a, b] = regexp (str, "[o]+", "match", "split");
1126 %! assert (a, {"oo"});
1127 %! assert (b, {"f", " bar"});
1128 
1129 %!assert (regexp ("\n", '\n'), 1)
1130 %!assert (regexp ("\n", "\n"), 1)
1131 
1132 # Test escape sequences are silently converted
1133 %!test <45407>
1134 %! assert (regexprep ('s', 's', 'x\.y'), 'x.y');
1135 %! assert (regexprep ('s', '(s)', 'x\$1y'), 'x$1y');
1136 %! assert (regexprep ('s', '(s)', 'x\\$1y'), 'x\sy');
1137 
1138 */
1139 
1140 DEFUN (regexpi, args, nargout,
1141  doc: /* -*- texinfo -*-
1142 @deftypefn {} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexpi (@var{str}, @var{pat})
1143 @deftypefnx {} {[@dots{}] =} regexpi (@var{str}, @var{pat}, "@var{opt1}", @dots{})
1144 
1145 Case insensitive regular expression string matching.
1146 
1147 Search for @var{pat} in @var{str} and return the positions and substrings of
1148 any matches, or empty values if there are none. @xref{XREFregexp,,regexp},
1149 for details on the syntax of the search pattern.
1150 @seealso{regexp}
1151 @end deftypefn */)
1152 {
1153  if (args.length () < 2)
1154  print_usage ();
1155 
1156  if (args(0).is_cell () || args(1).is_cell ())
1157  return octcellregexp (args, (nargout > 0 ? nargout : 1), "regexpi", true);
1158  else
1159  return octregexp (args, nargout, "regexpi", true);
1160 }
1161 
1162 /*
1163 ## segfault test
1164 %!assert (regexpi ("abcde", "."), [1,2,3,4,5])
1165 
1166 ## Check that anchoring of pattern works correctly
1167 %!assert (regexpi ('abcabc', '^ABC'), 1)
1168 %!assert (regexpi ('abcabc', 'ABC$'), 4)
1169 %!assert (regexpi ('abcabc', '^ABC$'), zeros (1,0))
1170 
1171 %!test
1172 %! [s, e, te, m, t] = regexpi (' No Match ', 'f(.*)uck');
1173 %! assert (s, zeros (1,0));
1174 %! assert (e, zeros (1,0));
1175 %! assert (te, cell (1,0));
1176 %! assert (m, cell (1,0));
1177 %! assert (t, cell (1,0));
1178 
1179 %!test
1180 %! [s, e, te, m, t] = regexpi (' FiRetrUck ', 'f(.*)uck');
1181 %! assert (s, 2);
1182 %! assert (e, 10);
1183 %! assert (te{1}, [3, 7]);
1184 %! assert (m{1}, 'FiRetrUck');
1185 %! assert (t{1}{1}, 'iRetr');
1186 
1187 %!test
1188 %! [s, e, te, m, t] = regexpi (' firetruck ', 'f(.*)uck');
1189 %! assert (s, 2);
1190 %! assert (e, 10);
1191 %! assert (te{1}, [3, 7]);
1192 %! assert (m{1}, 'firetruck');
1193 %! assert (t{1}{1}, 'iretr');
1194 
1195 %!test
1196 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*');
1197 %! assert (s, [1, 12]);
1198 %! assert (e, [5, 17]);
1199 %! assert (size (te), [1, 2]);
1200 %! assert (isempty (te{1}));
1201 %! assert (isempty (te{2}));
1202 %! assert (m{1}, 'ShoRt');
1203 %! assert (m{2}, 'String');
1204 %! assert (size (t), [1, 2]);
1205 %! assert (isempty (t{1}));
1206 %! assert (isempty (t{2}));
1207 
1208 %!test
1209 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once');
1210 %! assert (s, 1);
1211 %! assert (e, 5);
1212 %! assert (isempty (te));
1213 %! assert (m, 'ShoRt');
1214 %! assert (isempty (t));
1215 
1216 %!test
1217 %! [m, te, e, s, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1218 %! assert (s, 1);
1219 %! assert (e, 5);
1220 %! assert (isempty (te));
1221 %! assert (m, 'ShoRt');
1222 %! assert (isempty (t));
1223 
1224 %!test
1225 %! [s, e, te, m, t, nm] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)');
1226 %! assert (s, 1);
1227 %! assert (e, 10);
1228 %! assert (size (te), [1, 1]);
1229 %! assert (te{1}, [1,5; 7,10]);
1230 %! assert (m{1}, 'ShoRt Test');
1231 %! assert (size (t), [1, 1]);
1232 %! assert (t{1}{1}, 'ShoRt');
1233 %! assert (t{1}{2}, 'Test');
1234 %! assert (size (nm), [1, 1]);
1235 %! assert (! isempty (fieldnames (nm)));
1236 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1237 %! assert (nm.word1, 'ShoRt');
1238 %! assert (nm.word2, 'Test');
1239 
1240 %!test
1241 %! [nm, m, te, e, s, t] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1242 %! assert (s, 1);
1243 %! assert (e, 10);
1244 %! assert (size (te), [1, 1]);
1245 %! assert (te{1}, [1,5; 7,10]);
1246 %! assert (m{1}, 'ShoRt Test');
1247 %! assert (size (t), [1, 1]);
1248 %! assert (t{1}{1}, 'ShoRt');
1249 %! assert (t{1}{2}, 'Test');
1250 %! assert (size (nm), [1, 1]);
1251 %! assert (! isempty (fieldnames (nm)));
1252 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1253 %! assert (nm.word1, 'ShoRt');
1254 %! assert (nm.word2, 'Test');
1255 
1256 %!assert (regexpi ("abc\nabc", '.'), [1:7])
1257 %!assert (regexpi ("abc\nabc", '.', 'dotall'), [1:7])
1258 %!test
1259 %! assert (regexpi ("abc\nabc", '(?s).'), [1:7]);
1260 %! assert (regexpi ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
1261 %! assert (regexpi ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
1262 
1263 %!assert (regexpi ("caseCaSe", 'case'), [1, 5])
1264 %!assert (regexpi ("caseCaSe", 'case', "matchcase"), 1)
1265 %!assert (regexpi ("caseCaSe", 'case', "ignorecase"), [1, 5])
1266 %!test
1267 %! assert (regexpi ("caseCaSe", '(?-i)case'), 1);
1268 %! assert (regexpi ("caseCaSe", '(?i)case'), [1, 5]);
1269 
1270 %!assert (regexpi ("abc\nabc", 'C$'), 7)
1271 %!assert (regexpi ("abc\nabc", 'C$', "stringanchors"), 7)
1272 %!test
1273 %! assert (regexpi ("abc\nabc", '(?-m)C$'), 7);
1274 %! assert (regexpi ("abc\nabc", 'C$', "lineanchors"), [3, 7]);
1275 %! assert (regexpi ("abc\nabc", '(?m)C$'), [3, 7]);
1276 
1277 %!assert (regexpi ("this word", 'S w'), 4)
1278 %!assert (regexpi ("this word", 'S w', 'literalspacing'), 4)
1279 %!test
1280 %! assert (regexpi ("this word", '(?-x)S w', 'literalspacing'), 4);
1281 %! assert (regexpi ("this word", 'S w', 'freespacing'), zeros (1,0));
1282 %! assert (regexpi ("this word", '(?x)S w'), zeros (1,0));
1283 
1284 %!error regexpi ('string', 'tri', 'BadArg')
1285 %!error regexpi ('string')
1286 
1287 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1, 0)})
1288 %!assert (regexpi ({'asdfg-dfd', '-dfd-dfd-', 'qasfdfdaq'}, '-'), {6, [1,5,9], zeros(1,0)})
1289 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1290 %!assert (regexpi ('Strings', {'t', 's'}), {2, [1, 7]})
1291 
1292 %!assert (regexpi ("\n", '\n'), 1)
1293 %!assert (regexpi ("\n", "\n"), 1)
1294 */
1295 
1296 static octave_value
1297 octregexprep (const octave_value_list &args, const std::string &who)
1298 {
1299  int nargin = args.length ();
1300 
1301  // Make sure we have string, pattern, replacement
1302  const std::string buffer = args(0).string_value ();
1303 
1304  std::string pattern = args(1).string_value ();
1305 
1306  // Rewrite pattern for PCRE
1307  pattern = do_regexp_ptn_string_escapes (pattern, args(1).is_sq_string ());
1308 
1309  std::string replacement = args(2).string_value ();
1310 
1311  // Matlab compatibility.
1312  if (args(2).is_sq_string ())
1313  replacement = do_regexp_rep_string_escapes (replacement);
1314 
1315  // Pack options excluding 'tokenize' and various output
1316  // reordering strings into regexp arg list
1317  octave_value_list regexpargs (nargin-3, octave_value ());
1318 
1319  int len = 0;
1320  for (int i = 3; i < nargin; i++)
1321  {
1322  const std::string opt = args(i).string_value ();
1323  if (opt != "tokenize" && opt != "start" && opt != "end"
1324  && opt != "tokenextents" && opt != "match" && opt != "tokens"
1325  && opt != "names" && opt != "split" && opt != "warnings")
1326  {
1327  regexpargs(len++) = args(i);
1328  }
1329  }
1330  regexpargs.resize (len);
1331 
1333  bool extra_args = false;
1334  parse_options (options, regexpargs, who, 0, extra_args);
1335 
1336  return octave::regexp::replace (pattern, buffer, replacement, options, who);
1337 }
1338 
1339 DEFUN (regexprep, args, ,
1340  doc: /* -*- texinfo -*-
1341 @deftypefn {} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})
1342 @deftypefnx {} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, "@var{opt1}", @dots{})
1343 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.
1344 
1345 The pattern is a regular expression as documented for @code{regexp}.
1346 @xref{XREFregexp,,regexp}.
1347 
1348 The replacement string may contain @code{$i}, which substitutes for the ith
1349 set of parentheses in the match string. For example,
1350 
1351 @example
1352 regexprep ("Bill Dunn", '(\w+) (\w+)', '$2, $1')
1353 @end example
1354 
1355 @noindent
1356 returns @qcode{"Dunn, Bill"}
1357 
1358 Options in addition to those of @code{regexp} are
1359 
1360 @table @samp
1361 
1362 @item once
1363 Replace only the first occurrence of @var{pat} in the result.
1364 
1365 @item warnings
1366 This option is present for compatibility but is ignored.
1367 
1368 @end table
1369 
1370 Implementation Note: For compatibility with @sc{matlab}, escape sequences
1371 in @var{pat} (e.g., @qcode{"@xbackslashchar{}n"} => newline) are expanded
1372 even when @var{pat} has been defined with single quotes. To disable
1373 expansion use a second backslash before the escape sequence (e.g.,
1374 "@xbackslashchar{}@xbackslashchar{}n") or use the @code{regexptranslate}
1375 function.
1376 @seealso{regexp, regexpi, strrep}
1377 @end deftypefn */)
1378 {
1379  if (args.length () < 3)
1380  print_usage ();
1381 
1383 
1384  if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
1385  {
1386  Cell str, pat, rep;
1387  dim_vector dv0;
1388  dim_vector dv1 (1, 1);
1389 
1390  if (args(0).is_cell ())
1391  str = args(0).cell_value ();
1392  else
1393  str = Cell (args(0));
1394 
1395  if (args(1).is_cell ())
1396  pat = args(1).cell_value ();
1397  else
1398  pat = Cell (args(1));
1399 
1400  if (args(2).is_cell ())
1401  rep = args(2).cell_value ();
1402  else
1403  rep = Cell (args(2));
1404 
1405  dv0 = str.dims ();
1406  if (pat.numel () != 1)
1407  {
1408  dv1 = pat.dims ();
1409  if (rep.numel () != 1 && dv1 != rep.dims ())
1410  error ("regexprep: inconsistent cell array dimensions");
1411  }
1412  else if (rep.numel () != 1)
1413  dv1 = rep.dims ();
1414 
1415  Cell ret (dv0);
1416  octave_value_list new_args = args;
1417 
1418  for (octave_idx_type i = 0; i < dv0.numel (); i++)
1419  {
1420  new_args(0) = str(i);
1421  if (pat.numel () == 1)
1422  new_args(1) = pat(0);
1423  if (rep.numel () == 1)
1424  new_args(2) = rep(0);
1425 
1426  for (octave_idx_type j = 0; j < dv1.numel (); j++)
1427  {
1428  if (pat.numel () != 1)
1429  new_args(1) = pat(j);
1430  if (rep.numel () != 1)
1431  new_args(2) = rep(j);
1432  new_args(0) = octregexprep (new_args, "regexprep");
1433  }
1434 
1435  ret(i) = new_args(0);
1436  }
1437 
1438  retval = args(0).is_cell () ? ovl (ret)
1439  : ovl (ret(0));
1440  }
1441  else
1442  retval = octregexprep (args, "regexprep");
1443 
1444  return retval;
1445 }
1446 
1447 /*
1448 %!test # Replace with empty
1449 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1450 %! t = regexprep (xml, '<[!?][^>]*>', '');
1451 %! assert (t, ' <tag v="hello">some stuff</tag>');
1452 
1453 %!test # Replace with non-empty
1454 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1455 %! t = regexprep (xml, '<[!?][^>]*>', '?');
1456 %! assert (t, '? <tag v="hello">some stuff?</tag>');
1457 
1458 %!test # Check that 'tokenize' is ignored
1459 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1460 %! t = regexprep (xml, '<[!?][^>]*>', '', 'tokenize');
1461 %! assert (t, ' <tag v="hello">some stuff</tag>');
1462 
1463 ## Test capture replacement
1464 %!test
1465 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
1466 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
1467 %! t = regexprep (data, '(?m)^(\w+)\s+(\w+)$', '$2, $1');
1468 %! assert (t, result);
1469 
1470 ## Return the original if no match
1471 %!assert (regexprep ('hello', 'world', 'earth'), 'hello')
1472 
1473 ## Test emptymatch
1474 %!assert (regexprep ('World', '^', 'Hello '), 'World')
1475 %!assert (regexprep ('World', '^', 'Hello ', 'emptymatch'), 'Hello World')
1476 
1477 ## Test a general replacement
1478 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g")
1479 
1480 ## Make sure it works at the beginning and end
1481 %!assert (regexprep ("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g")
1482 %!assert (regexprep ("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_")
1483 
1484 ## Options
1485 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g")
1486 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g")
1487 
1488 ## Option combinations
1489 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g")
1490 
1491 ## End conditions on replacement
1492 %!assert (regexprep ("abc", "(b)", ".$1"), "a.bc")
1493 %!assert (regexprep ("abc", "(b)", "$1"), "abc")
1494 %!assert (regexprep ("abc", "(b)", "$1."), "ab.c")
1495 %!assert (regexprep ("abc", "(b)", "$1.."), "ab..c")
1496 
1497 ## Test cell array arguments
1498 %!assert (regexprep ("abc", {"b","a"}, "?"), "??c")
1499 %!assert (regexprep ({"abc","cba"}, "b", "?"), {"a?c","c?a"})
1500 %!assert (regexprep ({"abc","cba"}, {"b","a"}, {"?","!"}), {"!?c","c?!"})
1501 
1502 # Nasty lookbehind expression
1503 %!test
1504 %! warning ("off", "Octave:regexp-lookbehind-limit", "local");
1505 %! assert (regexprep ('x^(-1)+y(-1)+z(-1)=0', '(?<=[a-z]+)\(\-[1-9]*\)', '_minus1'),'x^(-1)+y_minus1+z_minus1=0');
1506 
1507 %!assert (regexprep ("\n", '\n', "X"), "X")
1508 %!assert (regexprep ("\n", "\n", "X"), "X")
1509 */
Definition: Cell.h:37
OCTAVE_EXPORT octave_value_list isa nd deftypefn *return ovl(args(0).is_integer_type())
OCTINTERP_API void print_usage(void)
Definition: defun.cc:52
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:363
octave_idx_type length(void) const
Definition: ovl.h:96
for large enough k
Definition: lu.cc:606
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:46
void error(const char *fmt,...)
Definition: error.cc:570
static octave_value_list octcellregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
Definition: regexp.cc:532
s
Definition: file-io.cc:2682
i e
Definition: data.cc:2724
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:389
static std::string do_regexp_rep_string_escapes(const std::string &s)
Definition: regexp.cc:140
JNIEnv void * args
Definition: ov-java.cc:67
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:439
static std::string do_regexp_ptn_string_escapes(const std::string &s, bool is_sq_str)
Definition: regexp.cc:53
void dotexceptnewline(bool val)
Definition: lo-regexp.h:173
iterator end(void)
Definition: base-list.h:86
OCTAVE_EXPORT octave_value_list return the number of command line arguments passed to Octave If called with the optional argument the function xample nargout(@histc)
Definition: ov-usr-fcn.cc:935
static octave_value_list octregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
Definition: regexp.cc:333
int nargin
Definition: graphics.cc:10115
std::string str
Definition: hash.cc:118
double tmp
Definition: data.cc:6300
octave_value retval
Definition: data.cc:6294
static void parse_options(octave::regexp::opts &options, const octave_value_list &args, const std::string &who, int skip, bool &extra_args)
Definition: regexp.cc:284
std::string replace(const std::string &buffer, const std::string &replacement)
Definition: lo-regexp.cc:448
octave_idx_type length(void) const
Definition: ov.cc:1623
Definition: dMatrix.h:37
sz
Definition: data.cc:5342
void warning(const char *fmt,...)
Definition: error.cc:788
match_data match(const std::string &buffer)
Definition: lo-regexp.cc:241
std::list< match_element >::const_iterator const_iterator
Definition: base-list.h:41
octave::sys::time start
Definition: graphics.cc:11731
void emptymatch(bool val)
Definition: lo-regexp.h:174
=val(i)}if ode{val(i)}occurs in table i
Definition: lookup.cc:239
size_t size(void) const
Definition: base-list.h:49
is longer than or if then or only for unique occurrences of the complete pattern(false).The default is true.If a cell array of strings ar
Definition: strfind.cc:192
p
Definition: lu.cc:138
void assign(const std::string &k, const octave_value &val)
Definition: oct-map.h:223
string_vector named_patterns(void)
Definition: lo-regexp.h:261
OCTAVE_EXPORT octave_value_list only variables visible in the local scope are displayed The following are valid options
Definition: variables.cc:1859
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:200
void resize(octave_idx_type n, const octave_value &rfv=octave_value())
Definition: ovl.h:100
ColumnVector transform(const Matrix &m, double x, double y, double z)
Definition: graphics.cc:5118
void case_insensitive(bool val)
Definition: lo-regexp.h:172
void lineanchors(bool val)
Definition: lo-regexp.h:176
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:854
iterator begin(void)
Definition: base-list.h:83
return octave_value(v1.char_array_value().concat(v2.char_array_value(), ra_idx),((a1.is_sq_string()||a2.is_sq_string())? '\'': '"'))
charNDArray min(char d, const charNDArray &m)
Definition: chNDArray.cc:205
void once(bool val)
Definition: lo-regexp.h:177
void freespacing(bool val)
Definition: lo-regexp.h:175