GNU Octave  4.0.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
regexp.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2005-2015 David Bateman
4 Copyright (C) 2002-2005 Paul Kienzle
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <list>
29 #include <sstream>
30 
31 #include <pcre.h>
32 
33 #include "base-list.h"
34 #include "oct-locbuf.h"
35 #include "quit.h"
36 #include "lo-regexp.h"
37 #include "str-vec.h"
38 
39 #include "defun.h"
40 #include "Cell.h"
41 #include "error.h"
42 #include "gripes.h"
43 #include "oct-map.h"
44 #include "oct-obj.h"
45 #include "utils.h"
46 
47 // Replace backslash escapes in a string with the real values. We need
48 // two special functions instead of the one in utils.cc because the set
49 // of escape sequences used for regexp patterns and replacement strings
50 // is different from those used in the *printf functions.
51 
// Process backslash escapes in a regexp PATTERN string.
//
// S is the pattern as typed by the user.  The result is the pattern
// that is handed to PCRE:
//
//   \b      is replaced by a literal backspace character
//   \< \>   are both rewritten as the PCRE word boundary \b
//   \X      (anything else) is passed through unchanged as \X
//
// A lone backslash at the very end of S is copied as-is.  The result
// is never longer than S.

static std::string
do_regexp_ptn_string_escapes (const std::string& s)
{
  const size_t len = s.length ();

  std::string retval;
  retval.reserve (len);

  for (size_t j = 0; j < len; j++)
    {
      if (s[j] == '\\' && j+1 < len)
        {
          // Consume the backslash and look at the escaped character.
          switch (s[++j])
            {
            case 'b': // backspace
              retval.push_back ('\b');
              break;

            // Translate \< and \> to PCRE word boundary
            case '<': // begin word boundary
            case '>': // end word boundary
              retval.push_back ('\\');
              retval.push_back ('b');
              break;

#if 0
// FIXME: To be complete, we need to handle \oN, \o{N}.
//        The PCRE library already handles \N where N
//        is an octal number.  New code needs to merely
//        replace \oN or \o{N} with \N.
            case 'o': // octal number
#endif

            default:  // pass escape sequence through
              retval.push_back ('\\');
              retval.push_back (s[j]);
              break;
            }
        }
      else
        retval.push_back (s[j]);
    }

  return retval;
}
107 
// Process backslash escapes in a regexp REPLACEMENT string.
//
// The C-style escapes \a \b \f \n \r \t \v are replaced by the single
// control character they denote.  Every other escape sequence \X is
// passed through unchanged (backslash included).  A lone backslash at
// the very end of S is copied as-is.  The result is never longer
// than S.

static std::string
do_regexp_rep_string_escapes (const std::string& s)
{
  const size_t len = s.length ();

  std::string retval;
  retval.reserve (len);

  for (size_t j = 0; j < len; j++)
    {
      if (s[j] == '\\' && j+1 < len)
        {
          // Consume the backslash and look at the escaped character.
          switch (s[++j])
            {
            case 'a': // alarm
              retval.push_back ('\a');
              break;

            case 'b': // backspace
              retval.push_back ('\b');
              break;

            case 'f': // formfeed
              retval.push_back ('\f');
              break;

            case 'n': // newline
              retval.push_back ('\n');
              break;

            case 'r': // carriage return
              retval.push_back ('\r');
              break;

            case 't': // horizontal tab
              retval.push_back ('\t');
              break;

            case 'v': // vertical tab
              retval.push_back ('\v');
              break;

#if 0
// FIXME: to be complete, we need to handle \oN, \o{N}, \xN, and
// \x{N}.  Hex digits may be upper or lower case.  Brackets are
// optional, so \x5Bz is the same as \x{5B}z.

            case 'o': // octal number
            case 'x': // hex number
#endif

            default:  // pass escape sequence through
              retval.push_back ('\\');
              retval.push_back (s[j]);
              break;
            }
        }
      else
        retval.push_back (s[j]);
    }

  return retval;
}
181 
182 static void
184  const std::string& who, int skip, bool& extra_args)
185 {
186  int nargin = args.length ();
187 
188  extra_args = false;
189 
190  for (int i = skip; i < nargin; i++)
191  {
192  std::string str;
193 
194  if (args(i).is_string ())
195  str = args(i).string_value ();
196  else
197  {
198  error ("%s: optional arguments must be strings", who.c_str ());
199  break;
200  }
201 
202  std::transform (str.begin (), str.end (), str.begin (), tolower);
203 
204  if (str.find ("once", 0) == 0)
205  options.once (true);
206  else if (str.find ("matchcase", 0) == 0)
207  options.case_insensitive (false);
208  else if (str.find ("ignorecase", 0) == 0)
209  options.case_insensitive (true);
210  else if (str.find ("dotall", 0) == 0)
211  options.dotexceptnewline (false);
212  else if (str.find ("stringanchors", 0) == 0)
213  options.lineanchors (false);
214  else if (str.find ("literalspacing", 0) == 0)
215  options.freespacing (false);
216  else if (str.find ("noemptymatch", 0) == 0)
217  options.emptymatch (false);
218  else if (str.find ("dotexceptnewline", 0) == 0)
219  options.dotexceptnewline (true);
220  else if (str.find ("lineanchors", 0) == 0)
221  options.lineanchors (true);
222  else if (str.find ("freespacing", 0) == 0)
223  options.freespacing (true);
224  else if (str.find ("emptymatch", 0) == 0)
225  options.emptymatch (true);
226  else if (str.find ("start", 0) == 0
227  || str.find ("end", 0) == 0
228  || str.find ("tokenextents", 0) == 0
229  || str.find ("match", 0) == 0
230  || str.find ("tokens", 0) == 0
231  || str.find ("names", 0) == 0
232  || str.find ("split", 0) == 0)
233  extra_args = true;
234  else
235  error ("%s: unrecognized option", who.c_str ());
236  }
237 }
238 
239 static octave_value_list
240 octregexp (const octave_value_list &args, int nargout,
241  const std::string &who, bool case_insensitive = false)
242 {
243  octave_value_list retval;
244 
245  int nargin = args.length ();
246 
247  // Make sure we have string, pattern
248  const std::string buffer = args(0).string_value ();
249  if (error_state)
250  return retval;
251 
252  std::string pattern = args(1).string_value ();
253  if (error_state)
254  return retval;
255  // Matlab compatibility.
256  if (args(1).is_sq_string ())
257  pattern = do_regexp_ptn_string_escapes (pattern);
258 
259  regexp::opts options;
260  options.case_insensitive (case_insensitive);
261  bool extra_options = false;
262  parse_options (options, args, who, 2, extra_options);
263  if (error_state)
264  return retval;
265 
266  regexp::match_data rx_lst = regexp_match (pattern, buffer, options, who);
267 
268  string_vector named_pats = rx_lst.named_patterns ();
269 
270  size_t sz = rx_lst.size ();
271 
272  if (! error_state)
273  {
274  // Converted the linked list in the correct form for the return values
275 
276  octave_idx_type i = 0;
277  octave_scalar_map nmap;
278 
279  retval.resize (7);
280 
281  if (sz == 1)
282  {
283  string_vector named_tokens = rx_lst.begin ()->named_tokens ();
284 
285  for (int j = 0; j < named_pats.length (); j++)
286  nmap.assign (named_pats(j), named_tokens(j));
287 
288  retval(5) = nmap;
289  }
290  else
291  {
292  for (int j = 0; j < named_pats.length (); j++)
293  {
294  Cell tmp (dim_vector (1, sz));
295 
296  i = 0;
297  for (regexp::match_data::const_iterator p = rx_lst.begin ();
298  p != rx_lst.end (); p++)
299  {
300  string_vector named_tokens = p->named_tokens ();
301 
302  tmp(i++) = named_tokens(j);
303  }
304 
305  nmap.assign (named_pats(j), octave_value (tmp));
306  }
307 
308  retval(5) = nmap;
309  }
310 
311  if (options.once ())
312  {
314 
315  retval(4) = sz ? p->tokens () : Cell ();
316  retval(3) = sz ? p->match_string () : std::string ();
317  retval(2) = sz ? p->token_extents () : Matrix ();
318 
319  if (sz)
320  {
321  double start = p->start ();
322  double end = p->end ();
323 
324  Cell split (dim_vector (1, 2));
325  split(0) = buffer.substr (0, start-1);
326  split(1) = buffer.substr (end);
327 
328  retval(6) = split;
329  retval(1) = end;
330  retval(0) = start;
331  }
332  else
333  {
334  retval(6) = buffer;
335  retval(1) = Matrix ();
336  retval(0) = Matrix ();
337  }
338  }
339  else
340  {
341  Cell tokens (dim_vector (1, sz));
342  Cell match_string (dim_vector (1, sz));
343  Cell token_extents (dim_vector (1, sz));
344  NDArray end (dim_vector (1, sz));
345  NDArray start (dim_vector (1, sz));
346  Cell split (dim_vector (1, sz+1));
347  size_t sp_start = 0;
348 
349  i = 0;
350  for (regexp::match_data::const_iterator p = rx_lst.begin ();
351  p != rx_lst.end (); p++)
352  {
353  double s = p->start ();
354  double e = p->end ();
355 
356  string_vector tmp = p->tokens ();
357  tokens(i) = Cell (dim_vector (1, tmp.length ()), tmp);
358  match_string(i) = p->match_string ();
359  token_extents(i) = p->token_extents ();
360  end(i) = e;
361  start(i) = s;
362  split(i) = buffer.substr (sp_start, s-sp_start-1);
363  sp_start = e;
364  i++;
365  }
366 
367  split(i) = buffer.substr (sp_start);
368 
369  retval(6) = split;
370  retval(4) = tokens;
371  retval(3) = match_string;
372  retval(2) = token_extents;
373  retval(1) = end;
374  retval(0) = start;
375  }
376 
377  // Alter the order of the output arguments
378 
379  if (extra_options)
380  {
381  int n = 0;
382  octave_value_list new_retval;
383  new_retval.resize (nargout);
384 
385  OCTAVE_LOCAL_BUFFER (int, arg_used, 6);
386  for (int j = 0; j < 6; j++)
387  arg_used[j] = false;
388 
389  for (int j = 2; j < nargin; j++)
390  {
391  int k = 0;
392  std::string str = args(j).string_value ();
393  std::transform (str.begin (), str.end (), str.begin (), tolower);
394 
395  if (str.find ("once", 0) == 0
396  || str.find ("stringanchors", 0) == 0
397  || str.find ("lineanchors", 0) == 0
398  || str.find ("matchcase", 0) == 0
399  || str.find ("ignorecase", 0) == 0
400  || str.find ("dotall", 0) == 0
401  || str.find ("dotexceptnewline", 0) == 0
402  || str.find ("literalspacing", 0) == 0
403  || str.find ("freespacing", 0) == 0
404  || str.find ("noemptymatch", 0) == 0
405  || str.find ("emptymatch", 0) == 0)
406  continue;
407  else if (str.find ("start", 0) == 0)
408  k = 0;
409  else if (str.find ("end", 0) == 0)
410  k = 1;
411  else if (str.find ("tokenextents", 0) == 0)
412  k = 2;
413  else if (str.find ("match", 0) == 0)
414  k = 3;
415  else if (str.find ("tokens", 0) == 0)
416  k = 4;
417  else if (str.find ("names", 0) == 0)
418  k = 5;
419  else if (str.find ("split", 0) == 0)
420  k = 6;
421 
422  new_retval(n++) = retval(k);
423  arg_used[k] = true;
424 
425  if (n == nargout)
426  break;
427  }
428 
429  // Fill in the rest of the arguments
430  if (n < nargout)
431  {
432  for (int j = 0; j < 6; j++)
433  {
434  if (! arg_used[j])
435  new_retval(n++) = retval(j);
436  }
437  }
438 
439  retval = new_retval;
440  }
441  }
442 
443  return retval;
444 }
445 
446 static octave_value_list
447 octcellregexp (const octave_value_list &args, int nargout,
448  const std::string &who, bool case_insensitive = false)
449 {
450  octave_value_list retval;
451 
452  if (args(0).is_cell ())
453  {
454  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
455  octave_value_list new_args = args;
456  Cell cellstr = args(0).cell_value ();
457  if (args(1).is_cell ())
458  {
459  Cell cellpat = args(1).cell_value ();
460 
461  if (cellpat.numel () == 1)
462  {
463  for (int j = 0; j < nargout; j++)
464  newretval[j].resize (cellstr.dims ());
465 
466  new_args(1) = cellpat(0);
467 
468  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
469  {
470  new_args(0) = cellstr(i);
471  octave_value_list tmp = octregexp (new_args, nargout, who,
472  case_insensitive);
473 
474  if (error_state)
475  break;
476 
477  for (int j = 0; j < nargout; j++)
478  newretval[j](i) = tmp(j);
479  }
480  }
481  else if (cellstr.numel () == 1)
482  {
483  for (int j = 0; j < nargout; j++)
484  newretval[j].resize (cellpat.dims ());
485 
486  new_args(0) = cellstr(0);
487 
488  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
489  {
490  new_args(1) = cellpat(i);
491  octave_value_list tmp = octregexp (new_args, nargout, who,
492  case_insensitive);
493 
494  if (error_state)
495  break;
496 
497  for (int j = 0; j < nargout; j++)
498  newretval[j](i) = tmp(j);
499  }
500  }
501  else if (cellstr.numel () == cellpat.numel ())
502  {
503 
504  if (cellstr.dims () != cellpat.dims ())
505  error ("%s: inconsistent cell array dimensions", who.c_str ());
506  else
507  {
508  for (int j = 0; j < nargout; j++)
509  newretval[j].resize (cellstr.dims ());
510 
511  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
512  {
513  new_args(0) = cellstr(i);
514  new_args(1) = cellpat(i);
515 
516  octave_value_list tmp = octregexp (new_args, nargout, who,
517  case_insensitive);
518 
519  if (error_state)
520  break;
521 
522  for (int j = 0; j < nargout; j++)
523  newretval[j](i) = tmp(j);
524  }
525  }
526  }
527  else
528  error ("regexp: cell array arguments must be scalar or equal size");
529  }
530  else
531  {
532  for (int j = 0; j < nargout; j++)
533  newretval[j].resize (cellstr.dims ());
534 
535  for (octave_idx_type i = 0; i < cellstr.numel (); i++)
536  {
537  new_args(0) = cellstr(i);
538  octave_value_list tmp = octregexp (new_args, nargout, who,
539  case_insensitive);
540 
541  if (error_state)
542  break;
543 
544  for (int j = 0; j < nargout; j++)
545  newretval[j](i) = tmp(j);
546  }
547  }
548 
549  if (!error_state)
550  for (int j = 0; j < nargout; j++)
551  retval(j) = octave_value (newretval[j]);
552  }
553  else if (args(1).is_cell ())
554  {
555  OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout);
556  octave_value_list new_args = args;
557  Cell cellpat = args(1).cell_value ();
558 
559  for (int j = 0; j < nargout; j++)
560  newretval[j].resize (cellpat.dims ());
561 
562  for (octave_idx_type i = 0; i < cellpat.numel (); i++)
563  {
564  new_args(1) = cellpat(i);
565  octave_value_list tmp = octregexp (new_args, nargout, who,
566  case_insensitive);
567 
568  if (error_state)
569  break;
570 
571  for (int j = 0; j < nargout; j++)
572  newretval[j](i) = tmp(j);
573  }
574 
575  if (!error_state)
576  {
577  for (int j = 0; j < nargout; j++)
578  retval(j) = octave_value (newretval[j]);
579  }
580  }
581  else
582  retval = octregexp (args, nargout, who, case_insensitive);
583 
584  return retval;
585 
586 }
587 
588 DEFUN (regexp, args, nargout,
589  "-*- texinfo -*-\n\
590 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexp (@var{str}, @var{pat})\n\
591 @deftypefnx {Built-in Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
592 Regular expression string matching.\n\
593 \n\
594 Search for @var{pat} in @var{str} and return the positions and substrings of\n\
595 any matches, or empty values if there are none.\n\
596 \n\
597 The matched pattern @var{pat} can include any of the standard regex\n\
598 operators, including:\n\
599 \n\
600 @table @code\n\
601 @item .\n\
602 Match any character\n\
603 \n\
604 @item * + ? @{@}\n\
605 Repetition operators, representing\n\
606 \n\
607 @table @code\n\
608 @item *\n\
609 Match zero or more times\n\
610 \n\
611 @item +\n\
612 Match one or more times\n\
613 \n\
614 @item ?\n\
615 Match zero or one times\n\
616 \n\
617 @item @{@var{n}@}\n\
618 Match exactly @var{n} times\n\
619 \n\
620 @item @{@var{n},@}\n\
621 Match @var{n} or more times\n\
622 \n\
623 @item @{@var{m},@var{n}@}\n\
624 Match between @var{m} and @var{n} times\n\
625 @end table\n\
626 \n\
627 @item [@dots{}] [^@dots{}]\n\
628 \n\
629 List operators. The pattern will match any character listed between \"[\"\n\
630 and \"]\". If the first character is \"^\" then the pattern is inverted and\n\
631 any character except those listed between brackets will match.\n\
632 \n\
633 Escape sequences defined below can also be used inside list operators. For\n\
634 example, a template for a floating point number might be @code{[-+.\\d]+}.\n\
635 \n\
636 @item () (?:)\n\
637 Grouping operator. The first form, parentheses only, also creates a token.\n\
638 \n\
639 @item |\n\
640 Alternation operator. Match one of a choice of regular expressions. The\n\
641 alternatives must be delimited by the grouping operator @code{()} above.\n\
642 \n\
643 @item ^ $\n\
644 Anchoring operators. Requires pattern to occur at the start (@code{^}) or\n\
645 end (@code{$}) of the string.\n\
646 @end table\n\
647 \n\
648 In addition, the following escaped characters have special meaning.\n\
649 \n\
650 @table @code\n\
651 \n\
652 @item \\d\n\
653 Match any digit\n\
654 \n\
655 @item \\D\n\
656 Match any non-digit\n\
657 \n\
658 @item \\s\n\
659 Match any whitespace character\n\
660 \n\
661 @item \\S\n\
662 Match any non-whitespace character\n\
663 \n\
664 @item \\w\n\
665 Match any word character\n\
666 \n\
667 @item \\W\n\
668 Match any non-word character\n\
669 \n\
670 @item \\<\n\
671 Match the beginning of a word\n\
672 \n\
673 @item \\>\n\
674 Match the end of a word\n\
675 \n\
676 @item \\B\n\
677 Match within a word\n\
678 @end table\n\
679 \n\
680 Implementation Note: For compatibility with @sc{matlab}, escape sequences\n\
681 in @var{pat} (e.g., @qcode{\"@xbackslashchar{}n\"} => newline) are expanded\n\
682 even when @var{pat} has been defined with single quotes. To disable\n\
683 expansion use a second backslash before the escape sequence (e.g.,\n\
684 \"@xbackslashchar{}@xbackslashchar{}n\") or use the @code{regexptranslate}\n\
685 function.\n\
686 \n\
687 The outputs of @code{regexp} default to the order given below\n\
688 \n\
689 @table @var\n\
690 @item s\n\
691 The start indices of each matching substring\n\
692 \n\
693 @item e\n\
694 The end indices of each matching substring\n\
695 \n\
696 @item te\n\
697 The extents of each matched token surrounded by @code{(@dots{})} in\n\
698 @var{pat}\n\
699 \n\
700 @item m\n\
701 A cell array of the text of each match\n\
702 \n\
703 @item t\n\
704 A cell array of the text of each token matched\n\
705 \n\
706 @item nm\n\
707 A structure containing the text of each matched named token, with the name\n\
708 being used as the fieldname. A named token is denoted by\n\
709 @code{(?<name>@dots{})}.\n\
710 \n\
711 @item sp\n\
712 A cell array of the text not returned by match, i.e., what remains if you\n\
713 split the string based on @var{pat}.\n\
714 @end table\n\
715 \n\
716 Particular output arguments, or the order of the output arguments, can be\n\
717 selected by additional @var{opt} arguments. These are strings and the\n\
718 correspondence between the output arguments and the optional argument\n\
719 are\n\
720 \n\
721 @multitable @columnfractions 0.2 0.3 0.3 0.2\n\
722 @item @tab @qcode{'start'} @tab @var{s} @tab\n\
723 @item @tab @qcode{'end'} @tab @var{e} @tab\n\
724 @item @tab @qcode{'tokenExtents'} @tab @var{te} @tab\n\
725 @item @tab @qcode{'match'} @tab @var{m} @tab\n\
726 @item @tab @qcode{'tokens'} @tab @var{t} @tab\n\
727 @item @tab @qcode{'names'} @tab @var{nm} @tab\n\
728 @item @tab @qcode{'split'} @tab @var{sp} @tab\n\
729 @end multitable\n\
730 \n\
731 Additional arguments are summarized below.\n\
732 \n\
733 @table @samp\n\
734 @item once\n\
735 Return only the first occurrence of the pattern.\n\
736 \n\
737 @item matchcase\n\
738 Make the matching case sensitive. (default)\n\
739 \n\
740 Alternatively, use (?-i) in the pattern.\n\
741 \n\
742 @item ignorecase\n\
743 Ignore case when matching the pattern to the string.\n\
744 \n\
745 Alternatively, use (?i) in the pattern.\n\
746 \n\
747 @item stringanchors\n\
748 Match the anchor characters at the beginning and end of the string.\n\
749 (default)\n\
750 \n\
751 Alternatively, use (?-m) in the pattern.\n\
752 \n\
753 @item lineanchors\n\
754 Match the anchor characters at the beginning and end of the line.\n\
755 \n\
756 Alternatively, use (?m) in the pattern.\n\
757 \n\
758 @item dotall\n\
759 The pattern @code{.} matches all characters including the newline character.\n\
760  (default)\n\
761 \n\
762 Alternatively, use (?s) in the pattern.\n\
763 \n\
764 @item dotexceptnewline\n\
765 The pattern @code{.} matches all characters except the newline character.\n\
766 \n\
767 Alternatively, use (?-s) in the pattern.\n\
768 \n\
769 @item literalspacing\n\
770 All characters in the pattern, including whitespace, are significant and are\n\
771 used in pattern matching. (default)\n\
772 \n\
773 Alternatively, use (?-x) in the pattern.\n\
774 \n\
775 @item freespacing\n\
776 The pattern may include arbitrary whitespace and also comments beginning with\n\
777 the character @samp{#}.\n\
778 \n\
779 Alternatively, use (?x) in the pattern.\n\
780 \n\
781 @item noemptymatch\n\
782 Zero-length matches are not returned. (default)\n\
783 \n\
784 @item emptymatch\n\
785 Return zero-length matches.\n\
786 \n\
787 @code{regexp ('a', 'b*', 'emptymatch')} returns @code{[1 2]} because there\n\
788 are zero or more @qcode{'b'} characters at positions 1 and end-of-string.\n\
789 \n\
790 @end table\n\
791 @seealso{regexpi, strfind, regexprep}\n\
792 @end deftypefn")
793 {
794  octave_value_list retval;
795 
796  int nargin = args.length ();
797 
798  if (nargin < 2)
799  print_usage ();
800  else if (args(0).is_cell () || args(1).is_cell ())
801  retval = octcellregexp (args, (nargout > 0 ? nargout : 1), "regexp");
802  else
803  retval = octregexp (args, nargout, "regexp");
804 
805  return retval;
806 }
807 
808 /*
809 ## PCRE_ERROR_MATCHLIMIT test
810 %!test
811 %! s = sprintf ('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-');
812 %! ws = warning ("query");
813 %! unwind_protect
814 %! warning ("off");
815 %! regexp (s, '(\s*-*\d+[.]*\d*\s*)+\n');
816 %! unwind_protect_cleanup
817 %! warning (ws);
818 %! end_unwind_protect
819 
820 ## segfault test
821 %!assert (regexp ("abcde", "."), [1,2,3,4,5])
822 ## Infinite loop test
823 %!assert (isempty (regexp ("abcde", "")))
824 
825 ## Check that anchoring of pattern works correctly
826 %!assert (regexp ('abcabc', '^abc'), 1)
827 %!assert (regexp ('abcabc', 'abc$'), 4)
828 %!assert (regexp ('abcabc', '^abc$'), zeros (1,0))
829 
830 %!test
831 %! [s, e, te, m, t] = regexp (' No Match ', 'f(.*)uck');
832 %! assert (s, zeros (1,0));
833 %! assert (e, zeros (1,0));
834 %! assert (te, cell (1,0));
835 %! assert (m, cell (1,0));
836 %! assert (t, cell (1,0));
837 
838 %!test
839 %! [s, e, te, m, t] = regexp (' FiRetrUck ', 'f(.*)uck');
840 %! assert (s, zeros (1,0));
841 %! assert (e, zeros (1,0));
842 %! assert (te, cell (1,0));
843 %! assert (m, cell (1,0));
844 %! assert (t, cell (1,0));
845 
846 %!test
847 %! [s, e, te, m, t] = regexp (' firetruck ', 'f(.*)uck');
848 %! assert (s, 2);
849 %! assert (e, 10);
850 %! assert (te{1}, [3, 7]);
851 %! assert (m{1}, 'firetruck');
852 %! assert (t{1}{1}, 'iretr');
853 
854 %!test
855 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*');
856 %! assert (s, [1, 12]);
857 %! assert (e, [5, 17]);
858 %! assert (size (te), [1, 2]);
859 %! assert (isempty (te{1}));
860 %! assert (isempty (te{2}));
861 %! assert (m{1}, 'short');
862 %! assert (m{2}, 'string');
863 %! assert (size (t), [1, 2]);
864 %! assert (isempty (t{1}));
865 %! assert (isempty (t{2}));
866 
867 %!test
868 %! [s, e, te, m, t] = regexp ('short test string', '\w*r\w*', 'once');
869 %! assert (s, 1);
870 %! assert (e, 5);
871 %! assert (isempty (te));
872 %! assert (m, 'short');
873 %! assert (isempty (t));
874 
875 %!test
876 %! [m, te, e, s, t] = regexp ('short test string', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
877 %! assert (s, 1);
878 %! assert (e, 5);
879 %! assert (isempty (te));
880 %! assert (m, 'short');
881 %! assert (isempty (t));
882 
883 %!test
884 %! [s, e, te, m, t, nm] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)');
885 %! assert (s, 1);
886 %! assert (e, 10);
887 %! assert (size (te), [1, 1]);
888 %! assert (te{1}, [1,5; 7,10]);
889 %! assert (m{1}, 'short test');
890 %! assert (size (t), [1, 1]);
891 %! assert (t{1}{1}, 'short');
892 %! assert (t{1}{2}, 'test');
893 %! assert (size (nm), [1, 1]);
894 %! assert (! isempty (fieldnames (nm)));
895 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
896 %! assert (nm.word1, 'short');
897 %! assert (nm.word2, 'test');
898 
899 %!test
900 %! [nm, m, te, e, s, t] = regexp ('short test string', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
901 %! assert (s, 1);
902 %! assert (e, 10);
903 %! assert (size (te), [1, 1]);
904 %! assert (te{1}, [1,5; 7,10]);
905 %! assert (m{1}, 'short test');
906 %! assert (size (t), [1, 1]);
907 %! assert (t{1}{1}, 'short');
908 %! assert (t{1}{2}, 'test');
909 %! assert (size (nm), [1, 1]);
910 %! assert (!isempty (fieldnames (nm)));
911 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
912 %! assert (nm.word1, 'short');
913 %! assert (nm.word2, 'test');
914 
915 %!test
916 %! [t, nm] = regexp ("John Davis\nRogers, James", '(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)', 'tokens', 'names');
917 %! assert (size (t), [1, 2]);
918 %! assert (t{1}{1}, 'John');
919 %! assert (t{1}{2}, 'Davis');
920 %! assert (t{2}{1}, 'Rogers');
921 %! assert (t{2}{2}, 'James');
922 %! assert (size (nm), [1, 1]);
923 %! assert (nm.first{1}, 'John');
924 %! assert (nm.first{2}, 'James');
925 %! assert (nm.last{1}, 'Davis');
926 %! assert (nm.last{2}, 'Rogers');
927 
928 ## Tests for named tokens
929 %!test
930 %! ## Parenthesis in named token (ie (int)) causes a problem
931 %! assert (regexp ('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
932 
933 %!test
934 %! ## Mix of named and unnamed tokens can cause segfault (bug #35683)
935 %! str = "abcde";
936 %! ptn = '(?<T1>a)(\w+)(?<T2>d\w+)';
937 %! tokens = regexp (str, ptn, "names");
938 %! assert (isstruct (tokens) && numel (tokens) == 1);
939 %! assert (tokens.T1, "a");
940 %! assert (tokens.T2, "de");
941 
942 %!assert (regexp ("abc\nabc", '.'), [1:7])
943 %!assert (regexp ("abc\nabc", '.', 'dotall'), [1:7])
944 %!test
945 %! assert (regexp ("abc\nabc", '(?s).'), [1:7]);
946 %! assert (regexp ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
947 %! assert (regexp ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
948 
949 %!assert (regexp ("caseCaSe", 'case'), 1)
950 %!assert (regexp ("caseCaSe", 'case', "matchcase"), 1)
951 %!assert (regexp ("caseCaSe", 'case', "ignorecase"), [1,5])
952 %!test
953 %! assert (regexp ("caseCaSe", '(?-i)case'), 1);
954 %! assert (regexp ("caseCaSe", '(?i)case'), [1, 5]);
955 
956 %!assert (regexp ("abc\nabc", 'c$'), 7)
957 %!assert (regexp ("abc\nabc", 'c$', "stringanchors"), 7)
958 %!test
959 %! assert (regexp ("abc\nabc", '(?-m)c$'), 7);
960 %! assert (regexp ("abc\nabc", 'c$',"lineanchors"), [3, 7]);
961 %! assert (regexp ("abc\nabc", '(?m)c$'), [3,7]);
962 
963 %!assert (regexp ("this word", 's w'), 4)
964 %!assert (regexp ("this word", 's w', 'literalspacing'), 4)
965 %!test
966 %! assert (regexp ("this word", '(?-x)s w', 'literalspacing'), 4);
967 %! assert (regexp ("this word", 's w', 'freespacing'), zeros (1,0));
968 %! assert (regexp ("this word", '(?x)s w'), zeros (1,0));
969 
970 %!test
971 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'noemptymatch');
972 %! assert (s, [1 5]);
973 %! assert (e, [3 5]);
974 %! assert (te, { zeros(0,2), zeros(0,2) });
975 %! assert (m, { "OCT", "V" });
976 %! assert (t, { cell(1,0), cell(1,0) });
977 %! assert (isempty (fieldnames (nm)));
978 %! assert (sp, { "", "A", "E" });
979 
980 %!test
981 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'noemptymatch');
982 %! assert (s, [1 5]);
983 %! assert (e, [3 5]);
984 %! assert (te, { [1 3], [5 5] });
985 %! assert (m, { "OCT", "V" });
986 %! assert (t, { {"OCT"}, {"V"} });
987 %! assert (isempty (fieldnames (nm)));
988 %! assert (sp, { "", "A", "E" });
989 
990 %!test
991 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '[VOCT]*', 'emptymatch');
992 %! assert (s, [1 4 5 6 7]);
993 %! assert (e, [3 3 5 5 6]);
994 %! assert (te, repmat ({zeros(0,2)}, [1, 5]));
995 %! assert (m, { "OCT", "", "V", "", "" });
996 %! assert (t, repmat({cell(1,0)}, [1, 5]));
997 %! assert (isempty (fieldnames (nm)));
998 %! assert (sp, { "", "", "A", "", "E", "" });
999 
1000 %!test
1001 %! [s, e, te, m, t, nm, sp] = regexp ('OCTAVE', '([VOCT]*)', 'emptymatch');
1002 %! assert (s, [1 4 5 6 7]);
1003 %! assert (e, [3 3 5 5 6]);
1004 %! assert (te, { [1 3], [4 3], [5 5], [6 5], [7 6] });
1005 %! assert (m, { "OCT", "", "V", "", "" });
1006 %! assert (t, { {"OCT"}, {""}, {"V"}, {""}, {""} });
1007 %! assert (isempty (fieldnames (nm)));
1008 %! assert (sp, { "", "", "A", "", "E", "" });
1009 
1010 %!error regexp ('string', 'tri', 'BadArg')
1011 %!error regexp ('string')
1012 
1013 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1,0)})
1014 %!assert (regexp ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1015 %!assert (regexp ('Strings', {'t','s'}), {2, 7})
1016 
1017 ## Test case for lookaround operators
1018 %!test
1019 %! assert (regexp ('Iraq', 'q(?!u)'), 4);
1020 %! assert (regexp ('quit', 'q(?!u)'), zeros (1, 0));
1021 %! assert (regexp ('quit', 'q(?=u)' , 'match'), {'q'});
1022 %! assert (regexp ("quit", 'q(?=u+)', 'match'), {'q'});
1023 %! assert (regexp ("qit", 'q(?=u+)', 'match'), cell (1, 0));
1024 %! assert (regexp ("qit", 'q(?=u*)', 'match'), {'q'});
1025 %! assert (regexp ('thingamabob', '(?<=a)b'), 9);
1026 
1027 ## Tests for split option.
1028 %!shared str
1029 %! str = "foo bar foo";
1030 %!test
1031 %! [a, b] = regexp (str, "f..", "match", "split");
1032 %! assert (a, {"foo", "foo"});
1033 %! assert (b, {"", " bar ", ""});
1034 %!test
1035 %! [a, b] = regexp (str, "f..", "match", "split", "once");
1036 %! assert (a, "foo");
1037 %! assert (b, {"", " bar foo"});
1038 %!test
1039 %! [a, b] = regexp (str, "fx.", "match", "split");
1040 %! assert (a, cell (1, 0));
1041 %! assert (b, {"foo bar foo"});
1042 %!test
1043 %! [a, b] = regexp (str, "fx.", "match", "split", "once");
1044 %! assert (a, "");;
1045 %! assert (b, "foo bar foo");
1046 
1047 %!shared str
1048 %! str = "foo bar";
1049 %!test
1050 %! [a, b] = regexp (str, "f..", "match", "split");
1051 %! assert (a, {"foo"});
1052 %! assert (b, {"", " bar"});
1053 %!test
1054 %! [a, b] = regexp (str, "b..", "match", "split");
1055 %! assert (a, {"bar"});
1056 %! assert (b, {"foo ", ""});
1057 %!test
1058 %! [a, b] = regexp (str, "x", "match", "split");
1059 %! assert (a, cell (1, 0));
1060 %! assert (b, {"foo bar"});
1061 %!test
1062 %! [a, b] = regexp (str, "[o]+", "match", "split");
1063 %! assert (a, {"oo"});
1064 %! assert (b, {"f", " bar"});
1065 
1066 %!assert (regexp ("\n", '\n'), 1);
1067 %!assert (regexp ("\n", "\n"), 1);
1068 */
1069 
1070 DEFUN (regexpi, args, nargout,
1071  "-*- texinfo -*-\n\
1072 @deftypefn {Built-in Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}, @var{sp}] =} regexpi (@var{str}, @var{pat})\n\
1073 @deftypefnx {Built-in Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
1074 \n\
1075 Case insensitive regular expression string matching.\n\
1076 \n\
1077 Search for @var{pat} in @var{str} and return the positions and substrings of\n\
1078 any matches, or empty values if there are none. @xref{XREFregexp,,regexp},\n\
1079 for details on the syntax of the search pattern.\n\
1080 @seealso{regexp}\n\
1081 @end deftypefn")
1082 {
1083  octave_value_list retval;
1084 
1085  int nargin = args.length ();
1086 
1087  if (nargin < 2)
1088  print_usage ();
1089  else if (args(0).is_cell () || args(1).is_cell ())
1090  retval = octcellregexp (args, (nargout > 0 ? nargout : 1), "regexpi", true);
1091  else
1092  retval = octregexp (args, nargout, "regexpi", true);
1093 
1094  return retval;
1095 }
1096 
1097 /*
1098 ## segfault test
1099 %!assert (regexpi ("abcde", "."), [1,2,3,4,5])
1100 
1101 ## Check that anchoring of pattern works correctly
1102 %!assert (regexpi ('abcabc', '^ABC'), 1)
1103 %!assert (regexpi ('abcabc', 'ABC$'), 4)
1104 %!assert (regexpi ('abcabc', '^ABC$'), zeros (1,0))
1105 
1106 %!test
1107 %! [s, e, te, m, t] = regexpi (' No Match ', 'f(.*)uck');
1108 %! assert (s, zeros (1,0));
1109 %! assert (e, zeros (1,0));
1110 %! assert (te, cell (1,0));
1111 %! assert (m, cell (1,0));
1112 %! assert (t, cell (1,0));
1113 
1114 %!test
1115 %! [s, e, te, m, t] = regexpi (' FiRetrUck ', 'f(.*)uck');
1116 %! assert (s, 2);
1117 %! assert (e, 10);
1118 %! assert (te{1}, [3, 7]);
1119 %! assert (m{1}, 'FiRetrUck');
1120 %! assert (t{1}{1}, 'iRetr');
1121 
1122 %!test
1123 %! [s, e, te, m, t] = regexpi (' firetruck ', 'f(.*)uck');
1124 %! assert (s, 2);
1125 %! assert (e, 10);
1126 %! assert (te{1}, [3, 7]);
1127 %! assert (m{1}, 'firetruck');
1128 %! assert (t{1}{1}, 'iretr');
1129 
1130 %!test
1131 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*');
1132 %! assert (s, [1, 12]);
1133 %! assert (e, [5, 17]);
1134 %! assert (size (te), [1, 2]);
1135 %! assert (isempty (te{1}));
1136 %! assert (isempty (te{2}));
1137 %! assert (m{1}, 'ShoRt');
1138 %! assert (m{2}, 'String');
1139 %! assert (size (t), [1, 2]);
1140 %! assert (isempty (t{1}));
1141 %! assert (isempty (t{2}));
1142 
1143 %!test
1144 %! [s, e, te, m, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once');
1145 %! assert (s, 1);
1146 %! assert (e, 5);
1147 %! assert (isempty (te));
1148 %! assert (m, 'ShoRt');
1149 %! assert (isempty (t));
1150 
1151 %!test
1152 %! [m, te, e, s, t] = regexpi ('ShoRt Test String', '\w*r\w*', 'once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1153 %! assert (s, 1);
1154 %! assert (e, 5);
1155 %! assert (isempty (te));
1156 %! assert (m, 'ShoRt');
1157 %! assert (isempty (t));
1158 
1159 %!test
1160 %! [s, e, te, m, t, nm] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)');
1161 %! assert (s, 1);
1162 %! assert (e, 10);
1163 %! assert (size (te), [1, 1]);
1164 %! assert (te{1}, [1,5; 7,10]);
1165 %! assert (m{1}, 'ShoRt Test');
1166 %! assert (size (t), [1, 1]);
1167 %! assert (t{1}{1}, 'ShoRt');
1168 %! assert (t{1}{2}, 'Test');
1169 %! assert (size (nm), [1, 1]);
1170 %! assert (! isempty (fieldnames (nm)));
1171 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1172 %! assert (nm.word1, 'ShoRt');
1173 %! assert (nm.word2, 'Test');
1174 
1175 %!test
1176 %! [nm, m, te, e, s, t] = regexpi ('ShoRt Test String', '(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1177 %! assert (s, 1);
1178 %! assert (e, 10);
1179 %! assert (size (te), [1, 1]);
1180 %! assert (te{1}, [1,5; 7,10]);
1181 %! assert (m{1}, 'ShoRt Test');
1182 %! assert (size (t), [1, 1]);
1183 %! assert (t{1}{1}, 'ShoRt');
1184 %! assert (t{1}{2}, 'Test');
1185 %! assert (size (nm), [1, 1]);
1186 %! assert (!isempty (fieldnames (nm)));
1187 %! assert (sort (fieldnames (nm)), {'word1';'word2'});
1188 %! assert (nm.word1, 'ShoRt');
1189 %! assert (nm.word2, 'Test');
1190 
1191 %!assert (regexpi ("abc\nabc", '.'), [1:7])
1192 %!assert (regexpi ("abc\nabc", '.', 'dotall'), [1:7])
1193 %!test
1194 %! assert (regexpi ("abc\nabc", '(?s).'), [1:7]);
1195 %! assert (regexpi ("abc\nabc", '.', 'dotexceptnewline'), [1,2,3,5,6,7]);
1196 %! assert (regexpi ("abc\nabc", '(?-s).'), [1,2,3,5,6,7]);
1197 
1198 %!assert (regexpi ("caseCaSe", 'case'), [1, 5])
1199 %!assert (regexpi ("caseCaSe", 'case', "matchcase"), 1)
1200 %!assert (regexpi ("caseCaSe", 'case', "ignorecase"), [1, 5])
1201 %!test
1202 %! assert (regexpi ("caseCaSe", '(?-i)case'), 1);
1203 %! assert (regexpi ("caseCaSe", '(?i)case'), [1, 5]);
1204 
1205 %!assert (regexpi ("abc\nabc", 'C$'), 7)
1206 %!assert (regexpi ("abc\nabc", 'C$', "stringanchors"), 7)
1207 %!test
1208 %! assert (regexpi ("abc\nabc", '(?-m)C$'), 7);
1209 %! assert (regexpi ("abc\nabc", 'C$', "lineanchors"), [3, 7]);
1210 %! assert (regexpi ("abc\nabc", '(?m)C$'), [3, 7]);
1211 
1212 %!assert (regexpi ("this word", 'S w'), 4)
1213 %!assert (regexpi ("this word", 'S w', 'literalspacing'), 4)
1214 %!test
1215 %! assert (regexpi ("this word", '(?-x)S w', 'literalspacing'), 4);
1216 %! assert (regexpi ("this word", 'S w', 'freespacing'), zeros (1,0));
1217 %! assert (regexpi ("this word", '(?x)S w'), zeros (1,0));
1218 
1219 %!error regexpi ('string', 'tri', 'BadArg')
1220 %!error regexpi ('string')
1221 
1222 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, '-'), {6;[1,5,9];zeros(1, 0)})
1223 %!assert (regexpi ({'asdfg-dfd', '-dfd-dfd-', 'qasfdfdaq'}, '-'), {6, [1,5,9], zeros(1,0)})
1224 %!assert (regexpi ({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'}, {'-';'f';'q'}), {6;[3,7];[1,9]})
1225 %!assert (regexpi ('Strings', {'t', 's'}), {2, [1, 7]})
1226 
1227 %!assert (regexpi ("\n", '\n'), 1);
1228 %!assert (regexpi ("\n", "\n"), 1);
1229 */
1230 
1231 static octave_value
1232 octregexprep (const octave_value_list &args, const std::string &who)
1233 {
1234  octave_value retval;
1235 
1236  int nargin = args.length ();
1237 
1238  // Make sure we have string, pattern, replacement
1239  const std::string buffer = args(0).string_value ();
1240  if (error_state)
1241  return retval;
1242 
1243  std::string pattern = args(1).string_value ();
1244  if (error_state)
1245  return retval;
1246  // Matlab compatibility.
1247  if (args(1).is_sq_string ())
1248  pattern = do_regexp_ptn_string_escapes (pattern);
1249 
1250  std::string replacement = args(2).string_value ();
1251  if (error_state)
1252  return retval;
1253  // Matlab compatibility.
1254  if (args(2).is_sq_string ())
1255  replacement = do_regexp_rep_string_escapes (replacement);
1256 
1257  // Pack options excluding 'tokenize' and various output
1258  // reordering strings into regexp arg list
1259  octave_value_list regexpargs (nargin-3, octave_value ());
1260 
1261  int len = 0;
1262  for (int i = 3; i < nargin; i++)
1263  {
1264  const std::string opt = args(i).string_value ();
1265  if (opt != "tokenize" && opt != "start" && opt != "end"
1266  && opt != "tokenextents" && opt != "match" && opt != "tokens"
1267  && opt != "names" && opt != "split" && opt != "warnings")
1268  {
1269  regexpargs(len++) = args(i);
1270  }
1271  }
1272  regexpargs.resize (len);
1273 
1274  regexp::opts options;
1275  bool extra_args = false;
1276  parse_options (options, regexpargs, who, 0, extra_args);
1277  if (error_state)
1278  return retval;
1279 
1280  return regexp_replace (pattern, buffer, replacement, options, who);
1281 }
1282 
1283 DEFUN (regexprep, args, ,
1284  "-*- texinfo -*-\n\
1285 @deftypefn {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr})\n\
1286 @deftypefnx {Built-in Function} {@var{outstr} =} regexprep (@var{string}, @var{pat}, @var{repstr}, \"@var{opt1}\", @dots{})\n\
1287 Replace occurrences of pattern @var{pat} in @var{string} with @var{repstr}.\n\
1288 \n\
1289 The pattern is a regular expression as documented for @code{regexp}.\n\
1290 @xref{XREFregexp,,regexp}.\n\
1291 \n\
1292 The replacement string may contain @code{$i}, which substitutes for the ith\n\
1293 set of parentheses in the match string. For example,\n\
1294 \n\
1295 @example\n\
1296 regexprep (\"Bill Dunn\", '(\\w+) (\\w+)', '$2, $1')\n\
1297 @end example\n\
1298 \n\
1299 @noindent\n\
1300 returns \"Dunn, Bill\"\n\
1301 \n\
1302 Options in addition to those of @code{regexp} are\n\
1303 \n\
1304 @table @samp\n\
1305 \n\
1306 @item once\n\
1307 Replace only the first occurrence of @var{pat} in the result.\n\
1308 \n\
1309 @item warnings\n\
1310 This option is present for compatibility but is ignored.\n\
1311 \n\
1312 @end table\n\
1313 \n\
1314 Implementation Note: For compatibility with @sc{matlab}, escape sequences\n\
1315 in @var{pat} (e.g., @qcode{\"@xbackslashchar{}n\"} => newline) are expanded\n\
1316 even when @var{pat} has been defined with single quotes. To disable\n\
1317 expansion use a second backslash before the escape sequence (e.g.,\n\
1318 \"@xbackslashchar{}@xbackslashchar{}n\") or use the @code{regexptranslate}\n\
1319 function.\n\
1320 @seealso{regexp, regexpi, strrep}\n\
1321 @end deftypefn")
1322 {
1323  octave_value_list retval;
1324  int nargin = args.length ();
1325 
1326  if (nargin < 3)
1327  {
1328  print_usage ();
1329  return retval;
1330  }
1331 
1332  if (args(0).is_cell () || args(1).is_cell () || args(2).is_cell ())
1333  {
1334  Cell str;
1335  Cell pat;
1336  Cell rep;
1337  dim_vector dv0;
1338  dim_vector dv1 (1, 1);
1339 
1340  if (args(0).is_cell ())
1341  str = args(0).cell_value ();
1342  else
1343  str = Cell (args(0));
1344 
1345  if (args(1).is_cell ())
1346  pat = args(1).cell_value ();
1347  else
1348  pat = Cell (args(1));
1349 
1350  if (args(2).is_cell ())
1351  rep = args(2).cell_value ();
1352  else
1353  rep = Cell (args(2));
1354 
1355  dv0 = str.dims ();
1356  if (pat.numel () != 1)
1357  {
1358  dv1 = pat.dims ();
1359  if (rep.numel () != 1 && dv1 != rep.dims ())
1360  error ("regexprep: inconsistent cell array dimensions");
1361  }
1362  else if (rep.numel () != 1)
1363  dv1 = rep.dims ();
1364 
1365  if (!error_state)
1366  {
1367  Cell ret (dv0);
1368  octave_value_list new_args = args;
1369 
1370  for (octave_idx_type i = 0; i < dv0.numel (); i++)
1371  {
1372  new_args(0) = str(i);
1373  if (pat.numel () == 1)
1374  new_args(1) = pat(0);
1375  if (rep.numel () == 1)
1376  new_args(2) = rep(0);
1377 
1378  for (octave_idx_type j = 0; j < dv1.numel (); j++)
1379  {
1380  if (pat.numel () != 1)
1381  new_args(1) = pat(j);
1382  if (rep.numel () != 1)
1383  new_args(2) = rep(j);
1384  new_args(0) = octregexprep (new_args, "regexprep");
1385 
1386  if (error_state)
1387  break;
1388  }
1389 
1390  if (error_state)
1391  break;
1392 
1393  ret(i) = new_args(0);
1394  }
1395 
1396  if (!error_state)
1397  retval = args(0).is_cell () ? octave_value (ret)
1398  : octave_value (ret(0));
1399  }
1400  }
1401  else
1402  retval = octregexprep (args, "regexprep");
1403 
1404  return retval;
1405 }
1406 
1407 /*
1408 %!test # Replace with empty
1409 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1410 %! t = regexprep (xml, '<[!?][^>]*>', '');
1411 %! assert (t, ' <tag v="hello">some stuff</tag>');
1412 
1413 %!test # Replace with non-empty
1414 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1415 %! t = regexprep (xml, '<[!?][^>]*>', '?');
1416 %! assert (t, '? <tag v="hello">some stuff?</tag>');
1417 
1418 %!test # Check that 'tokenize' is ignored
1419 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1420 %! t = regexprep (xml, '<[!?][^>]*>', '', 'tokenize');
1421 %! assert (t, ' <tag v="hello">some stuff</tag>');
1422 
1423 ## Test capture replacement
1424 %!test
1425 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
1426 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
1427 %! t = regexprep (data, '(?m)^(\w+)\s+(\w+)$', '$2, $1');
1428 %! assert (t, result);
1429 
1430 ## Return the original if no match
1431 %!assert (regexprep ('hello', 'world', 'earth'), 'hello')
1432 
1433 ## Test emptymatch
1434 %!assert (regexprep ('World', '^', 'Hello '), 'World')
1435 %!assert (regexprep ('World', '^', 'Hello ', 'emptymatch'), 'Hello World')
1436 
1437 ## Test a general replacement
1438 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g")
1439 
1440 ## Make sure it works at the beginning and end
1441 %!assert (regexprep ("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g")
1442 %!assert (regexprep ("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_")
1443 
1444 ## Options
1445 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g")
1446 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g")
1447 
1448 ## Option combinations
1449 %!assert (regexprep ("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g")
1450 
1451 ## End conditions on replacement
1452 %!assert (regexprep ("abc", "(b)", ".$1"), "a.bc");
1453 %!assert (regexprep ("abc", "(b)", "$1"), "abc");
1454 %!assert (regexprep ("abc", "(b)", "$1."), "ab.c");
1455 %!assert (regexprep ("abc", "(b)", "$1.."), "ab..c");
1456 
1457 ## Test cell array arguments
1458 %!assert (regexprep ("abc", {"b","a"}, "?"), "??c")
1459 %!assert (regexprep ({"abc","cba"}, "b", "?"), {"a?c","c?a"})
1460 %!assert (regexprep ({"abc","cba"}, {"b","a"}, {"?","!"}), {"!?c","c?!"})
1461 
1462 # Nasty lookbehind expression
1463 %!assert (regexprep ('x^(-1)+y(-1)+z(-1)=0', '(?<=[a-z]+)\(\-[1-9]*\)', '_minus1'),'x^(-1)+y_minus1+z_minus1=0')
1464 
1465 %!assert (regexprep ("\n", '\n', "X"), "X");
1466 %!assert (regexprep ("\n", "\n", "X"), "X");
1467 */
Definition: Cell.h:35
void emptymatch(bool val)
Definition: lo-regexp.h:129
OCTINTERP_API void print_usage(void)
Definition: defun.cc:51
octave_idx_type numel(void) const
Number of elements in the array.
Definition: Array.h:275
octave_idx_type length(void) const
Definition: oct-obj.h:89
regexp::match_data regexp_match(const std::string &pat, const std::string &buffer, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
Definition: lo-regexp.h:245
string_vector named_patterns(void)
Definition: lo-regexp.h:216
void dotexceptnewline(bool val)
Definition: lo-regexp.h:128
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:44
void error(const char *fmt,...)
Definition: error.cc:476
void freespacing(bool val)
Definition: lo-regexp.h:130
static octave_value_list octcellregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
Definition: regexp.cc:447
octave_idx_type numel(int n=0) const
Number of elements that a matrix with this dimensions would have.
Definition: dim-vector.h:361
static std::string do_regexp_rep_string_escapes(const std::string &s)
Definition: regexp.cc:109
Cell cell_value(void) const
Definition: oct-obj.h:81
const dim_vector & dims(void) const
Return a const-reference so that dims ()(i) works efficiently.
Definition: Array.h:337
iterator end(void)
Definition: base-list.h:81
std::string regexp_replace(const std::string &pat, const std::string &buffer, const std::string &replacement, const regexp::opts &opt=regexp::opts(), const std::string &who="regexp")
Definition: lo-regexp.h:278
static octave_value_list octregexp(const octave_value_list &args, int nargout, const std::string &who, bool case_insensitive=false)
Definition: regexp.cc:240
static std::string do_regexp_ptn_string_escapes(const std::string &s)
Definition: regexp.cc:53
int error_state
Definition: error.cc:101
std::list< match_element >::const_iterator const_iterator
Definition: base-list.h:37
Definition: dMatrix.h:35
static void parse_options(regexp::opts &options, const octave_value_list &args, const std::string &who, int skip, bool &extra_args)
Definition: regexp.cc:183
void once(bool val)
Definition: lo-regexp.h:132
iterator begin(void)
Definition: base-list.h:78
octave_idx_type length(void) const
Number of elements in the array.
Definition: Array.h:267
void assign(const std::string &k, const octave_value &val)
Definition: oct-map.h:225
void case_insensitive(bool val)
Definition: lo-regexp.h:127
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:197
void lineanchors(bool val)
Definition: lo-regexp.h:131
void resize(octave_idx_type n, const octave_value &rfv=octave_value())
Definition: oct-obj.h:93
ColumnVector transform(const Matrix &m, double x, double y, double z)
Definition: graphics.cc:5259
static octave_value octregexprep(const octave_value_list &args, const std::string &who)
Definition: regexp.cc:1232
return octave_value(v1.char_array_value().concat(v2.char_array_value(), ra_idx),((a1.is_sq_string()||a2.is_sq_string())? '\'': '"'))
size_t size(void) const
Definition: base-list.h:44